Commit 6adae108 authored by Dave Airlie

Merge branch 'drm-next-3.18' of git://people.freedesktop.org/~agd5f/linux into drm-next

More radeon changes for drm-next.  Highlights:
- UVD support for older asics
- Reset rework in preparation for Maarten's fence patches
I have a few more patches which depend on Christian's ttm changes;
I'll send them out separately once you've merged the ttm changes.
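
For context, the reset rework drops the IRQ-scheduled reset work item: a per-ring delayed work item in the fence driver now flags a lockup (sets rdev->needs_reset and wakes the fence queue), and blocked waiters drive the reset themselves. The sketch below condenses how a waiter recovers; it mirrors radeon_flip_work_func() and radeon_cs_ioctl() in the diff, but wait_and_recover() itself is a hypothetical helper added only for illustration, not code from this series.

/* Hedged sketch (not from this series): recovery path once the delayed
 * lockup check has set rdev->needs_reset and woken the fence queue.
 * wait_and_recover() is a hypothetical helper name. */
static int wait_and_recover(struct radeon_device *rdev, struct radeon_fence *fence)
{
	int r = radeon_fence_wait(fence, false);

	if (r == -EDEADLK) {
		/* lockup detected: drop the read lock, reset, and retry
		 * while the post-reset ring tests report -EAGAIN */
		up_read(&rdev->exclusive_lock);
		do {
			r = radeon_gpu_reset(rdev);
		} while (r == -EAGAIN);
		down_read(&rdev->exclusive_lock);
	}
	return r;
}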

* 'drm-next-3.18' of git://people.freedesktop.org/~agd5f/linux:
  drm/radeon: drop doing resets in a work item
  drm/radeon: drop RADEON_FENCE_SIGNALED_SEQ v2
  drm/radeon: add timeout argument to radeon_fence_wait_seq v2
  drm/radeon: handle lockup in delayed work, v5
  drm/radeon: take exclusive_lock in read mode during ring tests, v5
  drm/radeon: force fence completion only on problematic rings (v2)
  drm/radeon: wake up all fences on manual reset
  drm/radeon: add UVD fw names for older asic
  drm/radeon: enable RB_ARB before resetting the VCPU
  drm/radeon: 760G/780V/880V don't have UVD
  drm/radeon: implement UVD hw workarounds for R6xx v3
  drm/radeon: add UVD support for older asics v4
  drm/radeon: add set_uvd_clocks callback for r6xx v4
  drm/radeon: properly init UVD MC bits on R600
  drm/radeon: force UVD buffers into VRAM on RS[78]80 v2
  drm/radeon: move the IB test after the AGP fallback
parents fb1aacae 3c036389
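
The UVD entries in the shortlog above wire older-ASIC UVD support into r600.c in the diff below. As a hedged sketch (assumptions: error handling and the suspend path trimmed), the bring-up order ends up roughly as follows:

/* Rough outline only, condensed from r600_init()/r600_startup() in this
 * merge; the real code checks every return value and tears down on failure. */
if (rdev->has_uvd) {
	radeon_uvd_init(rdev);                       /* fetch firmware, allocate the UVD BO */
	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
	uvd_v1_0_resume(rdev);                       /* program VCPU cache offsets and LMI bits */
	radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
	radeon_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX],
			 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size, 0,
			 RADEON_CP_PACKET2);
	uvd_v1_0_init(rdev);                         /* start the VCPU, apply workarounds, ring test */
}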
@@ -8246,8 +8246,10 @@ int cik_irq_process(struct radeon_device *rdev)
}
if (queue_hotplug)
schedule_work(&rdev->hotplug_work);
-if (queue_reset)
-schedule_work(&rdev->reset_work);
+if (queue_reset) {
+rdev->needs_reset = true;
+wake_up_all(&rdev->fence_queue);
+}
if (queue_thermal)
schedule_work(&rdev->pm.dpm.thermal.work);
rdev->ih.rptr = rptr;
@@ -122,6 +122,94 @@ u32 r600_get_xclk(struct radeon_device *rdev)
int r600_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
unsigned fb_div = 0, ref_div, vclk_div = 0, dclk_div = 0;
int r;
/* bypass vclk and dclk with bclk */
WREG32_P(CG_UPLL_FUNC_CNTL_2,
VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
/* assert BYPASS_EN, deassert UPLL_RESET, UPLL_SLEEP and UPLL_CTLREQ */
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~(
UPLL_RESET_MASK | UPLL_SLEEP_MASK | UPLL_CTLREQ_MASK));
if (rdev->family >= CHIP_RS780)
WREG32_P(GFX_MACRO_BYPASS_CNTL, UPLL_BYPASS_CNTL,
~UPLL_BYPASS_CNTL);
if (!vclk || !dclk) {
/* keep the Bypass mode, put PLL to sleep */
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
return 0;
}
if (rdev->clock.spll.reference_freq == 10000)
ref_div = 34;
else
ref_div = 4;
r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 50000, 160000,
ref_div + 1, 0xFFF, 2, 30, ~0,
&fb_div, &vclk_div, &dclk_div);
if (r)
return r;
if (rdev->family >= CHIP_RV670 && rdev->family < CHIP_RS780)
fb_div >>= 1;
else
fb_div |= 1;
r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
if (r)
return r;
/* assert PLL_RESET */
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
/* For RS780 we have to choose ref clk */
if (rdev->family >= CHIP_RS780)
WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_REFCLK_SRC_SEL_MASK,
~UPLL_REFCLK_SRC_SEL_MASK);
/* set the required fb, ref and post divder values */
WREG32_P(CG_UPLL_FUNC_CNTL,
UPLL_FB_DIV(fb_div) |
UPLL_REF_DIV(ref_div),
~(UPLL_FB_DIV_MASK | UPLL_REF_DIV_MASK));
WREG32_P(CG_UPLL_FUNC_CNTL_2,
UPLL_SW_HILEN(vclk_div >> 1) |
UPLL_SW_LOLEN((vclk_div >> 1) + (vclk_div & 1)) |
UPLL_SW_HILEN2(dclk_div >> 1) |
UPLL_SW_LOLEN2((dclk_div >> 1) + (dclk_div & 1)) |
UPLL_DIVEN_MASK | UPLL_DIVEN2_MASK,
~UPLL_SW_MASK);
/* give the PLL some time to settle */
mdelay(15);
/* deassert PLL_RESET */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
mdelay(15);
/* deassert BYPASS EN */
WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
if (rdev->family >= CHIP_RS780)
WREG32_P(GFX_MACRO_BYPASS_CNTL, 0, ~UPLL_BYPASS_CNTL);
r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
if (r)
return r;
/* switch VCLK and DCLK selection */
WREG32_P(CG_UPLL_FUNC_CNTL_2,
VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
mdelay(100);
return 0;
}
@@ -992,6 +1080,8 @@ static int r600_pcie_gart_enable(struct radeon_device *rdev)
WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_RD_UVD_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_WR_UVD_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
@@ -1042,6 +1132,8 @@ static void r600_pcie_gart_disable(struct radeon_device *rdev)
WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_RD_UVD_CNTL, tmp);
WREG32(MC_VM_L1_TLB_MCB_WR_UVD_CNTL, tmp);
radeon_gart_table_vram_unpin(rdev);
}
@@ -2917,6 +3009,18 @@ static int r600_startup(struct radeon_device *rdev)
return r;
}
if (rdev->has_uvd) {
r = uvd_v1_0_resume(rdev);
if (!r) {
r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
if (r) {
dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
}
}
if (r)
rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
}
/* Enable IRQ */
if (!rdev->irq.installed) {
r = radeon_irq_kms_init(rdev);
@@ -2945,6 +3049,18 @@ static int r600_startup(struct radeon_device *rdev)
if (r)
return r;
if (rdev->has_uvd) {
ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
if (ring->ring_size) {
r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
RADEON_CP_PACKET2);
if (!r)
r = uvd_v1_0_init(rdev);
if (r)
DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
}
}
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -3004,6 +3120,10 @@ int r600_suspend(struct radeon_device *rdev)
radeon_pm_suspend(rdev);
r600_audio_fini(rdev);
r600_cp_stop(rdev);
if (rdev->has_uvd) {
uvd_v1_0_fini(rdev);
radeon_uvd_suspend(rdev);
}
r600_irq_suspend(rdev);
radeon_wb_disable(rdev);
r600_pcie_gart_disable(rdev);
@@ -3083,6 +3203,14 @@ int r600_init(struct radeon_device *rdev)
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
if (rdev->has_uvd) {
r = radeon_uvd_init(rdev);
if (!r) {
rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
}
}
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
@@ -3112,6 +3240,10 @@ void r600_fini(struct radeon_device *rdev)
r600_audio_fini(rdev);
r600_cp_fini(rdev);
r600_irq_fini(rdev);
if (rdev->has_uvd) {
uvd_v1_0_fini(rdev);
radeon_uvd_fini(rdev);
}
radeon_wb_fini(rdev);
radeon_ib_pool_fini(rdev);
radeon_irq_kms_fini(rdev);
@@ -330,11 +330,12 @@
#define HDP_TILING_CONFIG 0x2F3C
#define HDP_DEBUG1 0x2F34
#define MC_CONFIG 0x2000
#define MC_VM_AGP_TOP 0x2184
#define MC_VM_AGP_BOT 0x2188
#define MC_VM_AGP_BASE 0x218C
#define MC_VM_FB_LOCATION 0x2180
-#define MC_VM_L1_TLB_MCD_RD_A_CNTL 0x219C
+#define MC_VM_L1_TLB_MCB_RD_UVD_CNTL 0x2124
#define ENABLE_L1_TLB (1 << 0)
#define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1)
#define ENABLE_L1_STRICT_ORDERING (1 << 2)
@@ -354,12 +355,14 @@
#define EFFECTIVE_L1_QUEUE_SIZE(x) (((x) & 7) << 15)
#define EFFECTIVE_L1_QUEUE_SIZE_MASK 0x00038000
#define EFFECTIVE_L1_QUEUE_SIZE_SHIFT 15
#define MC_VM_L1_TLB_MCD_RD_A_CNTL 0x219C
#define MC_VM_L1_TLB_MCD_RD_B_CNTL 0x21A0
#define MC_VM_L1_TLB_MCB_RD_GFX_CNTL 0x21FC
#define MC_VM_L1_TLB_MCB_RD_HDP_CNTL 0x2204
#define MC_VM_L1_TLB_MCB_RD_PDMA_CNTL 0x2208
#define MC_VM_L1_TLB_MCB_RD_SEM_CNTL 0x220C
#define MC_VM_L1_TLB_MCB_RD_SYS_CNTL 0x2200
#define MC_VM_L1_TLB_MCB_WR_UVD_CNTL 0x212c
#define MC_VM_L1_TLB_MCD_WR_A_CNTL 0x21A4
#define MC_VM_L1_TLB_MCD_WR_B_CNTL 0x21A8
#define MC_VM_L1_TLB_MCB_WR_GFX_CNTL 0x2210
@@ -373,6 +376,8 @@
#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2194
#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x2198
#define RS_DQ_RD_RET_CONF 0x2348
#define PA_CL_ENHANCE 0x8A14
#define CLIP_VTX_REORDER_ENA (1 << 0)
#define NUM_CLIP_SEQ(x) ((x) << 1)
@@ -1483,6 +1488,7 @@
#define UVD_CGC_GATE 0xf4a8
#define UVD_LMI_CTRL2 0xf4f4
#define UVD_MASTINT_EN 0xf500
#define UVD_FW_START 0xf51C
#define UVD_LMI_ADDR_EXT 0xf594
#define UVD_LMI_CTRL 0xf598
#define UVD_LMI_SWAP_CNTL 0xf5b4
@@ -1495,6 +1501,13 @@
#define UVD_MPC_SET_MUX 0xf5f4
#define UVD_MPC_SET_ALU 0xf5f8
#define UVD_VCPU_CACHE_OFFSET0 0xf608
#define UVD_VCPU_CACHE_SIZE0 0xf60c
#define UVD_VCPU_CACHE_OFFSET1 0xf610
#define UVD_VCPU_CACHE_SIZE1 0xf614
#define UVD_VCPU_CACHE_OFFSET2 0xf618
#define UVD_VCPU_CACHE_SIZE2 0xf61c
#define UVD_VCPU_CNTL 0xf660
#define UVD_SOFT_RESET 0xf680
#define RBC_SOFT_RESET (1<<0)
@@ -1524,9 +1537,35 @@
#define UVD_CONTEXT_ID 0xf6f4
/* rs780 only */
#define GFX_MACRO_BYPASS_CNTL 0x30c0
#define SPLL_BYPASS_CNTL (1 << 0)
#define UPLL_BYPASS_CNTL (1 << 1)
#define CG_UPLL_FUNC_CNTL 0x7e0
# define UPLL_RESET_MASK 0x00000001
# define UPLL_SLEEP_MASK 0x00000002
# define UPLL_BYPASS_EN_MASK 0x00000004
# define UPLL_CTLREQ_MASK 0x00000008
# define UPLL_FB_DIV(x) ((x) << 4)
# define UPLL_FB_DIV_MASK 0x0000FFF0
# define UPLL_REF_DIV(x) ((x) << 16)
# define UPLL_REF_DIV_MASK 0x003F0000
# define UPLL_REFCLK_SRC_SEL_MASK 0x20000000
# define UPLL_CTLACK_MASK 0x40000000
# define UPLL_CTLACK2_MASK 0x80000000
#define CG_UPLL_FUNC_CNTL_2 0x7e4
# define UPLL_SW_HILEN(x) ((x) << 0)
# define UPLL_SW_LOLEN(x) ((x) << 4)
# define UPLL_SW_HILEN2(x) ((x) << 8)
# define UPLL_SW_LOLEN2(x) ((x) << 12)
# define UPLL_DIVEN_MASK 0x00010000
# define UPLL_DIVEN2_MASK 0x00020000
# define UPLL_SW_MASK 0x0003FFFF
# define VCLK_SRC_SEL(x) ((x) << 20)
# define VCLK_SRC_SEL_MASK 0x01F00000
# define DCLK_SRC_SEL(x) ((x) << 25)
# define DCLK_SRC_SEL_MASK 0x3E000000
/*
* PM4
@@ -120,9 +120,6 @@ extern int radeon_bapm;
#define RADEONFB_CONN_LIMIT 4
#define RADEON_BIOS_NUM_SCRATCH 8
-/* fence seq are set to this number when signaled */
-#define RADEON_FENCE_SIGNALED_SEQ 0LL
/* internal ring indices */ /* internal ring indices */
/* r1xx+ has gfx CP ring */ /* r1xx+ has gfx CP ring */
#define RADEON_RING_TYPE_GFX_INDEX 0 #define RADEON_RING_TYPE_GFX_INDEX 0
@@ -350,6 +347,7 @@ extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw,
* Fences.
*/
struct radeon_fence_driver {
struct radeon_device *rdev;
uint32_t scratch_reg;
uint64_t gpu_addr;
volatile uint32_t *cpu_addr;
@@ -357,6 +355,7 @@ struct radeon_fence_driver {
uint64_t sync_seq[RADEON_NUM_RINGS];
atomic64_t last_seq;
bool initialized;
struct delayed_work lockup_work;
};
struct radeon_fence {
@@ -371,7 +370,7 @@ struct radeon_fence {
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
int radeon_fence_driver_init(struct radeon_device *rdev);
void radeon_fence_driver_fini(struct radeon_device *rdev);
-void radeon_fence_driver_force_completion(struct radeon_device *rdev);
+void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring);
int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
void radeon_fence_process(struct radeon_device *rdev, int ring);
bool radeon_fence_signaled(struct radeon_fence *fence);
@@ -2326,7 +2325,7 @@ struct radeon_device {
bool need_dma32;
bool accel_working;
bool fastfb_working; /* IGP feature*/
-bool needs_reset;
+bool needs_reset, in_reset;
struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
const struct firmware *me_fw; /* all family ME firmware */
const struct firmware *pfp_fw; /* r6/700 PFP firmware */
@@ -2347,7 +2346,6 @@ struct radeon_device {
struct radeon_mec mec;
struct work_struct hotplug_work;
struct work_struct audio_work;
-struct work_struct reset_work;
int num_crtc; /* number of crtcs */
struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */
bool has_uvd;
@@ -965,6 +965,19 @@ static struct radeon_asic r600_asic = {
},
};
static struct radeon_asic_ring rv6xx_uvd_ring = {
.ib_execute = &uvd_v1_0_ib_execute,
.emit_fence = &uvd_v1_0_fence_emit,
.emit_semaphore = &uvd_v1_0_semaphore_emit,
.cs_parse = &radeon_uvd_cs_parse,
.ring_test = &uvd_v1_0_ring_test,
.ib_test = &uvd_v1_0_ib_test,
.is_lockup = &radeon_ring_test_lockup,
.get_rptr = &uvd_v1_0_get_rptr,
.get_wptr = &uvd_v1_0_get_wptr,
.set_wptr = &uvd_v1_0_set_wptr,
};
static struct radeon_asic rv6xx_asic = {
.init = &r600_init,
.fini = &r600_fini,
@@ -984,6 +997,7 @@ static struct radeon_asic rv6xx_asic = {
.ring = {
[RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring,
[R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring,
[R600_RING_TYPE_UVD_INDEX] = &rv6xx_uvd_ring,
},
.irq = {
.set = &r600_irq_set,
@@ -1074,6 +1088,7 @@ static struct radeon_asic rs780_asic = {
.ring = {
[RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring,
[R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring,
[R600_RING_TYPE_UVD_INDEX] = &rv6xx_uvd_ring,
},
.irq = {
.set = &r600_irq_set,
@@ -2298,7 +2313,15 @@ int radeon_asic_init(struct radeon_device *rdev)
case CHIP_RS780:
case CHIP_RS880:
rdev->asic = &rs780_asic;
-rdev->has_uvd = true;
+/* 760G/780V/880V don't have UVD */
if ((rdev->pdev->device == 0x9616)||
(rdev->pdev->device == 0x9611)||
(rdev->pdev->device == 0x9613)||
(rdev->pdev->device == 0x9711)||
(rdev->pdev->device == 0x9713))
rdev->has_uvd = false;
else
rdev->has_uvd = true;
break;
case CHIP_RV770:
case CHIP_RV730:
@@ -883,6 +883,7 @@ uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev,
struct radeon_ring *ring);
void uvd_v1_0_set_wptr(struct radeon_device *rdev,
struct radeon_ring *ring);
int uvd_v1_0_resume(struct radeon_device *rdev);
int uvd_v1_0_init(struct radeon_device *rdev);
void uvd_v1_0_fini(struct radeon_device *rdev);
@@ -890,6 +891,8 @@ int uvd_v1_0_start(struct radeon_device *rdev);
void uvd_v1_0_stop(struct radeon_device *rdev);
int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
void uvd_v1_0_fence_emit(struct radeon_device *rdev,
struct radeon_fence *fence);
int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
struct radeon_ring *ring,
@@ -137,10 +137,13 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
+ !!r->write_domain;
/* the first reloc of an UVD job is the msg and that must be in
-   VRAM, also but everything into VRAM on AGP cards to avoid
-   image corruptions */
+   VRAM, also but everything into VRAM on AGP cards and older
+   IGP chips to avoid image corruptions */
if (p->ring == R600_RING_TYPE_UVD_INDEX &&
-    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) {
+    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
+     p->rdev->family == CHIP_RS780 ||
+     p->rdev->family == CHIP_RS880)) {
/* TODO: is this still needed for NI+ ? */
p->relocs[i].prefered_domains =
RADEON_GEM_DOMAIN_VRAM;
@@ -650,6 +653,13 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
up_read(&rdev->exclusive_lock);
return -EBUSY;
}
if (rdev->in_reset) {
up_read(&rdev->exclusive_lock);
r = radeon_gpu_reset(rdev);
if (!r)
r = -EAGAIN;
return r;
}
/* initialize parser */
memset(&parser, 0, sizeof(struct radeon_cs_parser));
parser.filp = filp;
@@ -1397,10 +1397,6 @@ int radeon_device_init(struct radeon_device *rdev,
if (r)
return r;
-r = radeon_ib_ring_tests(rdev);
-if (r)
-DRM_ERROR("ib ring test failed (%d).\n", r);
r = radeon_gem_debugfs_init(rdev);
if (r) {
DRM_ERROR("registering gem debugfs failed (%d).\n", r);
@@ -1418,6 +1414,10 @@ int radeon_device_init(struct radeon_device *rdev,
return r;
}
r = radeon_ib_ring_tests(rdev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
if ((radeon_testing & 1)) {
if (rdev->accel_working)
radeon_test_moves(rdev);
@@ -1488,7 +1488,6 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
struct drm_crtc *crtc;
struct drm_connector *connector;
int i, r;
-bool force_completion = false;
if (dev == NULL || dev->dev_private == NULL) {
return -ENODEV;
@@ -1532,12 +1531,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
r = radeon_fence_wait_empty(rdev, i);
if (r) {
/* delay GPU reset to resume */
-force_completion = true;
+radeon_fence_driver_force_completion(rdev, i);
}
}
-if (force_completion) {
-radeon_fence_driver_force_completion(rdev);
-}
radeon_save_bios_scratch_regs(rdev);
@@ -1677,8 +1673,6 @@ int radeon_gpu_reset(struct radeon_device *rdev)
return 0;
}
-rdev->needs_reset = false;
radeon_save_bios_scratch_regs(rdev);
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
@@ -1695,7 +1689,6 @@
}
}
-retry:
r = radeon_asic_reset(rdev);
if (!r) {
dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n");
@@ -1704,26 +1697,12 @@
radeon_restore_bios_scratch_regs(rdev);
-if (!r) {
-for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-radeon_ring_restore(rdev, &rdev->ring[i],
-ring_sizes[i], ring_data[i]);
-ring_sizes[i] = 0;
-ring_data[i] = NULL;
-}
-r = radeon_ib_ring_tests(rdev);
-if (r) {
-dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
-if (saved) {
-saved = false;
-radeon_suspend(rdev);
-goto retry;
-}
-}
-} else {
-radeon_fence_driver_force_completion(rdev);
-for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+if (!r && ring_data[i]) {
+radeon_ring_restore(rdev, &rdev->ring[i],
+ring_sizes[i], ring_data[i]);
+} else {
+radeon_fence_driver_force_completion(rdev, i);
kfree(ring_data[i]);
}
}
@@ -1755,19 +1734,32 @@
/* reset hpd state */
radeon_hpd_init(rdev);
+ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
+rdev->in_reset = true;
+rdev->needs_reset = false;
+downgrade_write(&rdev->exclusive_lock);
drm_helper_resume_force_mode(rdev->ddev);
/* set the power state here in case we are a PX system or headless */
if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
radeon_pm_compute_clocks(rdev);
-ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
-if (r) {
+if (!r) {
+r = radeon_ib_ring_tests(rdev);
+if (r && saved)
+r = -EAGAIN;
+} else {
/* bad news, how to tell it to userspace ? */
dev_info(rdev->dev, "GPU reset failed\n");
}
-up_write(&rdev->exclusive_lock);
+rdev->needs_reset = r == -EAGAIN;
+rdev->in_reset = false;
+up_read(&rdev->exclusive_lock);
return r;
}
@@ -405,7 +405,9 @@ static void radeon_flip_work_func(struct work_struct *__work)
r = radeon_fence_wait(work->fence, false);
if (r == -EDEADLK) {
up_read(&rdev->exclusive_lock);
-r = radeon_gpu_reset(rdev);
+do {
+r = radeon_gpu_reset(rdev);
+} while (r == -EAGAIN);
down_read(&rdev->exclusive_lock);
}
if (r)
@@ -97,6 +97,25 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
return seq;
}
/**
* radeon_fence_schedule_check - schedule lockup check
*
* @rdev: radeon_device pointer
* @ring: ring index we should work with
*
* Queues a delayed work item to check for lockups.
*/
static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
{
/*
* Do not reset the timer here with mod_delayed_work,
* this can livelock in an interaction with TTM delayed destroy.
*/
queue_delayed_work(system_power_efficient_wq,
&rdev->fence_drv[ring].lockup_work,
RADEON_FENCE_JIFFIES_TIMEOUT);
}
/**
* radeon_fence_emit - emit a fence on the requested ring
*
@@ -122,19 +141,21 @@ int radeon_fence_emit(struct radeon_device *rdev,
(*fence)->ring = ring;
radeon_fence_ring_emit(rdev, ring, *fence);
trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
radeon_fence_schedule_check(rdev, ring);
return 0;
}
/**
-* radeon_fence_process - process a fence
+* radeon_fence_activity - check for fence activity
*
* @rdev: radeon_device pointer
* @ring: ring index the fence is associated with
*
-* Checks the current fence value and wakes the fence queue
-* if the sequence number has increased (all asics).
+* Checks the current fence value and calculates the last
+* signalled fence value. Returns true if activity occured
+* on the ring, and the fence_queue should be waken up.
*/
-void radeon_fence_process(struct radeon_device *rdev, int ring)
+static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
{
uint64_t seq, last_seq, last_emitted;
unsigned count_loop = 0;
@@ -190,7 +211,67 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
}
} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
-if (wake)
+if (seq < last_emitted)
+radeon_fence_schedule_check(rdev, ring);
+return wake;
+}
/**
* radeon_fence_check_lockup - check for hardware lockup
*
* @work: delayed work item
*
* Checks for fence activity and if there is none probe
* the hardware if a lockup occured.
*/
static void radeon_fence_check_lockup(struct work_struct *work)
{
struct radeon_fence_driver *fence_drv;
struct radeon_device *rdev;
int ring;
fence_drv = container_of(work, struct radeon_fence_driver,
lockup_work.work);
rdev = fence_drv->rdev;
ring = fence_drv - &rdev->fence_drv[0];
if (!down_read_trylock(&rdev->exclusive_lock)) {
/* just reschedule the check if a reset is going on */
radeon_fence_schedule_check(rdev, ring);
return;
}
if (radeon_fence_activity(rdev, ring))
wake_up_all(&rdev->fence_queue);
else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
/* good news we believe it's a lockup */
dev_warn(rdev->dev, "GPU lockup (current fence id "
"0x%016llx last fence id 0x%016llx on ring %d)\n",
(uint64_t)atomic64_read(&fence_drv->last_seq),
fence_drv->sync_seq[ring], ring);
/* remember that we need an reset */
rdev->needs_reset = true;
wake_up_all(&rdev->fence_queue);
}
up_read(&rdev->exclusive_lock);
}
/**
* radeon_fence_process - process a fence
*
* @rdev: radeon_device pointer
* @ring: ring index the fence is associated with
*
* Checks the current fence value and wakes the fence queue
* if the sequence number has increased (all asics).
*/
void radeon_fence_process(struct radeon_device *rdev, int ring)
{
if (radeon_fence_activity(rdev, ring))
wake_up_all(&rdev->fence_queue);
}
@@ -247,16 +328,10 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
*/
bool radeon_fence_signaled(struct radeon_fence *fence)
{
-if (!fence) {
+if (!fence)
return true;
-}
-if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
-return true;
-}
-if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
-fence->seq = RADEON_FENCE_SIGNALED_SEQ;
+if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring))
return true;
-}
return false;
}
@@ -283,110 +358,70 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
}
/**
-* radeon_fence_wait_seq - wait for a specific sequence numbers
+* radeon_fence_wait_seq_timeout - wait for a specific sequence numbers
*
* @rdev: radeon device pointer
* @target_seq: sequence number(s) we want to wait for
* @intr: use interruptable sleep
+* @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
*
* Wait for the requested sequence number(s) to be written by any ring
* (all asics). Sequnce number array is indexed by ring id.
* @intr selects whether to use interruptable (true) or non-interruptable
* (false) sleep when waiting for the sequence number. Helper function
* for radeon_fence_wait_*().
-* Returns 0 if the sequence number has passed, error for all other cases.
+* Returns remaining time if the sequence number has passed, 0 when
+* the wait timeout, or an error for all other cases.
* -EDEADLK is returned when a GPU lockup has been detected.
*/
-static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
-bool intr)
+static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
+u64 *target_seq, bool intr,
+long timeout)
{
-uint64_t last_seq[RADEON_NUM_RINGS];
-bool signaled;
-int i, r;
-while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
-/* Save current sequence values, used to check for GPU lockups */
-for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-if (!target_seq[i])
-continue;
-last_seq[i] = atomic64_read(&rdev->fence_drv[i].last_seq);
-trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
-radeon_irq_kms_sw_irq_get(rdev, i);
-}
-if (intr) {
-r = wait_event_interruptible_timeout(rdev->fence_queue, (
-(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
-|| rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT);
-} else {
-r = wait_event_timeout(rdev->fence_queue, (
-(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
-|| rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT);
-}
-for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-if (!target_seq[i])
-continue;
-radeon_irq_kms_sw_irq_put(rdev, i);
-trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
-}
-if (unlikely(r < 0))
-return r;
-if (unlikely(!signaled)) {
-if (rdev->needs_reset)
-return -EDEADLK;
-/* we were interrupted for some reason and fence
-* isn't signaled yet, resume waiting */
-if (r)
-continue;
-for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-if (!target_seq[i])
-continue;
-if (last_seq[i] != atomic64_read(&rdev->fence_drv[i].last_seq))
-break;
-}
-if (i != RADEON_NUM_RINGS)
-continue;
-for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-if (!target_seq[i])
-continue;
-if (radeon_ring_is_lockup(rdev, i, &rdev->ring[i]))
-break;
-}
-if (i < RADEON_NUM_RINGS) {
-/* good news we believe it's a lockup */
-dev_warn(rdev->dev, "GPU lockup (waiting for "
-"0x%016llx last fence id 0x%016llx on"
-" ring %d)\n",
-target_seq[i], last_seq[i], i);
-/* remember that we need an reset */
-rdev->needs_reset = true;
-wake_up_all(&rdev->fence_queue);
-return -EDEADLK;
-}
-}
-}
-return 0;
+long r;
+int i;
+if (radeon_fence_any_seq_signaled(rdev, target_seq))
+return timeout;
+/* enable IRQs and tracing */
+for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+if (!target_seq[i])
+continue;
+trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
+radeon_irq_kms_sw_irq_get(rdev, i);
+}
+if (intr) {
+r = wait_event_interruptible_timeout(rdev->fence_queue, (
+radeon_fence_any_seq_signaled(rdev, target_seq)
+|| rdev->needs_reset), timeout);
+} else {
+r = wait_event_timeout(rdev->fence_queue, (
+radeon_fence_any_seq_signaled(rdev, target_seq)
+|| rdev->needs_reset), timeout);
+}
+if (rdev->needs_reset)
+r = -EDEADLK;
+for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+if (!target_seq[i])
+continue;
+radeon_irq_kms_sw_irq_put(rdev, i);
+trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
+}
+return r;
}
/**
* radeon_fence_wait - wait for a fence to signal
*
* @fence: radeon fence object
-* @intr: use interruptable sleep
+* @intr: use interruptible sleep
*
* Wait for the requested fence to signal (all asics).
* @intr selects whether to use interruptable (true) or non-interruptable
@@ -396,7 +431,7 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
uint64_t seq[RADEON_NUM_RINGS] = {};
-int r;
+long r;
if (fence == NULL) {
WARN(1, "Querying an invalid fence : %p !\n", fence);
@@ -404,14 +439,11 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
}
seq[fence->ring] = fence->seq;
-if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ)
-return 0;
-r = radeon_fence_wait_seq(fence->rdev, seq, intr);
-if (r)
+r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
+if (r < 0) {
return r;
-fence->seq = RADEON_FENCE_SIGNALED_SEQ;
+}
return 0;
}
@@ -434,7 +466,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
{
uint64_t seq[RADEON_NUM_RINGS];
unsigned i, num_rings = 0;
-int r;
+long r;
for (i = 0; i < RADEON_NUM_RINGS; ++i) {
seq[i] = 0;
@@ -445,18 +477,14 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
seq[i] = fences[i]->seq;
++num_rings;
-/* test if something was allready signaled */
-if (seq[i] == RADEON_FENCE_SIGNALED_SEQ)
-return 0;
}
/* nothing to wait for ? */
if (num_rings == 0)
return -ENOENT;
-r = radeon_fence_wait_seq(rdev, seq, intr);
-if (r) {
+r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
+if (r < 0) {
return r;
}
return 0;
@@ -475,6 +503,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
{
uint64_t seq[RADEON_NUM_RINGS] = {};
long r;
seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
@@ -482,7 +511,10 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
already the last emited fence */
return -ENOENT;
}
-return radeon_fence_wait_seq(rdev, seq, false);
+r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
+if (r < 0)
+return r;
+return 0;
}
/**
@@ -498,18 +530,18 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
{
uint64_t seq[RADEON_NUM_RINGS] = {};
-int r;
+long r;
seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
if (!seq[ring])
return 0;
-r = radeon_fence_wait_seq(rdev, seq, false);
-if (r) {
+r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
+if (r < 0) {
if (r == -EDEADLK)
return -EDEADLK;
-dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
+dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
ring, r);
}
return 0;
@@ -711,6 +743,9 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
rdev->fence_drv[ring].sync_seq[i] = 0;
atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
rdev->fence_drv[ring].initialized = false;
INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
radeon_fence_check_lockup);
rdev->fence_drv[ring].rdev = rdev;
}
/**
@@ -758,8 +793,9 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
r = radeon_fence_wait_empty(rdev, ring);
if (r) {
/* no need to trigger GPU reset as we are unloading */
-radeon_fence_driver_force_completion(rdev);
+radeon_fence_driver_force_completion(rdev, ring);
}
cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
wake_up_all(&rdev->fence_queue);
radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
rdev->fence_drv[ring].initialized = false;
@@ -771,18 +807,16 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
* radeon_fence_driver_force_completion - force all fence waiter to complete
*
* @rdev: radeon device pointer
+* @ring: the ring to complete
*
* In case of GPU reset failure make sure no process keep waiting on fence
* that will never complete.
*/
-void radeon_fence_driver_force_completion(struct radeon_device *rdev)
+void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
{
-int ring;
-for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
-if (!rdev->fence_drv[ring].initialized)
-continue;
+if (rdev->fence_drv[ring].initialized) {
radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
+cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
}
}
@@ -833,6 +867,7 @@ static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
down_read(&rdev->exclusive_lock);
seq_printf(m, "%d\n", rdev->needs_reset);
rdev->needs_reset = true;
wake_up_all(&rdev->fence_queue);
up_read(&rdev->exclusive_lock);
return 0;
@@ -269,6 +269,7 @@ int radeon_ib_ring_tests(struct radeon_device *rdev)
r = radeon_ib_test(rdev, i, ring);
if (r) {
radeon_fence_driver_force_completion(rdev, i);
ring->ready = false;
rdev->needs_reset = false;
@@ -87,23 +87,6 @@ static void radeon_hotplug_work_func(struct work_struct *work)
drm_helper_hpd_irq_event(dev);
}
-/**
-* radeon_irq_reset_work_func - execute gpu reset
-*
-* @work: work struct
-*
-* Execute scheduled gpu reset (cayman+).
-* This function is called when the irq handler
-* thinks we need a gpu reset.
-*/
-static void radeon_irq_reset_work_func(struct work_struct *work)
-{
-struct radeon_device *rdev = container_of(work, struct radeon_device,
-reset_work);
-radeon_gpu_reset(rdev);
-}
/**
* radeon_driver_irq_preinstall_kms - drm irq preinstall callback
*
@@ -284,7 +267,6 @@ int radeon_irq_kms_init(struct radeon_device *rdev)
INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func);
INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi);
-INIT_WORK(&rdev->reset_work, radeon_irq_reset_work_func);
rdev->irq.installed = true;
r = drm_irq_install(rdev->ddev, rdev->ddev->pdev->irq);
@@ -40,12 +40,18 @@
#define UVD_IDLE_TIMEOUT_MS 1000
/* Firmware Names */
#define FIRMWARE_R600 "radeon/R600_uvd.bin"
#define FIRMWARE_RS780 "radeon/RS780_uvd.bin"
#define FIRMWARE_RV770 "radeon/RV770_uvd.bin"
#define FIRMWARE_RV710 "radeon/RV710_uvd.bin"
#define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin"
#define FIRMWARE_SUMO "radeon/SUMO_uvd.bin"
#define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin"
#define FIRMWARE_BONAIRE "radeon/BONAIRE_uvd.bin"
MODULE_FIRMWARE(FIRMWARE_R600);
MODULE_FIRMWARE(FIRMWARE_RS780);
MODULE_FIRMWARE(FIRMWARE_RV770);
MODULE_FIRMWARE(FIRMWARE_RV710);
MODULE_FIRMWARE(FIRMWARE_CYPRESS);
MODULE_FIRMWARE(FIRMWARE_SUMO);
@@ -63,6 +69,23 @@ int radeon_uvd_init(struct radeon_device *rdev)
INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);
switch (rdev->family) {
case CHIP_RV610:
case CHIP_RV630:
case CHIP_RV670:
case CHIP_RV620:
case CHIP_RV635:
fw_name = FIRMWARE_R600;
break;
case CHIP_RS780:
case CHIP_RS880:
fw_name = FIRMWARE_RS780;
break;
case CHIP_RV770:
fw_name = FIRMWARE_RV770;
break;
case CHIP_RV710:
case CHIP_RV730:
case CHIP_RV740:
@@ -22,6 +22,7 @@
* Authors: Christian König <christian.koenig@amd.com>
*/
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
@@ -69,6 +70,82 @@ void uvd_v1_0_set_wptr(struct radeon_device *rdev,
WREG32(UVD_RBC_RB_WPTR, ring->wptr);
}
/**
* uvd_v1_0_fence_emit - emit an fence & trap command
*
* @rdev: radeon_device pointer
* @fence: fence to emit
*
* Write a fence and a trap command to the ring.
*/
void uvd_v1_0_fence_emit(struct radeon_device *rdev,
struct radeon_fence *fence)
{
struct radeon_ring *ring = &rdev->ring[fence->ring];
uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr;
radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
radeon_ring_write(ring, addr & 0xffffffff);
radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
radeon_ring_write(ring, fence->seq);
radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
radeon_ring_write(ring, 0);
radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
radeon_ring_write(ring, 0);
radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
radeon_ring_write(ring, 0);
radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
radeon_ring_write(ring, 2);
return;
}
/**
* uvd_v1_0_resume - memory controller programming
*
* @rdev: radeon_device pointer
*
* Let the UVD memory controller know it's offsets
*/
int uvd_v1_0_resume(struct radeon_device *rdev)
{
uint64_t addr;
uint32_t size;
int r;
r = radeon_uvd_resume(rdev);
if (r)
return r;
/* programm the VCPU memory controller bits 0-27 */
addr = (rdev->uvd.gpu_addr >> 3) + 16;
size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size) >> 3;
WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
WREG32(UVD_VCPU_CACHE_SIZE0, size);
addr += size;
size = RADEON_UVD_STACK_SIZE >> 3;
WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
WREG32(UVD_VCPU_CACHE_SIZE1, size);
addr += size;
size = RADEON_UVD_HEAP_SIZE >> 3;
WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
WREG32(UVD_VCPU_CACHE_SIZE2, size);
/* bits 28-31 */
addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
/* bits 32-39 */
addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
WREG32(UVD_FW_START, *((uint32_t*)rdev->uvd.cpu_addr));
return 0;
}
/**
* uvd_v1_0_init - start and test UVD block
*
@@ -130,8 +207,32 @@ int uvd_v1_0_init(struct radeon_device *rdev)
/* lower clocks again */
radeon_set_uvd_clocks(rdev, 0, 0);
-if (!r)
+if (!r) {
switch (rdev->family) {
case CHIP_RV610:
case CHIP_RV630:
case CHIP_RV620:
/* 64byte granularity workaround */
WREG32(MC_CONFIG, 0);
WREG32(MC_CONFIG, 1 << 4);
WREG32(RS_DQ_RD_RET_CONF, 0x3f);
WREG32(MC_CONFIG, 0x1f);
/* fall through */
case CHIP_RV670:
case CHIP_RV635:
/* write clean workaround */
WREG32_P(UVD_VCPU_CNTL, 0x10, ~0x10);
break;
default:
/* TODO: Do we need more? */
break;
}
DRM_INFO("UVD initialized successfully.\n");
}
return r;
}
@@ -218,12 +319,12 @@ int uvd_v1_0_start(struct radeon_device *rdev)
/* enable UMC */
WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
+WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
/* boot up the VCPU */
WREG32(UVD_SOFT_RESET, 0);
mdelay(10);
-WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
for (i = 0; i < 10; ++i) {
uint32_t status;
for (j = 0; j < 100; ++j) {
@@ -72,6 +72,10 @@ int uvd_v2_2_resume(struct radeon_device *rdev)
uint32_t chip_id, size;
int r;
/* RV770 uses V1.0 MC */
if (rdev->family == CHIP_RV770)
return uvd_v1_0_resume(rdev);
r = radeon_uvd_resume(rdev);
if (r)
return r;