Commit 99495589 authored by Dave Airlie

Merge branch 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux into drm-next

More fixes for radeon and amdgpu for 4.3:
- Send full DP aux address fixes for radeon and amdgpu
- Fix an HDMI display regression for pre-DCE5 parts
- UVD suspend fixes for amdgpu
- Add an rs480 suspend quirk
- Fix bo reserve handling in amdgpu GEM_OP ioctl
- GPU scheduler fixes
- SDMA optimizations
- MEC fix for Fiji

* 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux: (21 commits)
  drm/amdgpu: set MEC doorbell range for Fiji
  drm/amdgpu: implement burst NOP for SDMA
  drm/amdgpu: add insert_nop ring func and default implementation
  drm/amdgpu: add amdgpu_get_sdma_instance helper function
  drm/amdgpu: add AMDGPU_MAX_SDMA_INSTANCES
  drm/amdgpu: add burst_nop flag for sdma
  drm/amdgpu: add count field for the SDMA NOP packet v2
  drm/amdgpu: use PT for VM sync on unmap
  drm/amdgpu: make wait_event uninterruptible in push_job
  drm/amdgpu: fix amdgpu_bo_unreserve order in GEM_OP IOCTL v2
  drm/amdgpu: partially revert "modify amdgpu_fence_wait_any() to amdgpu_fence_wait_multiple()" v2
  Add radeon suspend/resume quirk for HP Compaq dc5750.
  drm/amdgpu: re-work sync_resv
  drm/amdgpu/atom: Send out the full AUX address
  drm/radeon/native: Send out the full AUX address
  drm/radeon/atom: Send out the full AUX address
  drm/amdgpu: use IB for fill_buffer instead of direct command
  drm/amdgpu: stop trying to suspend UVD sessions v2
  drm/amdgpu: add scheduler dependency callback v2
  drm/amdgpu: let the scheduler work more with jobs v2
  ...
parents 879a37d0 bddf8026
@@ -98,6 +98,9 @@ extern int amdgpu_sched_hw_submission;
 #define AMDGPU_MAX_COMPUTE_RINGS 8
 #define AMDGPU_MAX_VCE_RINGS 2
+/* max number of IP instances */
+#define AMDGPU_MAX_SDMA_INSTANCES 2
 /* number of hw syncs before falling back on blocking */
 #define AMDGPU_NUM_SYNCS 4
@@ -262,7 +265,7 @@ struct amdgpu_buffer_funcs {
 unsigned fill_num_dw;
 /* used for buffer clearing */
-void (*emit_fill_buffer)(struct amdgpu_ring *ring,
+void (*emit_fill_buffer)(struct amdgpu_ib *ib,
 /* value to write to memory */
 uint32_t src_data,
 /* dst addr in bytes */
@@ -340,6 +343,8 @@ struct amdgpu_ring_funcs {
 int (*test_ring)(struct amdgpu_ring *ring);
 int (*test_ib)(struct amdgpu_ring *ring);
 bool (*is_lockup)(struct amdgpu_ring *ring);
+/* insert NOP packets */
+void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
 };
 /*
@@ -440,12 +445,11 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
-signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev,
+signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
 struct fence **array,
 uint32_t count,
-bool wait_all,
 bool intr,
 signed long t);
 struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
 void amdgpu_fence_unref(struct amdgpu_fence **fence);
@@ -717,6 +721,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 void *owner);
 int amdgpu_sync_rings(struct amdgpu_sync *sync,
 struct amdgpu_ring *ring);
+struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
 int amdgpu_sync_wait(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 struct fence *fence);
@@ -1214,6 +1219,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
 void amdgpu_ring_free_size(struct amdgpu_ring *ring);
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
 int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
@@ -1665,7 +1671,6 @@ struct amdgpu_uvd {
 struct amdgpu_bo *vcpu_bo;
 void *cpu_addr;
 uint64_t gpu_addr;
-void *saved_bo;
 atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
 struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
 struct delayed_work idle_work;
@@ -1709,6 +1714,7 @@ struct amdgpu_sdma {
 uint32_t feature_version;
 struct amdgpu_ring ring;
+bool burst_nop;
 };
 /*
@@ -2057,7 +2063,7 @@ struct amdgpu_device {
 struct amdgpu_gfx gfx;
 /* sdma */
-struct amdgpu_sdma sdma[2];
+struct amdgpu_sdma sdma[AMDGPU_MAX_SDMA_INSTANCES];
 struct amdgpu_irq_src sdma_trap_irq;
 struct amdgpu_irq_src sdma_illegal_inst_irq;
@@ -2196,6 +2202,21 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
 ring->ring_free_dw--;
 }
+static inline struct amdgpu_sdma * amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
+{
+struct amdgpu_device *adev = ring->adev;
+int i;
+for (i = 0; i < AMDGPU_MAX_SDMA_INSTANCES; i++)
+if (&adev->sdma[i].ring == ring)
+break;
+if (i < AMDGPU_MAX_SDMA_INSTANCES)
+return &adev->sdma[i];
+else
+return NULL;
+}
 /*
 * ASICs macro.
 */
@@ -2248,7 +2269,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
 #define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s))
 #define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s))
 #define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b))
-#define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b))
+#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
 #define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev))
 #define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev))
 #define amdgpu_dpm_set_power_state(adev) (adev)->pm.funcs->set_power_state((adev))
......
@@ -851,22 +851,6 @@ static bool amdgpu_test_signaled_any(struct fence **fences, uint32_t count)
 return false;
 }
-static bool amdgpu_test_signaled_all(struct fence **fences, uint32_t count)
-{
-int idx;
-struct fence *fence;
-for (idx = 0; idx < count; ++idx) {
-fence = fences[idx];
-if (fence) {
-if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-return false;
-}
-}
-return true;
-}
 struct amdgpu_wait_cb {
 struct fence_cb base;
 struct task_struct *task;
@@ -885,7 +869,7 @@ static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
 struct amdgpu_fence *fence = to_amdgpu_fence(f);
 struct amdgpu_device *adev = fence->ring->adev;
-return amdgpu_fence_wait_multiple(adev, &f, 1, false, intr, t);
+return amdgpu_fence_wait_any(adev, &f, 1, intr, t);
 }
 /**
@@ -894,23 +878,18 @@ static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
 * @adev: amdgpu device
 * @array: the fence array with amdgpu fence pointer
 * @count: the number of the fence array
-* @wait_all: the flag of wait all(true) or wait any(false)
 * @intr: when sleep, set the current task interruptable or not
 * @t: timeout to wait
 *
-* If wait_all is true, it will return when all fences are signaled or timeout.
-* If wait_all is false, it will return when any fence is signaled or timeout.
+* It will return when any fence is signaled or timeout.
 */
-signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev,
-struct fence **array,
-uint32_t count,
-bool wait_all,
-bool intr,
-signed long t)
-{
-long idx = 0;
+signed long amdgpu_fence_wait_any(struct amdgpu_device *adev,
+struct fence **array, uint32_t count,
+bool intr, signed long t)
+{
 struct amdgpu_wait_cb *cb;
 struct fence *fence;
+unsigned idx;
 BUG_ON(!array);
@@ -927,10 +906,7 @@ signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev,
 if (fence_add_callback(fence,
 &cb[idx].base, amdgpu_fence_wait_cb)) {
 /* The fence is already signaled */
-if (wait_all)
-continue;
-else
-goto fence_rm_cb;
+goto fence_rm_cb;
 }
 }
 }
@@ -945,9 +921,7 @@ signed long amdgpu_fence_wait_multiple(struct amdgpu_device *adev,
 * amdgpu_test_signaled_any must be called after
 * set_current_state to prevent a race with wake_up_process
 */
-if (!wait_all && amdgpu_test_signaled_any(array, count))
-break;
-if (wait_all && amdgpu_test_signaled_all(array, count))
+if (amdgpu_test_signaled_any(array, count))
 break;
 if (adev->needs_reset) {
......
@@ -615,6 +615,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT;
 info.domains = robj->initial_domain;
 info.domain_flags = robj->flags;
+amdgpu_bo_unreserve(robj);
 if (copy_to_user(out, &info, sizeof(info)))
 r = -EFAULT;
 break;
@@ -622,17 +623,19 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 case AMDGPU_GEM_OP_SET_PLACEMENT:
 if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm)) {
 r = -EPERM;
+amdgpu_bo_unreserve(robj);
 break;
 }
 robj->initial_domain = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
 AMDGPU_GEM_DOMAIN_GTT |
 AMDGPU_GEM_DOMAIN_CPU);
+amdgpu_bo_unreserve(robj);
 break;
 default:
+amdgpu_bo_unreserve(robj);
 r = -EINVAL;
 }
-amdgpu_bo_unreserve(robj);
 out:
 drm_gem_object_unreference_unlocked(gobj);
 return r;
......
@@ -131,6 +131,21 @@ int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
 return 0;
 }
+/** amdgpu_ring_insert_nop - insert NOP packets
+*
+* @ring: amdgpu_ring structure holding ring information
+* @count: the number of NOP packets to insert
+*
+* This is the generic insert_nop function for rings except SDMA
+*/
+void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+int i;
+for (i = 0; i < count; i++)
+amdgpu_ring_write(ring, ring->nop);
+}
 /**
 * amdgpu_ring_commit - tell the GPU to execute the new
 * commands on the ring buffer
@@ -143,10 +158,13 @@ int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
 */
 void amdgpu_ring_commit(struct amdgpu_ring *ring)
 {
+uint32_t count;
 /* We pad to match fetch size */
-while (ring->wptr & ring->align_mask) {
-amdgpu_ring_write(ring, ring->nop);
-}
+count = ring->align_mask + 1 - (ring->wptr & ring->align_mask);
+count %= ring->align_mask + 1;
+ring->funcs->insert_nop(ring, count);
 mb();
 amdgpu_ring_set_wptr(ring);
 }
......
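A note on the new padding arithmetic in amdgpu_ring_commit above: with align_mask one less than the fetch size, the first expression computes how many NOP dwords bring wptr up to the next fetch boundary, and the modulo folds the already-aligned case back to zero. A minimal user-space sketch of the same math (illustrative only, hypothetical values, not driver code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as the reworked amdgpu_ring_commit(): pad wptr up to
 * the next (align_mask + 1)-dword boundary, emitting 0 NOPs when the
 * pointer is already aligned. */
static uint32_t nop_pad(uint32_t wptr, uint32_t align_mask)
{
	uint32_t count = align_mask + 1 - (wptr & align_mask);

	count %= align_mask + 1;	/* already aligned -> 0, not align_mask + 1 */
	return count;
}

int main(void)
{
	uint32_t align_mask = 7;	/* hypothetical 8-dword fetch size */

	assert(nop_pad(16, align_mask) == 0);	/* already aligned */
	assert(nop_pad(17, align_mask) == 7);
	assert(nop_pad(23, align_mask) == 1);
	printf("pad(21) = %u dwords\n", nop_pad(21, align_mask));	/* prints 3 */
	return 0;
}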
@@ -367,8 +367,8 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
 } while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
 spin_unlock(&sa_manager->wq.lock);
-t = amdgpu_fence_wait_multiple(adev, fences, AMDGPU_MAX_RINGS, false, false,
-MAX_SCHEDULE_TIMEOUT);
+t = amdgpu_fence_wait_any(adev, fences, AMDGPU_MAX_RINGS,
+false, MAX_SCHEDULE_TIMEOUT);
 r = (t > 0) ? 0 : t;
 spin_lock(&sa_manager->wq.lock);
 /* if we have nothing to wait for block */
......
@@ -27,6 +27,12 @@
 #include <drm/drmP.h>
 #include "amdgpu.h"
+static struct fence *amdgpu_sched_dependency(struct amd_sched_job *job)
+{
+struct amdgpu_job *sched_job = (struct amdgpu_job *)job;
+return amdgpu_sync_get_fence(&sched_job->ibs->sync);
+}
 static struct fence *amdgpu_sched_run_job(struct amd_sched_job *job)
 {
 struct amdgpu_job *sched_job;
@@ -75,6 +81,7 @@ static void amdgpu_sched_process_job(struct amd_sched_job *job)
 }
 struct amd_sched_backend_ops amdgpu_sched_ops = {
+.dependency = amdgpu_sched_dependency,
 .run_job = amdgpu_sched_run_job,
 .process_job = amdgpu_sched_process_job
 };
......
@@ -142,6 +142,18 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 return 0;
 }
+static void *amdgpu_sync_get_owner(struct fence *f)
+{
+struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
+struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+if (s_fence)
+return s_fence->owner;
+else if (a_fence)
+return a_fence->owner;
+return AMDGPU_FENCE_OWNER_UNDEFINED;
+}
 /**
 * amdgpu_sync_resv - use the semaphores to sync to a reservation object
 *
@@ -158,7 +170,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 {
 struct reservation_object_list *flist;
 struct fence *f;
-struct amdgpu_fence *fence;
+void *fence_owner;
 unsigned i;
 int r = 0;
@@ -176,22 +188,22 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 for (i = 0; i < flist->shared_count; ++i) {
 f = rcu_dereference_protected(flist->shared[i],
 reservation_object_held(resv));
-fence = f ? to_amdgpu_fence(f) : NULL;
-if (fence && fence->ring->adev == adev) {
+if (amdgpu_sync_same_dev(adev, f)) {
 /* VM updates are only interesting
 * for other VM updates and moves.
 */
+fence_owner = amdgpu_sync_get_owner(f);
 if ((owner != AMDGPU_FENCE_OWNER_MOVE) &&
-(fence->owner != AMDGPU_FENCE_OWNER_MOVE) &&
+(fence_owner != AMDGPU_FENCE_OWNER_MOVE) &&
 ((owner == AMDGPU_FENCE_OWNER_VM) !=
-(fence->owner == AMDGPU_FENCE_OWNER_VM)))
+(fence_owner == AMDGPU_FENCE_OWNER_VM)))
 continue;
 /* Ignore fence from the same owner as
 * long as it isn't undefined.
 */
 if (owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
-fence->owner == owner)
+fence_owner == owner)
 continue;
 }
@@ -202,6 +214,28 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 return r;
 }
+struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
+{
+struct amdgpu_sync_entry *e;
+struct hlist_node *tmp;
+struct fence *f;
+int i;
+hash_for_each_safe(sync->fences, i, tmp, e, node) {
+f = e->fence;
+hash_del(&e->node);
+kfree(e);
+if (!fence_is_signaled(f))
+return f;
+fence_put(f);
+}
+return NULL;
+}
 int amdgpu_sync_wait(struct amdgpu_sync *sync)
 {
 struct amdgpu_sync_entry *e;
......
@@ -221,31 +221,32 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 {
-unsigned size;
-void *ptr;
-const struct common_firmware_header *hdr;
-int i;
+struct amdgpu_ring *ring = &adev->uvd.ring;
+int i, r;
 if (adev->uvd.vcpu_bo == NULL)
 return 0;
-for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i)
-if (atomic_read(&adev->uvd.handles[i]))
-break;
-if (i == AMDGPU_MAX_UVD_HANDLES)
-return 0;
-hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
-size = amdgpu_bo_size(adev->uvd.vcpu_bo);
-size -= le32_to_cpu(hdr->ucode_size_bytes);
-ptr = adev->uvd.cpu_addr;
-ptr += le32_to_cpu(hdr->ucode_size_bytes);
-adev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
-memcpy(adev->uvd.saved_bo, ptr, size);
+for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
+uint32_t handle = atomic_read(&adev->uvd.handles[i]);
+if (handle != 0) {
+struct fence *fence;
+amdgpu_uvd_note_usage(adev);
+r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence);
+if (r) {
+DRM_ERROR("Error destroying UVD (%d)!\n", r);
+continue;
+}
+fence_wait(fence, false);
+fence_put(fence);
+adev->uvd.filp[i] = NULL;
+atomic_set(&adev->uvd.handles[i], 0);
+}
+}
 return 0;
 }
@@ -270,12 +271,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
 ptr = adev->uvd.cpu_addr;
 ptr += le32_to_cpu(hdr->ucode_size_bytes);
-if (adev->uvd.saved_bo != NULL) {
-memcpy(ptr, adev->uvd.saved_bo, size);
-kfree(adev->uvd.saved_bo);
-adev->uvd.saved_bo = NULL;
-} else
-memset(ptr, 0, size);
+memset(ptr, 0, size);
 return 0;
 }
......
@@ -627,9 +627,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 {
 uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
 uint64_t last_pte = ~0, last_dst = ~0;
+void *owner = AMDGPU_FENCE_OWNER_VM;
 unsigned count = 0;
 uint64_t addr;
+/* sync to everything on unmapping */
+if (!(flags & AMDGPU_PTE_VALID))
+owner = AMDGPU_FENCE_OWNER_UNDEFINED;
 /* walk over the address space and update the page tables */
 for (addr = start; addr < end; ) {
 uint64_t pt_idx = addr >> amdgpu_vm_block_size;
@@ -638,8 +643,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 uint64_t pte;
 int r;
-amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv,
-AMDGPU_FENCE_OWNER_VM);
+amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv, owner);
 r = reservation_object_reserve_shared(pt->tbo.resv);
 if (r)
 return r;
@@ -790,17 +794,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 ib->length_dw = 0;
-if (!(flags & AMDGPU_PTE_VALID)) {
-unsigned i;
-for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-struct amdgpu_fence *f = vm->ids[i].last_id_use;
-r = amdgpu_sync_fence(adev, &ib->sync, &f->base);
-if (r)
-return r;
-}
-}
 r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start,
 mapping->it.last + 1, addr + mapping->offset,
 flags, gtt_flags);
......
@@ -139,7 +139,8 @@ amdgpu_atombios_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *m
 tx_buf[0] = msg->address & 0xff;
 tx_buf[1] = msg->address >> 8;
-tx_buf[2] = msg->request << 4;
+tx_buf[2] = (msg->request << 4) |
+((msg->address >> 16) & 0xf);
 tx_buf[3] = msg->size ? (msg->size - 1) : 0;
 switch (msg->request & ~DP_AUX_I2C_MOT) {
......
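The hunk above (and the matching radeon changes near the end of this diff) sends the full 20-bit DP AUX address instead of only the low 16 bits: bytes 0 and 1 carry address bits 7:0 and 15:8, and address bits 19:16 share byte 2 with the 4-bit request code. A small stand-alone sketch of the packing (user-space C, illustrative only; the address value below is a hypothetical example, not taken from the patch):

#include <stdint.h>
#include <stdio.h>

/* Pack a DP AUX transaction header the way the patched helpers do:
 * 20-bit address in bytes 0-2, request code in the high nibble of byte 2. */
static void pack_aux_header(uint8_t tx_buf[4], uint32_t address,
			    uint8_t request, uint8_t size)
{
	tx_buf[0] = address & 0xff;		/* address bits 7:0 */
	tx_buf[1] = (address >> 8) & 0xff;	/* address bits 15:8 */
	tx_buf[2] = (request << 4) |		/* request in bits 7:4 ... */
		    ((address >> 16) & 0xf);	/* ... address bits 19:16 in bits 3:0 */
	tx_buf[3] = size ? (size - 1) : 0;	/* transfer length - 1 */
}

int main(void)
{
	uint8_t buf[4];

	/* hypothetical native AUX read (request 0x9) of 16 bytes at an
	 * address above 0xFFFF; before this fix bits 19:16 were dropped */
	pack_aux_header(buf, 0x69000, 0x9, 16);
	printf("%02x %02x %02x %02x\n", buf[0], buf[1], buf[2], buf[3]);
	return 0;
}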
@@ -188,6 +188,19 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
 }
+static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+int i;
+for (i = 0; i < count; i++)
+if (sdma && sdma->burst_nop && (i == 0))
+amdgpu_ring_write(ring, ring->nop |
+SDMA_NOP_COUNT(count - 1));
+else
+amdgpu_ring_write(ring, ring->nop);
+}
 /**
 * cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine
 *
@@ -213,8 +226,8 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
 amdgpu_ring_write(ring, next_rptr);
 /* IB packet must end on a 8 DW boundary */
-while ((ring->wptr & 7) != 4)
-amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8);
 amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
 amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
@@ -501,6 +514,8 @@ static int cik_sdma_load_microcode(struct amdgpu_device *adev)
 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
 adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
 adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+if (adev->sdma[i].feature_version >= 20)
+adev->sdma[i].burst_nop = true;
 fw_data = (const __le32 *)
 (adev->sdma[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
@@ -815,8 +830,19 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
 */
 static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib)
 {
-while (ib->length_dw & 0x7)
-ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
+struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+u32 pad_count;
+int i;
+pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+for (i = 0; i < pad_count; i++)
+if (sdma && sdma->burst_nop && (i == 0))
+ib->ptr[ib->length_dw++] =
+SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) |
+SDMA_NOP_COUNT(pad_count - 1);
+else
+ib->ptr[ib->length_dw++] =
+SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
 }
 /**
@@ -1303,6 +1329,7 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
 .test_ring = cik_sdma_ring_test_ring,
 .test_ib = cik_sdma_ring_test_ib,
 .is_lockup = cik_sdma_ring_is_lockup,
+.insert_nop = cik_sdma_ring_insert_nop,
 };
 static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
@@ -1363,16 +1390,16 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
 *
 * Fill GPU buffers using the DMA engine (CIK).
 */
-static void cik_sdma_emit_fill_buffer(struct amdgpu_ring *ring,
+static void cik_sdma_emit_fill_buffer(struct amdgpu_ib *ib,
 uint32_t src_data,
 uint64_t dst_offset,
 uint32_t byte_count)
 {
-amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0));
-amdgpu_ring_write(ring, lower_32_bits(dst_offset));
-amdgpu_ring_write(ring, upper_32_bits(dst_offset));
-amdgpu_ring_write(ring, src_data);
-amdgpu_ring_write(ring, byte_count);
+ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0);
+ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ib->ptr[ib->length_dw++] = src_data;
+ib->ptr[ib->length_dw++] = byte_count;
 }
 static const struct amdgpu_buffer_funcs cik_sdma_buffer_funcs = {
......
@@ -487,6 +487,7 @@
 (((op) & 0xFF) << 0))
 /* sDMA opcodes */
 #define SDMA_OPCODE_NOP 0
+# define SDMA_NOP_COUNT(x) (((x) & 0x3FFF) << 16)
 #define SDMA_OPCODE_COPY 1
 # define SDMA_COPY_SUB_OPCODE_LINEAR 0
 # define SDMA_COPY_SUB_OPCODE_TILED 1
......
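The SDMA changes above pad rings and IBs with a single burst NOP when the firmware advertises support (feature_version >= 20): the first NOP header carries a 14-bit count of the padding dwords that follow it, so the engine fetches the whole pad as one packet instead of many one-dword NOPs. A minimal user-space sketch of the pad_ib logic (illustrative only; SDMA_PACKET is trimmed down to just the opcode byte here, the real macro also encodes sub-opcode and extra bits):

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the CIK defines used above. */
#define SDMA_OPCODE_NOP 0
#define SDMA_PACKET(op, sub_op, e) ((op) & 0xff)
#define SDMA_NOP_COUNT(x) (((x) & 0x3FFF) << 16)

/* Pad an IB to an 8-dword boundary, using one burst NOP when supported. */
static uint32_t pad_ib(uint32_t *ib, uint32_t length_dw, int burst_nop)
{
	uint32_t pad_count = (8 - (length_dw & 0x7)) % 8;
	uint32_t i;

	for (i = 0; i < pad_count; i++)
		if (burst_nop && i == 0)
			/* first NOP carries the number of padding dwords after it */
			ib[length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) |
					  SDMA_NOP_COUNT(pad_count - 1);
		else
			ib[length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);

	return length_dw;
}

int main(void)
{
	uint32_t ib[16] = { 0 };

	/* hypothetical IB of 5 dwords -> 3 dwords of padding */
	uint32_t len = pad_ib(ib, 5, 1);

	printf("padded length = %u, first pad dword = 0x%08x\n", len, ib[5]);
	return 0;
}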
@@ -5598,6 +5598,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
 .test_ring = gfx_v7_0_ring_test_ring,
 .test_ib = gfx_v7_0_ring_test_ib,
 .is_lockup = gfx_v7_0_ring_is_lockup,
+.insert_nop = amdgpu_ring_insert_nop,
 };
 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
@@ -5614,6 +5615,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
 .test_ring = gfx_v7_0_ring_test_ring,
 .test_ib = gfx_v7_0_ring_test_ib,
 .is_lockup = gfx_v7_0_ring_is_lockup,
+.insert_nop = amdgpu_ring_insert_nop,
 };
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
......
@@ -3240,7 +3240,8 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
 /* enable the doorbell if requested */
 if (use_doorbell) {
-if (adev->asic_type == CHIP_CARRIZO) {
+if ((adev->asic_type == CHIP_CARRIZO) ||
+(adev->asic_type == CHIP_FIJI)) {
 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
 AMDGPU_DOORBELL_KIQ << 2);
 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
@@ -4378,6 +4379,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 .test_ring = gfx_v8_0_ring_test_ring,
 .test_ib = gfx_v8_0_ring_test_ib,
 .is_lockup = gfx_v8_0_ring_is_lockup,
+.insert_nop = amdgpu_ring_insert_nop,
 };
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -4394,6 +4396,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 .test_ring = gfx_v8_0_ring_test_ring,
 .test_ib = gfx_v8_0_ring_test_ib,
 .is_lockup = gfx_v8_0_ring_is_lockup,
+.insert_nop = amdgpu_ring_insert_nop,
 };
 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
......
@@ -2163,5 +2163,10 @@
 #define SDMA_PKT_NOP_HEADER_sub_op_shift 8
 #define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
+/*define for count field*/
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift 16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
 #endif /* __ICELAND_SDMA_PKT_OPEN_H_ */
@@ -146,6 +146,8 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data;
 adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
 adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+if (adev->sdma[i].feature_version >= 20)
+adev->sdma[i].burst_nop = true;
 if (adev->firmware.smu_load) {
 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
@@ -218,6 +220,19 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2);
 }
+static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+int i;
+for (i = 0; i < count; i++)
+if (sdma && sdma->burst_nop && (i == 0))
+amdgpu_ring_write(ring, ring->nop |
+SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+else
+amdgpu_ring_write(ring, ring->nop);
+}
 /**
 * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine
 *
@@ -245,8 +260,8 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
 amdgpu_ring_write(ring, next_rptr);
 /* IB packet must end on a 8 DW boundary */
-while ((ring->wptr & 7) != 2)
-amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP));
+sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
 SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
 /* base must be 32 byte aligned */
@@ -879,8 +894,19 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
 */
 static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib)
 {
-while (ib->length_dw & 0x7)
-ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+u32 pad_count;
+int i;
+pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+for (i = 0; i < pad_count; i++)
+if (sdma && sdma->burst_nop && (i == 0))
+ib->ptr[ib->length_dw++] =
+SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+else
+ib->ptr[ib->length_dw++] =
+SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
 }
 /**
@@ -1314,6 +1340,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
 .test_ring = sdma_v2_4_ring_test_ring,
 .test_ib = sdma_v2_4_ring_test_ib,
 .is_lockup = sdma_v2_4_ring_is_lockup,
+.insert_nop = sdma_v2_4_ring_insert_nop,
 };
 static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
@@ -1375,16 +1402,16 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
 *
 * Fill GPU buffers using the DMA engine (VI).
 */
-static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ring *ring,
+static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ib *ib,
 uint32_t src_data,
 uint64_t dst_offset,
 uint32_t byte_count)
 {
-amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL));
-amdgpu_ring_write(ring, lower_32_bits(dst_offset));
-amdgpu_ring_write(ring, upper_32_bits(dst_offset));
-amdgpu_ring_write(ring, src_data);
-amdgpu_ring_write(ring, byte_count);
+ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ib->ptr[ib->length_dw++] = src_data;
+ib->ptr[ib->length_dw++] = byte_count;
 }
 static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = {
......
@@ -218,6 +218,8 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
 hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data;
 adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
 adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+if (adev->sdma[i].feature_version >= 20)
+adev->sdma[i].burst_nop = true;
 if (adev->firmware.smu_load) {
 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
@@ -304,6 +306,19 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
 }
 }
+static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+int i;
+for (i = 0; i < count; i++)
+if (sdma && sdma->burst_nop && (i == 0))
+amdgpu_ring_write(ring, ring->nop |
+SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+else
+amdgpu_ring_write(ring, ring->nop);
+}
 /**
 * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine
 *
@@ -330,8 +345,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
 amdgpu_ring_write(ring, next_rptr);
 /* IB packet must end on a 8 DW boundary */
-while ((ring->wptr & 7) != 2)
-amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP));
+sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
 SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
@@ -999,8 +1013,19 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
 */
 static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib)
 {
-while (ib->length_dw & 0x7)
-ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+u32 pad_count;
+int i;
+pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+for (i = 0; i < pad_count; i++)
+if (sdma && sdma->burst_nop && (i == 0))
+ib->ptr[ib->length_dw++] =
+SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+else
+ib->ptr[ib->length_dw++] =
+SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
 }
 /**
@@ -1438,6 +1463,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
 .test_ring = sdma_v3_0_ring_test_ring,
 .test_ib = sdma_v3_0_ring_test_ib,
 .is_lockup = sdma_v3_0_ring_is_lockup,
+.insert_nop = sdma_v3_0_ring_insert_nop,
 };
 static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1499,16 +1525,16 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
 *
 * Fill GPU buffers using the DMA engine (VI).
 */
-static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ring *ring,
+static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ib *ib,
 uint32_t src_data,
 uint64_t dst_offset,
 uint32_t byte_count)
 {
-amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL));
-amdgpu_ring_write(ring, lower_32_bits(dst_offset));
-amdgpu_ring_write(ring, upper_32_bits(dst_offset));
-amdgpu_ring_write(ring, src_data);
-amdgpu_ring_write(ring, byte_count);
+ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
+ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+ib->ptr[ib->length_dw++] = src_data;
+ib->ptr[ib->length_dw++] = byte_count;
 }
 static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = {
......
@@ -2236,5 +2236,10 @@
 #define SDMA_PKT_NOP_HEADER_sub_op_shift 8
 #define SDMA_PKT_NOP_HEADER_SUB_OP(x) (((x) & SDMA_PKT_NOP_HEADER_sub_op_mask) << SDMA_PKT_NOP_HEADER_sub_op_shift)
+/*define for count field*/
+#define SDMA_PKT_NOP_HEADER_count_offset 0
+#define SDMA_PKT_NOP_HEADER_count_mask 0x00003FFF
+#define SDMA_PKT_NOP_HEADER_count_shift 16
+#define SDMA_PKT_NOP_HEADER_COUNT(x) (((x) & SDMA_PKT_NOP_HEADER_count_mask) << SDMA_PKT_NOP_HEADER_count_shift)
 #endif /* __TONGA_SDMA_PKT_OPEN_H_ */
@@ -886,6 +886,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
 .test_ring = uvd_v4_2_ring_test_ring,
 .test_ib = uvd_v4_2_ring_test_ib,
 .is_lockup = amdgpu_ring_test_lockup,
+.insert_nop = amdgpu_ring_insert_nop,
 };
 static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev)
......
@@ -825,6 +825,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
 .test_ring = uvd_v5_0_ring_test_ring,
 .test_ib = uvd_v5_0_ring_test_ib,
 .is_lockup = amdgpu_ring_test_lockup,
+.insert_nop = amdgpu_ring_insert_nop,
 };
 static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev)
......
@@ -805,6 +805,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = {
 .test_ring = uvd_v6_0_ring_test_ring,
 .test_ib = uvd_v6_0_ring_test_ib,
 .is_lockup = amdgpu_ring_test_lockup,
+.insert_nop = amdgpu_ring_insert_nop,
 };
 static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev)
......
@@ -643,6 +643,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
 .test_ring = amdgpu_vce_ring_test_ring,
 .test_ib = amdgpu_vce_ring_test_ib,
 .is_lockup = amdgpu_ring_test_lockup,
+.insert_nop = amdgpu_ring_insert_nop,
 };
 static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev)
......
@@ -608,6 +608,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = {
 .test_ring = amdgpu_vce_ring_test_ring,
 .test_ib = amdgpu_vce_ring_test_ib,
 .is_lockup = amdgpu_ring_test_lockup,
+.insert_nop = amdgpu_ring_insert_nop,
 };
 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
......
@@ -27,6 +27,8 @@
 #include <drm/drmP.h>
 #include "gpu_scheduler.h"
+static struct amd_sched_job *
+amd_sched_entity_pop_job(struct amd_sched_entity *entity);
 static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
 /* Initialize a given run queue struct */
@@ -56,34 +58,36 @@ static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
 }
 /**
-* Select next entity from a specified run queue with round robin policy.
-* It could return the same entity as current one if current is the only
-* available one in the queue. Return NULL if nothing available.
+* Select next job from a specified run queue with round robin policy.
+* Return NULL if nothing available.
 */
-static struct amd_sched_entity *
-amd_sched_rq_select_entity(struct amd_sched_rq *rq)
+static struct amd_sched_job *
+amd_sched_rq_select_job(struct amd_sched_rq *rq)
 {
 struct amd_sched_entity *entity;
+struct amd_sched_job *job;
 spin_lock(&rq->lock);
 entity = rq->current_entity;
 if (entity) {
 list_for_each_entry_continue(entity, &rq->entities, list) {
-if (!kfifo_is_empty(&entity->job_queue)) {
+job = amd_sched_entity_pop_job(entity);
+if (job) {
 rq->current_entity = entity;
 spin_unlock(&rq->lock);
-return rq->current_entity;
+return job;
 }
 }
 }
 list_for_each_entry(entity, &rq->entities, list) {
-if (!kfifo_is_empty(&entity->job_queue)) {
+job = amd_sched_entity_pop_job(entity);
+if (job) {
 rq->current_entity = entity;
 spin_unlock(&rq->lock);
-return rq->current_entity;
+return job;
 }
 if (entity == rq->current_entity)
@@ -188,6 +192,39 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
 kfifo_free(&entity->job_queue);
 }
+static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb)
+{
+struct amd_sched_entity *entity =
+container_of(cb, struct amd_sched_entity, cb);
+entity->dependency = NULL;
+fence_put(f);
+amd_sched_wakeup(entity->scheduler);
+}
+static struct amd_sched_job *
+amd_sched_entity_pop_job(struct amd_sched_entity *entity)
+{
+struct amd_gpu_scheduler *sched = entity->scheduler;
+struct amd_sched_job *job;
+if (ACCESS_ONCE(entity->dependency))
+return NULL;
+if (!kfifo_out_peek(&entity->job_queue, &job, sizeof(job)))
+return NULL;
+while ((entity->dependency = sched->ops->dependency(job))) {
+if (fence_add_callback(entity->dependency, &entity->cb,
+amd_sched_entity_wakeup))
+fence_put(entity->dependency);
+else
+return NULL;
+}
+return job;
+}
 /**
 * Helper to submit a job to the job queue
 *
@@ -227,7 +264,6 @@ int amd_sched_entity_push_job(struct amd_sched_job *sched_job)
 struct amd_sched_entity *entity = sched_job->s_entity;
 struct amd_sched_fence *fence = amd_sched_fence_create(
 entity, sched_job->owner);
-int r;
 if (!fence)
 return -ENOMEM;
@@ -235,10 +271,10 @@ int amd_sched_entity_push_job(struct amd_sched_job *sched_job)
 fence_get(&fence->base);
 sched_job->s_fence = fence;
-r = wait_event_interruptible(entity->scheduler->job_scheduled,
+wait_event(entity->scheduler->job_scheduled,
 amd_sched_entity_in(sched_job));
-return r;
+return 0;
 }
 /**
@@ -260,22 +296,22 @@ static void amd_sched_wakeup(struct amd_gpu_scheduler *sched)
 }
 /**
-* Select next entity containing real IB submissions
+* Select next to run
 */
-static struct amd_sched_entity *
-amd_sched_select_context(struct amd_gpu_scheduler *sched)
+static struct amd_sched_job *
+amd_sched_select_job(struct amd_gpu_scheduler *sched)
 {
-struct amd_sched_entity *tmp;
+struct amd_sched_job *job;
 if (!amd_sched_ready(sched))
 return NULL;
 /* Kernel run queue has higher priority than normal run queue*/
-tmp = amd_sched_rq_select_entity(&sched->kernel_rq);
-if (tmp == NULL)
-tmp = amd_sched_rq_select_entity(&sched->sched_rq);
-return tmp;
+job = amd_sched_rq_select_job(&sched->kernel_rq);
+if (job == NULL)
+job = amd_sched_rq_select_job(&sched->sched_rq);
+return job;
 }
 static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
@@ -301,22 +337,19 @@ static int amd_sched_main(void *param)
 sched_setscheduler(current, SCHED_FIFO, &sparam);
 while (!kthread_should_stop()) {
-struct amd_sched_entity *c_entity = NULL;
+struct amd_sched_entity *entity;
 struct amd_sched_job *job;
 struct fence *fence;
 wait_event_interruptible(sched->wake_up_worker,
 kthread_should_stop() ||
-(c_entity = amd_sched_select_context(sched)));
-if (!c_entity)
+(job = amd_sched_select_job(sched)));
+if (!job)
 continue;
-r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *));
-if (r != sizeof(void *))
-continue;
+entity = job->s_entity;
 atomic_inc(&sched->hw_rq_count);
 fence = sched->ops->run_job(job);
 if (fence) {
 r = fence_add_callback(fence, &job->cb,
@@ -328,6 +361,7 @@ static int amd_sched_main(void *param)
 fence_put(fence);
 }
+kfifo_out(&entity->job_queue, &job, sizeof(job));
 wake_up(&sched->job_scheduled);
 }
 return 0;
......
@@ -45,6 +45,8 @@ struct amd_sched_entity {
 spinlock_t queue_lock;
 struct amd_gpu_scheduler *scheduler;
 uint64_t fence_context;
+struct fence *dependency;
+struct fence_cb cb;
 };
 /**
@@ -89,6 +91,7 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
 * these functions should be implemented in driver side
 */
 struct amd_sched_backend_ops {
+struct fence *(*dependency)(struct amd_sched_job *job);
 struct fence *(*run_job)(struct amd_sched_job *job);
 void (*process_job)(struct amd_sched_job *job);
 };
......
@@ -171,8 +171,9 @@ radeon_dp_aux_transfer_atom(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
 return -E2BIG;
 tx_buf[0] = msg->address & 0xff;
-tx_buf[1] = msg->address >> 8;
-tx_buf[2] = msg->request << 4;
+tx_buf[1] = (msg->address >> 8) & 0xff;
+tx_buf[2] = (msg->request << 4) |
+((msg->address >> 16) & 0xf);
 tx_buf[3] = msg->size ? (msg->size - 1) : 0;
 switch (msg->request & ~DP_AUX_I2C_MOT) {
......
@@ -522,13 +522,15 @@ static int radeon_audio_set_avi_packet(struct drm_encoder *encoder,
 return err;
 }
-if (drm_rgb_quant_range_selectable(radeon_connector_edid(connector))) {
-if (radeon_encoder->output_csc == RADEON_OUTPUT_CSC_TVRGB)
-frame.quantization_range = HDMI_QUANTIZATION_RANGE_LIMITED;
-else
-frame.quantization_range = HDMI_QUANTIZATION_RANGE_FULL;
-} else {
-frame.quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT;
+if (radeon_encoder->output_csc != RADEON_OUTPUT_CSC_BYPASS) {
+if (drm_rgb_quant_range_selectable(radeon_connector_edid(connector))) {
+if (radeon_encoder->output_csc == RADEON_OUTPUT_CSC_TVRGB)
+frame.quantization_range = HDMI_QUANTIZATION_RANGE_LIMITED;
+else
+frame.quantization_range = HDMI_QUANTIZATION_RANGE_FULL;
+} else {
+frame.quantization_range = HDMI_QUANTIZATION_RANGE_DEFAULT;
+}
 }
 err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
......
@@ -3387,6 +3387,14 @@ void radeon_combios_asic_init(struct drm_device *dev)
 rdev->pdev->subsystem_device == 0x30ae)
 return;
+/* quirk for rs4xx HP Compaq dc5750 Small Form Factor to make it resume
+* - it hangs on resume inside the dynclk 1 table.
+*/
+if (rdev->family == CHIP_RS480 &&
+rdev->pdev->subsystem_vendor == 0x103c &&
+rdev->pdev->subsystem_device == 0x280a)
+return;
 /* DYN CLK 1 */
 table = combios_get_table_offset(dev, COMBIOS_DYN_CLK_1_TABLE);
 if (table)
......
@@ -116,8 +116,8 @@ radeon_dp_aux_transfer_native(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg
 AUX_SW_WR_BYTES(bytes));
 /* write the data header into the registers */
-/* request, addres, msg size */
-byte = (msg->request << 4);
+/* request, address, msg size */
+byte = (msg->request << 4) | ((msg->address >> 16) & 0xf);
 WREG32(AUX_SW_DATA + aux_offset[instance],
 AUX_SW_DATA_MASK(byte) | AUX_SW_AUTOINCREMENT_DISABLE);
......