Commit 84807727 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'drm-next-4.2' of git://people.freedesktop.org/~agd5f/linux

Pull radeon and amdgpu fixes from Alex Deucher:
 "First round of fixes for 4.2 for radeon and amdgpu.  Stuff all over
  the place:

   - hibernation, suspend fixes for radeon and amdgpu
   - radeon audio fix
   - amdgpu ioctl optimizations and fixes
   - amdgpu VCE cs checker improvements
   - misc bug fixes"

[ Dave on vacation, pulling directly ]

* 'drm-next-4.2' of git://people.freedesktop.org/~agd5f/linux: (30 commits)
  drm/radeon: only check the sink type on DP connectors
  drm/amdgpu: add flag to delay VM updates
  drm/amdgpu: add optional dependencies to the CS IOCTL v2
  drm/amdgpu: recreate fence from user seq
  gpu/drm/amdgpu: Fix build when CONFIG_DEBUG_FS is not set
  Revert "drm/radeon: dont switch vt on suspend"
  drm/amdgpu: disable enable_nb_ps_policy temporarily
  drm/amdgpu: correct define SMU_EnabledFeatureScoreboard_SclkDpmOn
  drm/amdgpu: allocate ip_block_enabled memory in common code
  drm/amdgpu: remove unnecessary check before kfree
  drm/amdgpu: use kzalloc for allocating one thing
  drm/radeon: fix adding all VAs to the freed list on remove v2
  drm/amdgpu: add chunk id validity check
  drm/amdgpu: fix crash on invalid CS IOCTL
  drm/amdgpu: reset wptr at cp compute resume (v2)
  drm/amdgpu: check VCE feedback and bitstream index
  drm/amdgpu: make VCE handle check more strict
  drm/amdgpu: check VCE relocation buffer range
  drm/amdgpu: silence invalid error message
  drm/amdgpu: fix wrong type
  ...
parents c021bf1e 479e9a95
...@@ -425,6 +425,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, ...@@ -425,6 +425,8 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
unsigned irq_type); unsigned irq_type);
int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
struct amdgpu_fence **fence); struct amdgpu_fence **fence);
int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner,
uint64_t seq, struct amdgpu_fence **fence);
void amdgpu_fence_process(struct amdgpu_ring *ring); void amdgpu_fence_process(struct amdgpu_ring *ring);
int amdgpu_fence_wait_next(struct amdgpu_ring *ring); int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
...@@ -435,9 +437,6 @@ int amdgpu_fence_wait(struct amdgpu_fence *fence, bool interruptible); ...@@ -435,9 +437,6 @@ int amdgpu_fence_wait(struct amdgpu_fence *fence, bool interruptible);
int amdgpu_fence_wait_any(struct amdgpu_device *adev, int amdgpu_fence_wait_any(struct amdgpu_device *adev,
struct amdgpu_fence **fences, struct amdgpu_fence **fences,
bool intr); bool intr);
long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev,
u64 *target_seq, bool intr,
long timeout);
struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence); struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
void amdgpu_fence_unref(struct amdgpu_fence **fence); void amdgpu_fence_unref(struct amdgpu_fence **fence);
...@@ -1622,6 +1621,7 @@ struct amdgpu_vce { ...@@ -1622,6 +1621,7 @@ struct amdgpu_vce {
unsigned fb_version; unsigned fb_version;
atomic_t handles[AMDGPU_MAX_VCE_HANDLES]; atomic_t handles[AMDGPU_MAX_VCE_HANDLES];
struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES]; struct drm_file *filp[AMDGPU_MAX_VCE_HANDLES];
uint32_t img_size[AMDGPU_MAX_VCE_HANDLES];
struct delayed_work idle_work; struct delayed_work idle_work;
const struct firmware *fw; /* VCE firmware */ const struct firmware *fw; /* VCE firmware */
struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS]; struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS];
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <drm/drmP.h> #include <drm/drmP.h>
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_trace.h"
static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
struct amdgpu_bo_list **result, struct amdgpu_bo_list **result,
...@@ -124,6 +125,8 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, ...@@ -124,6 +125,8 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
gws_obj = entry->robj; gws_obj = entry->robj;
if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA) if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
oa_obj = entry->robj; oa_obj = entry->robj;
trace_amdgpu_bo_list_set(list, entry->robj);
} }
for (i = 0; i < list->num_entries; ++i) for (i = 0; i < list->num_entries; ++i)
......
...@@ -181,8 +181,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) ...@@ -181,8 +181,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
} }
p->chunks[i].chunk_id = user_chunk.chunk_id; p->chunks[i].chunk_id = user_chunk.chunk_id;
p->chunks[i].length_dw = user_chunk.length_dw; p->chunks[i].length_dw = user_chunk.length_dw;
if (p->chunks[i].chunk_id == AMDGPU_CHUNK_ID_IB)
p->num_ibs++;
size = p->chunks[i].length_dw; size = p->chunks[i].length_dw;
cdata = (void __user *)(unsigned long)user_chunk.chunk_data; cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
...@@ -199,7 +197,12 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) ...@@ -199,7 +197,12 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
goto out; goto out;
} }
if (p->chunks[i].chunk_id == AMDGPU_CHUNK_ID_FENCE) { switch (p->chunks[i].chunk_id) {
case AMDGPU_CHUNK_ID_IB:
p->num_ibs++;
break;
case AMDGPU_CHUNK_ID_FENCE:
size = sizeof(struct drm_amdgpu_cs_chunk_fence); size = sizeof(struct drm_amdgpu_cs_chunk_fence);
if (p->chunks[i].length_dw * sizeof(uint32_t) >= size) { if (p->chunks[i].length_dw * sizeof(uint32_t) >= size) {
uint32_t handle; uint32_t handle;
...@@ -221,6 +224,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) ...@@ -221,6 +224,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
r = -EINVAL; r = -EINVAL;
goto out; goto out;
} }
break;
case AMDGPU_CHUNK_ID_DEPENDENCIES:
break;
default:
r = -EINVAL;
goto out;
} }
} }
...@@ -445,6 +456,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo ...@@ -445,6 +456,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
for (i = 0; i < parser->nchunks; i++) for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata); drm_free_large(parser->chunks[i].kdata);
kfree(parser->chunks); kfree(parser->chunks);
if (parser->ibs)
for (i = 0; i < parser->num_ibs; i++) for (i = 0; i < parser->num_ibs; i++)
amdgpu_ib_free(parser->adev, &parser->ibs[i]); amdgpu_ib_free(parser->adev, &parser->ibs[i]);
kfree(parser->ibs); kfree(parser->ibs);
...@@ -654,6 +666,55 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, ...@@ -654,6 +666,55 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
return 0; return 0;
} }
/**
 * amdgpu_cs_dependencies - attach user supplied dependencies to a submission
 *
 * @adev: amdgpu device, passed on to the ring lookup
 * @p: parser context holding the chunks and the IBs
 *
 * Walks all chunks of the submission and, for every entry found in an
 * AMDGPU_CHUNK_ID_DEPENDENCIES chunk, looks up the referenced ring,
 * recreates a fence from the entry's handle and adds that fence to the
 * sync object of the first IB.
 * Returns 0 when there are no IBs or all entries were processed, otherwise
 * the error returned by the ring lookup or by the fence recreation.
 */
static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
struct amdgpu_cs_parser *p)
{
struct amdgpu_ib *ib;
int i, j, r;
/* without an IB there is nothing the dependencies could be attached to */
if (!p->num_ibs)
return 0;
/* Add dependencies to first IB */
ib = &p->ibs[0];
for (i = 0; i < p->nchunks; ++i) {
struct drm_amdgpu_cs_chunk_dep *deps;
struct amdgpu_cs_chunk *chunk;
unsigned num_deps;
chunk = &p->chunks[i];
/* only dependency chunks are of interest here */
if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
continue;
deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
/*
 * length_dw is in dwords, so convert to bytes before dividing by the
 * entry size; the integer division drops a trailing partial entry.
 */
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_dep);
for (j = 0; j < num_deps; ++j) {
struct amdgpu_fence *fence;
struct amdgpu_ring *ring;
r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
deps[j].ip_instance,
deps[j].ring, &ring);
if (r)
return r;
/* the handle of the entry is used as the fence sequence number */
r = amdgpu_fence_recreate(ring, p->filp,
deps[j].handle,
&fence);
if (r)
return r;
/* NOTE(review): any result of amdgpu_sync_fence() is not checked here */
amdgpu_sync_fence(&ib->sync, fence);
amdgpu_fence_unref(&fence);
}
}
return 0;
}
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{ {
struct amdgpu_device *adev = dev->dev_private; struct amdgpu_device *adev = dev->dev_private;
...@@ -688,11 +749,16 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -688,11 +749,16 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
else else
DRM_ERROR("Failed to process the buffer list %d!\n", r); DRM_ERROR("Failed to process the buffer list %d!\n", r);
} }
} else { }
if (!r) {
reserved_buffers = true; reserved_buffers = true;
r = amdgpu_cs_ib_fill(adev, &parser); r = amdgpu_cs_ib_fill(adev, &parser);
} }
if (!r)
r = amdgpu_cs_dependencies(adev, &parser);
if (r) { if (r) {
amdgpu_cs_parser_fini(&parser, r, reserved_buffers); amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
up_read(&adev->exclusive_lock); up_read(&adev->exclusive_lock);
...@@ -730,9 +796,9 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, ...@@ -730,9 +796,9 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
{ {
union drm_amdgpu_wait_cs *wait = data; union drm_amdgpu_wait_cs *wait = data;
struct amdgpu_device *adev = dev->dev_private; struct amdgpu_device *adev = dev->dev_private;
uint64_t seq[AMDGPU_MAX_RINGS] = {0};
struct amdgpu_ring *ring = NULL;
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
struct amdgpu_fence *fence = NULL;
struct amdgpu_ring *ring = NULL;
struct amdgpu_ctx *ctx; struct amdgpu_ctx *ctx;
long r; long r;
...@@ -745,9 +811,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, ...@@ -745,9 +811,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
if (r) if (r)
return r; return r;
seq[ring->idx] = wait->in.handle; r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence);
if (r)
return r;
r = amdgpu_fence_wait_seq_timeout(adev, seq, true, timeout); r = fence_wait_timeout(&fence->base, true, timeout);
amdgpu_fence_unref(&fence);
amdgpu_ctx_put(ctx); amdgpu_ctx_put(ctx);
if (r < 0) if (r < 0)
return r; return r;
......
...@@ -1191,7 +1191,9 @@ static int amdgpu_early_init(struct amdgpu_device *adev) ...@@ -1191,7 +1191,9 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
return -EINVAL; return -EINVAL;
} }
adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL);
if (adev->ip_block_enabled == NULL)
return -ENOMEM;
if (adev->ip_blocks == NULL) { if (adev->ip_blocks == NULL) {
DRM_ERROR("No IP blocks found!\n"); DRM_ERROR("No IP blocks found!\n");
...@@ -1575,7 +1577,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) ...@@ -1575,7 +1577,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
amdgpu_fence_driver_fini(adev); amdgpu_fence_driver_fini(adev);
amdgpu_fbdev_fini(adev); amdgpu_fbdev_fini(adev);
r = amdgpu_fini(adev); r = amdgpu_fini(adev);
if (adev->ip_block_enabled)
kfree(adev->ip_block_enabled); kfree(adev->ip_block_enabled);
adev->ip_block_enabled = NULL; adev->ip_block_enabled = NULL;
adev->accel_working = false; adev->accel_working = false;
...@@ -2000,4 +2001,10 @@ int amdgpu_debugfs_init(struct drm_minor *minor) ...@@ -2000,4 +2001,10 @@ int amdgpu_debugfs_init(struct drm_minor *minor)
void amdgpu_debugfs_cleanup(struct drm_minor *minor) void amdgpu_debugfs_cleanup(struct drm_minor *minor)
{ {
} }
#else
/*
 * Stub variants for the #else branch: the init stub always reports
 * success and the cleanup stub does nothing.
 */
static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
return 0;
}
static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { }
#endif #endif
...@@ -135,6 +135,38 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, ...@@ -135,6 +135,38 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
return 0; return 0;
} }
/**
 * amdgpu_fence_recreate - recreate a fence from a user fence
 *
 * @ring: ring the fence is associated with
 * @owner: creator of the fence
 * @seq: user fence sequence number
 * @fence: resulting amdgpu fence object
 *
 * Recreates a fence command from the user fence sequence number (all asics).
 * Returns 0 on success, -EINVAL if @seq is larger than the ring's own
 * sync_seq entry, or -ENOMEM if the fence object could not be allocated.
 */
int amdgpu_fence_recreate(struct amdgpu_ring *ring, void *owner,
uint64_t seq, struct amdgpu_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
/* reject sequence numbers beyond the ring's own sync_seq entry */
if (seq > ring->fence_drv.sync_seq[ring->idx])
return -EINVAL;
*fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
if ((*fence) == NULL)
return -ENOMEM;
(*fence)->seq = seq;
(*fence)->ring = ring;
(*fence)->owner = owner;
/* the fence context is derived from the device context plus the ring index */
fence_init(&(*fence)->base, &amdgpu_fence_ops,
&adev->fence_queue.lock, adev->fence_context + ring->idx,
(*fence)->seq);
return 0;
}
/** /**
* amdgpu_fence_check_signaled - callback from fence_queue * amdgpu_fence_check_signaled - callback from fence_queue
* *
...@@ -517,12 +549,14 @@ static bool amdgpu_fence_any_seq_signaled(struct amdgpu_device *adev, u64 *seq) ...@@ -517,12 +549,14 @@ static bool amdgpu_fence_any_seq_signaled(struct amdgpu_device *adev, u64 *seq)
* the wait timeout, or an error for all other cases. * the wait timeout, or an error for all other cases.
* -EDEADLK is returned when a GPU lockup has been detected. * -EDEADLK is returned when a GPU lockup has been detected.
*/ */
long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev, u64 *target_seq, static long amdgpu_fence_wait_seq_timeout(struct amdgpu_device *adev,
bool intr, long timeout) u64 *target_seq, bool intr,
long timeout)
{ {
uint64_t last_seq[AMDGPU_MAX_RINGS]; uint64_t last_seq[AMDGPU_MAX_RINGS];
bool signaled; bool signaled;
int i, r; int i;
long r;
if (timeout == 0) { if (timeout == 0) {
return amdgpu_fence_any_seq_signaled(adev, target_seq); return amdgpu_fence_any_seq_signaled(adev, target_seq);
...@@ -1023,7 +1057,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) ...@@ -1023,7 +1057,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
amdgpu_fence_process(ring); amdgpu_fence_process(ring);
seq_printf(m, "--- ring %d ---\n", i); seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
seq_printf(m, "Last signaled fence 0x%016llx\n", seq_printf(m, "Last signaled fence 0x%016llx\n",
(unsigned long long)atomic64_read(&ring->fence_drv.last_seq)); (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
seq_printf(m, "Last emitted 0x%016llx\n", seq_printf(m, "Last emitted 0x%016llx\n",
...@@ -1031,7 +1065,8 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) ...@@ -1031,7 +1065,8 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
for (j = 0; j < AMDGPU_MAX_RINGS; ++j) { for (j = 0; j < AMDGPU_MAX_RINGS; ++j) {
struct amdgpu_ring *other = adev->rings[j]; struct amdgpu_ring *other = adev->rings[j];
if (i != j && other && other->fence_drv.initialized) if (i != j && other && other->fence_drv.initialized &&
ring->fence_drv.sync_seq[j])
seq_printf(m, "Last sync to ring %d 0x%016llx\n", seq_printf(m, "Last sync to ring %d 0x%016llx\n",
j, ring->fence_drv.sync_seq[j]); j, ring->fence_drv.sync_seq[j]);
} }
......
...@@ -496,7 +496,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, ...@@ -496,7 +496,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
error_free: error_free:
drm_free_large(vm_bos); drm_free_large(vm_bos);
if (r) if (r && r != -ERESTARTSYS)
DRM_ERROR("Couldn't update BO_VA (%d)\n", r); DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
} }
...@@ -525,8 +525,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, ...@@ -525,8 +525,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -EINVAL; return -EINVAL;
} }
invalid_flags = ~(AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | invalid_flags = ~(AMDGPU_VM_DELAY_UPDATE | AMDGPU_VM_PAGE_READABLE |
AMDGPU_VM_PAGE_EXECUTABLE); AMDGPU_VM_PAGE_WRITEABLE | AMDGPU_VM_PAGE_EXECUTABLE);
if ((args->flags & invalid_flags)) { if ((args->flags & invalid_flags)) {
dev_err(&dev->pdev->dev, "invalid flags 0x%08X vs 0x%08X\n", dev_err(&dev->pdev->dev, "invalid flags 0x%08X vs 0x%08X\n",
args->flags, invalid_flags); args->flags, invalid_flags);
...@@ -579,7 +579,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, ...@@ -579,7 +579,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
break; break;
} }
if (!r) if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
amdgpu_gem_va_update_vm(adev, bo_va); amdgpu_gem_va_update_vm(adev, bo_va);
drm_gem_object_unreference_unlocked(gobj); drm_gem_object_unreference_unlocked(gobj);
......
...@@ -30,19 +30,21 @@ TRACE_EVENT(amdgpu_cs, ...@@ -30,19 +30,21 @@ TRACE_EVENT(amdgpu_cs,
TP_PROTO(struct amdgpu_cs_parser *p, int i), TP_PROTO(struct amdgpu_cs_parser *p, int i),
TP_ARGS(p, i), TP_ARGS(p, i),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(struct amdgpu_bo_list *, bo_list)
__field(u32, ring) __field(u32, ring)
__field(u32, dw) __field(u32, dw)
__field(u32, fences) __field(u32, fences)
), ),
TP_fast_assign( TP_fast_assign(
__entry->bo_list = p->bo_list;
__entry->ring = p->ibs[i].ring->idx; __entry->ring = p->ibs[i].ring->idx;
__entry->dw = p->ibs[i].length_dw; __entry->dw = p->ibs[i].length_dw;
__entry->fences = amdgpu_fence_count_emitted( __entry->fences = amdgpu_fence_count_emitted(
p->ibs[i].ring); p->ibs[i].ring);
), ),
TP_printk("ring=%u, dw=%u, fences=%u", TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
__entry->ring, __entry->dw, __entry->bo_list, __entry->ring, __entry->dw,
__entry->fences) __entry->fences)
); );
...@@ -61,6 +63,54 @@ TRACE_EVENT(amdgpu_vm_grab_id, ...@@ -61,6 +63,54 @@ TRACE_EVENT(amdgpu_vm_grab_id,
TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring) TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring)
); );
/*
 * amdgpu_vm_bo_map trace event: records the BO of the bo_va together with
 * the mapping's interval start/last, offset and flags.
 */
TRACE_EVENT(amdgpu_vm_bo_map,
TP_PROTO(struct amdgpu_bo_va *bo_va,
struct amdgpu_bo_va_mapping *mapping),
TP_ARGS(bo_va, mapping),
TP_STRUCT__entry(
__field(struct amdgpu_bo *, bo)
__field(long, start)
__field(long, last)
__field(u64, offset)
__field(u32, flags)
),
TP_fast_assign(
__entry->bo = bo_va->bo;
__entry->start = mapping->it.start;
__entry->last = mapping->it.last;
__entry->offset = mapping->offset;
__entry->flags = mapping->flags;
),
TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x",
__entry->bo, __entry->start, __entry->last,
__entry->offset, __entry->flags)
);
/*
 * amdgpu_vm_bo_unmap trace event: records the same fields as the map
 * event (BO, interval start/last, offset, flags) for a mapping.
 */
TRACE_EVENT(amdgpu_vm_bo_unmap,
TP_PROTO(struct amdgpu_bo_va *bo_va,
struct amdgpu_bo_va_mapping *mapping),
TP_ARGS(bo_va, mapping),
TP_STRUCT__entry(
__field(struct amdgpu_bo *, bo)
__field(long, start)
__field(long, last)
__field(u64, offset)
__field(u32, flags)
),
TP_fast_assign(
__entry->bo = bo_va->bo;
__entry->start = mapping->it.start;
__entry->last = mapping->it.last;
__entry->offset = mapping->offset;
__entry->flags = mapping->flags;
),
TP_printk("bo=%p, start=%lx, last=%lx, offset=%010llx, flags=%08x",
__entry->bo, __entry->start, __entry->last,
__entry->offset, __entry->flags)
);
TRACE_EVENT(amdgpu_vm_bo_update, TRACE_EVENT(amdgpu_vm_bo_update,
TP_PROTO(struct amdgpu_bo_va_mapping *mapping), TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
TP_ARGS(mapping), TP_ARGS(mapping),
...@@ -121,6 +171,21 @@ TRACE_EVENT(amdgpu_vm_flush, ...@@ -121,6 +171,21 @@ TRACE_EVENT(amdgpu_vm_flush,
__entry->pd_addr, __entry->ring, __entry->id) __entry->pd_addr, __entry->ring, __entry->id)
); );
/*
 * amdgpu_bo_list_set trace event: records the pointer of the BO list and
 * the pointer of one BO passed in with it.
 */
TRACE_EVENT(amdgpu_bo_list_set,
TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
TP_ARGS(list, bo),
TP_STRUCT__entry(
__field(struct amdgpu_bo_list *, list)
__field(struct amdgpu_bo *, bo)
),
TP_fast_assign(
__entry->list = list;
__entry->bo = bo;
),
TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
);
DECLARE_EVENT_CLASS(amdgpu_fence_request, DECLARE_EVENT_CLASS(amdgpu_fence_request,
TP_PROTO(struct drm_device *dev, int ring, u32 seqno), TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
......
...@@ -674,7 +674,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) ...@@ -674,7 +674,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
return 0; return 0;
if (gtt && gtt->userptr) { if (gtt && gtt->userptr) {
ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL); ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
if (!ttm->sg) if (!ttm->sg)
return -ENOMEM; return -ENOMEM;
......
...@@ -464,28 +464,42 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, ...@@ -464,28 +464,42 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
* @p: parser context * @p: parser context
* @lo: address of lower dword * @lo: address of lower dword
* @hi: address of higher dword * @hi: address of higher dword
* @size: minimum size
* *
* Patch relocation inside command stream with real buffer address * Patch relocation inside command stream with real buffer address
*/ */
int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int hi) static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
int lo, int hi, unsigned size, uint32_t index)
{ {
struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_ib *ib = &p->ibs[ib_idx]; struct amdgpu_ib *ib = &p->ibs[ib_idx];
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
uint64_t addr; uint64_t addr;
if (index == 0xffffffff)
index = 0;
addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) | addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) |
((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32;
addr += ((uint64_t)size) * ((uint64_t)index);
mapping = amdgpu_cs_find_mapping(p, addr, &bo); mapping = amdgpu_cs_find_mapping(p, addr, &bo);
if (mapping == NULL) { if (mapping == NULL) {
DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d\n", DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n",
addr, lo, hi, size, index);
return -EINVAL;
}
if ((addr + (uint64_t)size) >
((uint64_t)mapping->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n",
addr, lo, hi); addr, lo, hi);
return -EINVAL; return -EINVAL;
} }
addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE;
addr += amdgpu_bo_gpu_offset(bo); addr += amdgpu_bo_gpu_offset(bo);
addr -= ((uint64_t)size) * ((uint64_t)index);
ib->ptr[lo] = addr & 0xFFFFFFFF; ib->ptr[lo] = addr & 0xFFFFFFFF;
ib->ptr[hi] = addr >> 32; ib->ptr[hi] = addr >> 32;
...@@ -493,6 +507,48 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int ...@@ -493,6 +507,48 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int
return 0; return 0;
} }
/**
 * amdgpu_vce_validate_handle - validate stream handle
 *
 * @p: parser context
 * @handle: handle to validate
 * @allocated: set to true when a new session slot was claimed for the
 *             handle, false otherwise
 *
 * Validates the handle and returns the found session index. Returns
 * -EINVAL if the handle is already registered for a different file, or
 * if the handle is unknown and there is no free session index left.
 */
static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
uint32_t handle, bool *allocated)
{
unsigned i;
*allocated = false;
/* validate the handle */
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
if (atomic_read(&p->adev->vce.handles[i]) == handle) {
/* a known handle must belong to the file submitting this CS */
if (p->adev->vce.filp[i] != p->filp) {
DRM_ERROR("VCE handle collision detected!\n");
return -EINVAL;
}
return i;
}
}
/* handle not found try to alloc a new one */
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
/* claim the first slot whose handle value is still 0 */
if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
p->adev->vce.filp[i] = p->filp;
p->adev->vce.img_size[i] = 0;
*allocated = true;
return i;
}
}
DRM_ERROR("No more free VCE handles!\n");
return -EINVAL;
}
/** /**
* amdgpu_vce_cs_parse - parse and validate the command stream * amdgpu_vce_cs_parse - parse and validate the command stream
* *
...@@ -501,10 +557,15 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int ...@@ -501,10 +557,15 @@ int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int
*/ */
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
{ {
uint32_t handle = 0;
bool destroy = false;
int i, r, idx = 0;
struct amdgpu_ib *ib = &p->ibs[ib_idx]; struct amdgpu_ib *ib = &p->ibs[ib_idx];
unsigned fb_idx = 0, bs_idx = 0;
int session_idx = -1;
bool destroyed = false;
bool created = false;
bool allocated = false;
uint32_t tmp, handle = 0;
uint32_t *size = &tmp;
int i, r = 0, idx = 0;
amdgpu_vce_note_usage(p->adev); amdgpu_vce_note_usage(p->adev);
...@@ -514,16 +575,44 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) ...@@ -514,16 +575,44 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
if ((len < 8) || (len & 3)) { if ((len < 8) || (len & 3)) {
DRM_ERROR("invalid VCE command length (%d)!\n", len); DRM_ERROR("invalid VCE command length (%d)!\n", len);
return -EINVAL; r = -EINVAL;
goto out;
}
if (destroyed) {
DRM_ERROR("No other command allowed after destroy!\n");
r = -EINVAL;
goto out;
} }
switch (cmd) { switch (cmd) {
case 0x00000001: // session case 0x00000001: // session
handle = amdgpu_get_ib_value(p, ib_idx, idx + 2); handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
session_idx = amdgpu_vce_validate_handle(p, handle,
&allocated);
if (session_idx < 0)
return session_idx;
size = &p->adev->vce.img_size[session_idx];
break; break;
case 0x00000002: // task info case 0x00000002: // task info
fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
break;
case 0x01000001: // create case 0x01000001: // create
created = true;
if (!allocated) {
DRM_ERROR("Handle already in use!\n");
r = -EINVAL;
goto out;
}
*size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
amdgpu_get_ib_value(p, ib_idx, idx + 10) *
8 * 3 / 2;
break;
case 0x04000001: // config extension case 0x04000001: // config extension
case 0x04000002: // pic control case 0x04000002: // pic control
case 0x04000005: // rate control case 0x04000005: // rate control
...@@ -534,60 +623,74 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) ...@@ -534,60 +623,74 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
break; break;
case 0x03000001: // encode case 0x03000001: // encode
r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9); r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
*size, 0);
if (r) if (r)
return r; goto out;
r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11); r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
*size / 3, 0);
if (r) if (r)
return r; goto out;
break; break;
case 0x02000001: // destroy case 0x02000001: // destroy
destroy = true; destroyed = true;
break; break;
case 0x05000001: // context buffer case 0x05000001: // context buffer
r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
*size * 2, 0);
if (r)
goto out;
break;
case 0x05000004: // video bitstream buffer case 0x05000004: // video bitstream buffer
tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
tmp, bs_idx);
if (r)
goto out;
break;
case 0x05000005: // feedback buffer case 0x05000005: // feedback buffer
r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2); r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
4096, fb_idx);
if (r) if (r)
return r; goto out;
break; break;
default: default:
DRM_ERROR("invalid VCE command (0x%x)!\n", cmd); DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
return -EINVAL; r = -EINVAL;
goto out;
} }
idx += len / 4; if (session_idx == -1) {
DRM_ERROR("no session command at start of IB\n");
r = -EINVAL;
goto out;
} }
if (destroy) { idx += len / 4;
/* IB contains a destroy msg, free the handle */
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0);
return 0;
} }
/* create or encode, validate the handle */ if (allocated && !created) {
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { DRM_ERROR("New session without create command!\n");
if (atomic_read(&p->adev->vce.handles[i]) == handle) r = -ENOENT;
return 0;
} }
/* handle not found try to alloc a new one */ out:
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { if ((!r && destroyed) || (r && allocated)) {
if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) { /*
p->adev->vce.filp[i] = p->filp; * IB contains a destroy msg or we have allocated an
return 0; * handle and got an error, anyway free the handle
} */
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0);
} }
DRM_ERROR("No more free VCE handles!\n"); return r;
return -EINVAL;
} }
/** /**
......
...@@ -33,7 +33,6 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ...@@ -33,7 +33,6 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
struct amdgpu_fence **fence); struct amdgpu_fence **fence);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int hi);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring, bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring,
struct amdgpu_semaphore *semaphore, struct amdgpu_semaphore *semaphore,
......
...@@ -1001,6 +1001,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, ...@@ -1001,6 +1001,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
list_add(&mapping->list, &bo_va->mappings); list_add(&mapping->list, &bo_va->mappings);
interval_tree_insert(&mapping->it, &vm->va); interval_tree_insert(&mapping->it, &vm->va);
trace_amdgpu_vm_bo_map(bo_va, mapping);
bo_va->addr = 0; bo_va->addr = 0;
...@@ -1058,6 +1059,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, ...@@ -1058,6 +1059,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
mutex_lock(&vm->mutex); mutex_lock(&vm->mutex);
list_del(&mapping->list); list_del(&mapping->list);
interval_tree_remove(&mapping->it, &vm->va); interval_tree_remove(&mapping->it, &vm->va);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
kfree(mapping); kfree(mapping);
error_unlock: error_unlock:
...@@ -1099,6 +1101,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, ...@@ -1099,6 +1101,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
mutex_lock(&vm->mutex); mutex_lock(&vm->mutex);
list_del(&mapping->list); list_del(&mapping->list);
interval_tree_remove(&mapping->it, &vm->va); interval_tree_remove(&mapping->it, &vm->va);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
if (bo_va->addr) { if (bo_va->addr) {
/* clear the old address */ /* clear the old address */
...@@ -1139,6 +1142,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, ...@@ -1139,6 +1142,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
list_for_each_entry_safe(mapping, next, &bo_va->mappings, list) { list_for_each_entry_safe(mapping, next, &bo_va->mappings, list) {
list_del(&mapping->list); list_del(&mapping->list);
interval_tree_remove(&mapping->it, &vm->va); interval_tree_remove(&mapping->it, &vm->va);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
if (bo_va->addr) if (bo_va->addr)
list_add(&mapping->list, &vm->freed); list_add(&mapping->list, &vm->freed);
else else
......
...@@ -2256,10 +2256,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) ...@@ -2256,10 +2256,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
return -EINVAL; return -EINVAL;
} }
adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL);
if (adev->ip_block_enabled == NULL)
return -ENOMEM;
return 0; return 0;
} }
......
...@@ -552,4 +552,10 @@ ...@@ -552,4 +552,10 @@
#define VCE_CMD_IB_AUTO 0x00000005 #define VCE_CMD_IB_AUTO 0x00000005
#define VCE_CMD_SEMAPHORE 0x00000006 #define VCE_CMD_SEMAPHORE 0x00000006
/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
enum {
MTYPE_CACHED = 0,
MTYPE_NONCACHED = 3
};
#endif #endif
...@@ -425,7 +425,7 @@ static int cz_dpm_init(struct amdgpu_device *adev) ...@@ -425,7 +425,7 @@ static int cz_dpm_init(struct amdgpu_device *adev)
pi->mgcg_cgtt_local1 = 0x0; pi->mgcg_cgtt_local1 = 0x0;
pi->clock_slow_down_step = 25000; pi->clock_slow_down_step = 25000;
pi->skip_clock_slow_down = 1; pi->skip_clock_slow_down = 1;
pi->enable_nb_ps_policy = 1; pi->enable_nb_ps_policy = 0;
pi->caps_power_containment = true; pi->caps_power_containment = true;
pi->caps_cac = true; pi->caps_cac = true;
pi->didt_enabled = false; pi->didt_enabled = false;
......
...@@ -46,7 +46,7 @@ ...@@ -46,7 +46,7 @@
/* Do not change the following, it is also defined in SMU8.h */ /* Do not change the following, it is also defined in SMU8.h */
#define SMU_EnabledFeatureScoreboard_AcpDpmOn 0x00000001 #define SMU_EnabledFeatureScoreboard_AcpDpmOn 0x00000001
#define SMU_EnabledFeatureScoreboard_SclkDpmOn 0x00100000 #define SMU_EnabledFeatureScoreboard_SclkDpmOn 0x00200000
#define SMU_EnabledFeatureScoreboard_UvdDpmOn 0x00800000 #define SMU_EnabledFeatureScoreboard_UvdDpmOn 0x00800000
#define SMU_EnabledFeatureScoreboard_VceDpmOn 0x01000000 #define SMU_EnabledFeatureScoreboard_VceDpmOn 0x01000000
......
...@@ -3379,7 +3379,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev, ...@@ -3379,7 +3379,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev,
uint32_t disp_int, mask, int_control, tmp; uint32_t disp_int, mask, int_control, tmp;
unsigned hpd; unsigned hpd;
if (entry->src_data > 6) { if (entry->src_data >= adev->mode_info.num_hpd) {
DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data);
return 0; return 0;
} }
......
...@@ -2009,6 +2009,46 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev, ...@@ -2009,6 +2009,46 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
} }
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)

/**
 * gmc_v7_0_init_compute_vmid - set up the SH_MEM registers of the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Writes the same SH_MEM_CONFIG, SH_MEM_APE1_BASE, SH_MEM_APE1_LIMIT and
 * SH_MEM_BASES values for every VMID in the half-open range
 * [FIRST_COMPUTE_VMID, LAST_COMPUTE_VMID).  adev->srbm_mutex is held while
 * the SRBM selection is changed, and the selection is put back to VMID 0
 * before the mutex is released.
 */
static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev)
{
	uint32_t bases, config;
	int vmid;

	/*
	 * Configure apertures:
	 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* the same base value goes into the low and the high 16 bits */
	bases = (DEFAULT_SH_MEM_BASES << 16) | DEFAULT_SH_MEM_BASES;

	config = (SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
		  SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) |
		 (MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT);

	mutex_lock(&adev->srbm_mutex);
	for (vmid = FIRST_COMPUTE_VMID; vmid < LAST_COMPUTE_VMID; ++vmid) {
		cik_srbm_select(adev, 0, 0, 0, vmid);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, config);
		/* NOTE(review): base (1) above limit (0) presumably leaves APE1 unused - confirm */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, bases);
	}
	/* restore the default selection before dropping the lock */
	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
/** /**
* gfx_v7_0_gpu_init - setup the 3D engine * gfx_v7_0_gpu_init - setup the 3D engine
* *
...@@ -2230,6 +2270,8 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) ...@@ -2230,6 +2270,8 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
cik_srbm_select(adev, 0, 0, 0, 0); cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex); mutex_unlock(&adev->srbm_mutex);
gmc_v7_0_init_compute_vmid(adev);
WREG32(mmSX_DEBUG_1, 0x20); WREG32(mmSX_DEBUG_1, 0x20);
WREG32(mmTA_CNTL_AUX, 0x00010000); WREG32(mmTA_CNTL_AUX, 0x00010000);
......
...@@ -1894,6 +1894,51 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev, ...@@ -1894,6 +1894,51 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
mutex_unlock(&adev->grbm_idx_mutex); mutex_unlock(&adev->grbm_idx_mutex);
} }
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)

/**
 * gmc_v8_0_init_compute_vmid - set up the SH_MEM registers of the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Writes the same SH_MEM_CONFIG, SH_MEM_APE1_BASE, SH_MEM_APE1_LIMIT and
 * SH_MEM_BASES values for every VMID in the half-open range
 * [FIRST_COMPUTE_VMID, LAST_COMPUTE_VMID).  adev->srbm_mutex is held while
 * the SRBM selection is changed, and the selection is put back to VMID 0
 * before the mutex is released.
 */
static void gmc_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	uint32_t bases, config;
	int vmid;

	/*
	 * Configure apertures:
	 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* the same base value goes into the low and the high 16 bits */
	bases = (DEFAULT_SH_MEM_BASES << 16) | DEFAULT_SH_MEM_BASES;

	/* assemble SH_MEM_CONFIG one field at a time */
	config = SH_MEM_ADDRESS_MODE_HSA64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
	config |= SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
		  SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
	config |= MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
	config |= SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (vmid = FIRST_COMPUTE_VMID; vmid < LAST_COMPUTE_VMID; ++vmid) {
		vi_srbm_select(adev, 0, 0, 0, vmid);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, config);
		/* NOTE(review): base (1) above limit (0) presumably leaves APE1 unused - confirm */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, bases);
	}
	/* restore the default selection before dropping the lock */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{ {
u32 gb_addr_config; u32 gb_addr_config;
...@@ -2113,6 +2158,8 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) ...@@ -2113,6 +2158,8 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
vi_srbm_select(adev, 0, 0, 0, 0); vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex); mutex_unlock(&adev->srbm_mutex);
gmc_v8_0_init_compute_vmid(adev);
mutex_lock(&adev->grbm_idx_mutex); mutex_lock(&adev->grbm_idx_mutex);
/* /*
* making sure that the following register writes will be broadcasted * making sure that the following register writes will be broadcasted
...@@ -3081,7 +3128,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) ...@@ -3081,7 +3128,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
AMDGPU_DOORBELL_KIQ << 2); AMDGPU_DOORBELL_KIQ << 2);
WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
AMDGPU_DOORBELL_MEC_RING7 << 2); 0x7FFFF << 2);
} }
tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
...@@ -3097,6 +3144,12 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) ...@@ -3097,6 +3144,12 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
mqd->cp_hqd_pq_doorbell_control); mqd->cp_hqd_pq_doorbell_control);
/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
ring->wptr = 0;
mqd->cp_hqd_pq_wptr = ring->wptr;
WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
/* set the vmid for the queue */ /* set the vmid for the queue */
mqd->cp_hqd_vmid = 0; mqd->cp_hqd_vmid = 0;
WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
......
...@@ -438,6 +438,31 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev) ...@@ -438,6 +438,31 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev)
/* XXX todo */ /* XXX todo */
} }
/**
 * sdma_v3_0_ctx_switch_enable - enable or disable SDMA automatic context switching
 *
 * @adev: amdgpu_device pointer
 * @enable: true to set, false to clear, the AUTO_CTXSW_ENABLE field
 *
 * Read-modify-writes the AUTO_CTXSW_ENABLE field of the SDMA0_CNTL register
 * of each of the SDMA_MAX_INSTANCE engines (VI); every other bit of the
 * register is written back as it was read.
 */
static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
{
	const int ctxsw = enable ? 1 : 0;
	int inst;

	for (inst = 0; inst < SDMA_MAX_INSTANCE; inst++) {
		u32 cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[inst]);

		cntl = REG_SET_FIELD(cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, ctxsw);
		WREG32(mmSDMA0_CNTL + sdma_offsets[inst], cntl);
	}
}
/** /**
* sdma_v3_0_enable - stop the async dma engines * sdma_v3_0_enable - stop the async dma engines
* *
...@@ -648,6 +673,8 @@ static int sdma_v3_0_start(struct amdgpu_device *adev) ...@@ -648,6 +673,8 @@ static int sdma_v3_0_start(struct amdgpu_device *adev)
/* unhalt the MEs */ /* unhalt the MEs */
sdma_v3_0_enable(adev, true); sdma_v3_0_enable(adev, true);
/* enable sdma ring preemption */
sdma_v3_0_ctx_switch_enable(adev, true);
/* start the gfx rings and rlc compute queues */ /* start the gfx rings and rlc compute queues */
r = sdma_v3_0_gfx_resume(adev); r = sdma_v3_0_gfx_resume(adev);
...@@ -1079,6 +1106,7 @@ static int sdma_v3_0_hw_fini(void *handle) ...@@ -1079,6 +1106,7 @@ static int sdma_v3_0_hw_fini(void *handle)
{ {
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
sdma_v3_0_ctx_switch_enable(adev, false);
sdma_v3_0_enable(adev, false); sdma_v3_0_enable(adev, false);
return 0; return 0;
......
...@@ -1189,10 +1189,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) ...@@ -1189,10 +1189,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
return -EINVAL; return -EINVAL;
} }
adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL);
if (adev->ip_block_enabled == NULL)
return -ENOMEM;
return 0; return 0;
} }
......
...@@ -4604,6 +4604,31 @@ void cik_compute_set_wptr(struct radeon_device *rdev, ...@@ -4604,6 +4604,31 @@ void cik_compute_set_wptr(struct radeon_device *rdev,
WDOORBELL32(ring->doorbell_index, ring->wptr); WDOORBELL32(ring->doorbell_index, ring->wptr);
} }
/**
 * cik_compute_stop - quiesce the hardware queue behind one compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: compute ring whose me/pipe/queue identify the queue to stop
 *
 * Selects the ring's queue through SRBM, turns off wptr polling and, if the
 * HQD reports itself active, requests a dequeue, waits up to
 * rdev->usec_timeout microseconds for it to go idle, then zeroes the queue's
 * read and write pointers.  The SRBM selection is reset to 0 on exit.
 * This function takes no lock itself; the visible caller,
 * cik_cp_compute_enable(), holds rdev->srbm_mutex around the call.
 */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 poll_cntl, usec;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);

	/* Disable wptr polling. */
	poll_cntl = RREG32(CP_PQ_WPTR_POLL_CNTL) & ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, poll_cntl);

	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);

		/* poll once per microsecond until idle or timed out */
		usec = 0;
		while (usec < rdev->usec_timeout &&
		       (RREG32(CP_HQD_ACTIVE) & 1)) {
			udelay(1);
			usec++;
		}

		/* the pointers are cleared whether or not the wait timed out */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}

	cik_srbm_select(rdev, 0, 0, 0, 0);
}
/** /**
* cik_cp_compute_enable - enable/disable the compute CP MEs * cik_cp_compute_enable - enable/disable the compute CP MEs
* *
...@@ -4617,6 +4642,15 @@ static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) ...@@ -4617,6 +4642,15 @@ static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
if (enable) if (enable)
WREG32(CP_MEC_CNTL, 0); WREG32(CP_MEC_CNTL, 0);
else { else {
/*
* To make hibernation reliable we need to clear compute ring
* configuration before halting the compute ring.
*/
mutex_lock(&rdev->srbm_mutex);
cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
mutex_unlock(&rdev->srbm_mutex);
WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
......
...@@ -268,6 +268,17 @@ static void cik_sdma_gfx_stop(struct radeon_device *rdev) ...@@ -268,6 +268,17 @@ static void cik_sdma_gfx_stop(struct radeon_device *rdev)
} }
rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
/* FIXME: use something other than this big hammer. After a few days of
 * trying, no good combination could be found, so reset the SDMA blocks,
 * as it seems we do not shut them down properly. This fixes hibernation
 * and does not affect suspend to RAM.
 */
WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
(void)RREG32(SRBM_SOFT_RESET);
udelay(50);
WREG32(SRBM_SOFT_RESET, 0);
(void)RREG32(SRBM_SOFT_RESET);
} }
/** /**
......
...@@ -469,22 +469,22 @@ void radeon_audio_detect(struct drm_connector *connector, ...@@ -469,22 +469,22 @@ void radeon_audio_detect(struct drm_connector *connector,
dig = radeon_encoder->enc_priv; dig = radeon_encoder->enc_priv;
if (status == connector_status_connected) { if (status == connector_status_connected) {
struct radeon_connector *radeon_connector;
int sink_type;
if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) { if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) {
radeon_encoder->audio = NULL; radeon_encoder->audio = NULL;
return; return;
} }
radeon_connector = to_radeon_connector(connector); if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) {
sink_type = radeon_dp_getsinktype(radeon_connector); struct radeon_connector *radeon_connector = to_radeon_connector(connector);
if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort && if (radeon_dp_getsinktype(radeon_connector) ==
sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) CONNECTOR_OBJECT_ID_DISPLAYPORT)
radeon_encoder->audio = rdev->audio.dp_funcs; radeon_encoder->audio = rdev->audio.dp_funcs;
else else
radeon_encoder->audio = rdev->audio.hdmi_funcs; radeon_encoder->audio = rdev->audio.hdmi_funcs;
} else {
radeon_encoder->audio = rdev->audio.hdmi_funcs;
}
dig->afmt->pin = radeon_audio_get_pin(connector->encoder); dig->afmt->pin = radeon_audio_get_pin(connector->encoder);
radeon_audio_enable(rdev, dig->afmt->pin, 0xf); radeon_audio_enable(rdev, dig->afmt->pin, 0xf);
......
...@@ -257,7 +257,6 @@ static int radeonfb_create(struct drm_fb_helper *helper, ...@@ -257,7 +257,6 @@ static int radeonfb_create(struct drm_fb_helper *helper,
} }
info->par = rfbdev; info->par = rfbdev;
info->skip_vt_switch = true;
ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj); ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
if (ret) { if (ret) {
......
...@@ -719,7 +719,7 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm) ...@@ -719,7 +719,7 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm)
return 0; return 0;
if (gtt && gtt->userptr) { if (gtt && gtt->userptr) {
ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL); ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
if (!ttm->sg) if (!ttm->sg)
return -ENOMEM; return -ENOMEM;
......
...@@ -1129,12 +1129,12 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev, ...@@ -1129,12 +1129,12 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
interval_tree_remove(&bo_va->it, &vm->va); interval_tree_remove(&bo_va->it, &vm->va);
spin_lock(&vm->status_lock); spin_lock(&vm->status_lock);
if (list_empty(&bo_va->vm_status)) { list_del(&bo_va->vm_status);
if (bo_va->it.start || bo_va->it.last) {
bo_va->bo = radeon_bo_ref(bo_va->bo); bo_va->bo = radeon_bo_ref(bo_va->bo);
list_add(&bo_va->vm_status, &vm->freed); list_add(&bo_va->vm_status, &vm->freed);
} else { } else {
radeon_fence_unref(&bo_va->last_pt_update); radeon_fence_unref(&bo_va->last_pt_update);
list_del(&bo_va->vm_status);
kfree(bo_va); kfree(bo_va);
} }
spin_unlock(&vm->status_lock); spin_unlock(&vm->status_lock);
......
...@@ -313,6 +313,9 @@ struct drm_amdgpu_gem_op { ...@@ -313,6 +313,9 @@ struct drm_amdgpu_gem_op {
#define AMDGPU_VA_OP_MAP 1 #define AMDGPU_VA_OP_MAP 1
#define AMDGPU_VA_OP_UNMAP 2 #define AMDGPU_VA_OP_UNMAP 2
/* Delay the page table update till the next CS */
#define AMDGPU_VM_DELAY_UPDATE (1 << 0)
/* Mapping flags */ /* Mapping flags */
/* readable mapping */ /* readable mapping */
#define AMDGPU_VM_PAGE_READABLE (1 << 1) #define AMDGPU_VM_PAGE_READABLE (1 << 1)
...@@ -348,6 +351,7 @@ struct drm_amdgpu_gem_va { ...@@ -348,6 +351,7 @@ struct drm_amdgpu_gem_va {
#define AMDGPU_CHUNK_ID_IB 0x01 #define AMDGPU_CHUNK_ID_IB 0x01
#define AMDGPU_CHUNK_ID_FENCE 0x02 #define AMDGPU_CHUNK_ID_FENCE 0x02
#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03
struct drm_amdgpu_cs_chunk { struct drm_amdgpu_cs_chunk {
uint32_t chunk_id; uint32_t chunk_id;
...@@ -399,6 +403,14 @@ struct drm_amdgpu_cs_chunk_ib { ...@@ -399,6 +403,14 @@ struct drm_amdgpu_cs_chunk_ib {
uint32_t ring; uint32_t ring;
}; };
/*
 * One dependency record: four 32-bit fields followed by one 64-bit field.
 * NOTE(review): presumably the payload of an AMDGPU_CHUNK_ID_DEPENDENCIES
 * chunk, and presumably part of the userspace ABI so that field order and
 * sizes must not change - confirm against the CS ioctl parser.
 */
struct drm_amdgpu_cs_chunk_dep {
/* NOTE(review): ip_type/ip_instance/ring presumably select the ring to wait on - confirm */
uint32_t ip_type;
uint32_t ip_instance;
uint32_t ring;
/* NOTE(review): presumably the context that owns the awaited submission - confirm */
uint32_t ctx_id;
/* NOTE(review): presumably the sequence/fence handle of that submission - confirm */
uint64_t handle;
};
struct drm_amdgpu_cs_chunk_fence { struct drm_amdgpu_cs_chunk_fence {
uint32_t handle; uint32_t handle;
uint32_t offset; uint32_t offset;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment