Commit 8ed59fd6 authored by Dave Airlie

Merge branch 'drm-fixes-4.4' of git://people.freedesktop.org/~agd5f/linux into drm-fixes

Radeon and amdgpu fixes for 4.4.  A bit more than usual since I missed
last week.  Misc fixes all over the place.  The big changes are the
tiling configuration fixes for Fiji.

* 'drm-fixes-4.4' of git://people.freedesktop.org/~agd5f/linux: (35 commits)
  drm/amdgpu: reserve/unreserve objects out of map/unmap operations
  drm/amdgpu: move bo_reserve out of amdgpu_vm_clear_bo
  drm/amdgpu: add lock for interval tree in vm
  drm/amdgpu: keep the owner for VMIDs
  drm/amdgpu: move VM manager clean into the VM code again
  drm/amdgpu: cleanup VM coding style
  drm/amdgpu: remove unused VM manager field
  drm/amdgpu: cleanup scheduler command submission
  drm/amdgpu: fix typo in firmware name
  drm/amdgpu: remove the unnecessary parameter adev for amdgpu_sa_bo_new()
  drm/amdgpu: wait interruptible when semaphores are disabled v2
  drm/amdgpu: update pd while updating vm as well
  drm/amdgpu: fix handling order in scheduler CS
  drm/amdgpu: fix incorrect mutex usage v3
  drm/amdgpu: cleanup scheduler fence get/put dance
  drm/amdgpu: add command submission workflow tracepoint
  drm/amdgpu: update Fiji's tiling mode table
  drm/amdgpu: fix bug that can't enter thermal interrupt for bonaire.
  drm/amdgpu: fix seq_printf format string
  drm/radeon: fix quirk for MSI R7 370 Armor 2X
  ...
parents 34258a32 49b02b18
...@@ -389,7 +389,6 @@ struct amdgpu_clock { ...@@ -389,7 +389,6 @@ struct amdgpu_clock {
* Fences. * Fences.
*/ */
struct amdgpu_fence_driver { struct amdgpu_fence_driver {
struct amdgpu_ring *ring;
uint64_t gpu_addr; uint64_t gpu_addr;
volatile uint32_t *cpu_addr; volatile uint32_t *cpu_addr;
/* sync_seq is protected by ring emission lock */ /* sync_seq is protected by ring emission lock */
...@@ -398,7 +397,7 @@ struct amdgpu_fence_driver { ...@@ -398,7 +397,7 @@ struct amdgpu_fence_driver {
bool initialized; bool initialized;
struct amdgpu_irq_src *irq_src; struct amdgpu_irq_src *irq_src;
unsigned irq_type; unsigned irq_type;
struct delayed_work lockup_work; struct timer_list fallback_timer;
wait_queue_head_t fence_queue; wait_queue_head_t fence_queue;
}; };
...@@ -917,8 +916,8 @@ struct amdgpu_ring { ...@@ -917,8 +916,8 @@ struct amdgpu_ring {
#define AMDGPU_VM_FAULT_STOP_ALWAYS 2 #define AMDGPU_VM_FAULT_STOP_ALWAYS 2
struct amdgpu_vm_pt { struct amdgpu_vm_pt {
struct amdgpu_bo *bo; struct amdgpu_bo *bo;
uint64_t addr; uint64_t addr;
}; };
struct amdgpu_vm_id { struct amdgpu_vm_id {
...@@ -926,8 +925,6 @@ struct amdgpu_vm_id { ...@@ -926,8 +925,6 @@ struct amdgpu_vm_id {
uint64_t pd_gpu_addr; uint64_t pd_gpu_addr;
/* last flushed PD/PT update */ /* last flushed PD/PT update */
struct fence *flushed_updates; struct fence *flushed_updates;
/* last use of vmid */
struct fence *last_id_use;
}; };
struct amdgpu_vm { struct amdgpu_vm {
...@@ -957,24 +954,70 @@ struct amdgpu_vm { ...@@ -957,24 +954,70 @@ struct amdgpu_vm {
/* for id and flush management per ring */ /* for id and flush management per ring */
struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS]; struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS];
/* for interval tree */
spinlock_t it_lock;
}; };
struct amdgpu_vm_manager { struct amdgpu_vm_manager {
struct fence *active[AMDGPU_NUM_VM]; struct {
uint32_t max_pfn; struct fence *active;
atomic_long_t owner;
} ids[AMDGPU_NUM_VM];
uint32_t max_pfn;
/* number of VMIDs */ /* number of VMIDs */
unsigned nvm; unsigned nvm;
/* vram base address for page table entry */ /* vram base address for page table entry */
u64 vram_base_offset; u64 vram_base_offset;
/* is vm enabled? */ /* is vm enabled? */
bool enabled; bool enabled;
/* for hw to save the PD addr on suspend/resume */
uint32_t saved_table_addr[AMDGPU_NUM_VM];
/* vm pte handling */ /* vm pte handling */
const struct amdgpu_vm_pte_funcs *vm_pte_funcs; const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
struct amdgpu_ring *vm_pte_funcs_ring; struct amdgpu_ring *vm_pte_funcs_ring;
}; };
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct list_head *head);
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync);
void amdgpu_vm_flush(struct amdgpu_ring *ring,
struct amdgpu_vm *vm,
struct fence *updates);
void amdgpu_vm_fence(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct fence *fence);
uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_sync *sync);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
struct ttm_mem_reg *mem);
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
struct amdgpu_bo *bo);
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
struct amdgpu_bo *bo);
struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_bo *bo);
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr, uint64_t offset,
uint64_t size, uint32_t flags);
int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr);
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
int amdgpu_vm_free_job(struct amdgpu_job *job);
/* /*
* context related structures * context related structures
*/ */
...@@ -1211,6 +1254,7 @@ struct amdgpu_cs_parser { ...@@ -1211,6 +1254,7 @@ struct amdgpu_cs_parser {
/* relocations */ /* relocations */
struct amdgpu_bo_list_entry *vm_bos; struct amdgpu_bo_list_entry *vm_bos;
struct list_head validated; struct list_head validated;
struct fence *fence;
struct amdgpu_ib *ibs; struct amdgpu_ib *ibs;
uint32_t num_ibs; uint32_t num_ibs;
...@@ -1226,7 +1270,7 @@ struct amdgpu_job { ...@@ -1226,7 +1270,7 @@ struct amdgpu_job {
struct amdgpu_device *adev; struct amdgpu_device *adev;
struct amdgpu_ib *ibs; struct amdgpu_ib *ibs;
uint32_t num_ibs; uint32_t num_ibs;
struct mutex job_lock; void *owner;
struct amdgpu_user_fence uf; struct amdgpu_user_fence uf;
int (*free_job)(struct amdgpu_job *job); int (*free_job)(struct amdgpu_job *job);
}; };
...@@ -2257,11 +2301,6 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev); ...@@ -2257,11 +2301,6 @@ void amdgpu_pci_config_reset(struct amdgpu_device *adev);
bool amdgpu_card_posted(struct amdgpu_device *adev); bool amdgpu_card_posted(struct amdgpu_device *adev);
void amdgpu_update_display_priority(struct amdgpu_device *adev); void amdgpu_update_display_priority(struct amdgpu_device *adev);
bool amdgpu_boot_test_post_card(struct amdgpu_device *adev); bool amdgpu_boot_test_post_card(struct amdgpu_device *adev);
struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
struct drm_file *filp,
struct amdgpu_ctx *ctx,
struct amdgpu_ib *ibs,
uint32_t num_ibs);
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data); int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
...@@ -2318,49 +2357,6 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe, ...@@ -2318,49 +2357,6 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg); unsigned long arg);
/*
* vm
*/
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct list_head *head);
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync);
void amdgpu_vm_flush(struct amdgpu_ring *ring,
struct amdgpu_vm *vm,
struct fence *updates);
void amdgpu_vm_fence(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_fence *fence);
uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
struct amdgpu_vm *vm, struct amdgpu_sync *sync);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
struct ttm_mem_reg *mem);
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
struct amdgpu_bo *bo);
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
struct amdgpu_bo *bo);
struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_bo *bo);
int amdgpu_vm_bo_map(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr, uint64_t offset,
uint64_t size, uint32_t flags);
int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
uint64_t addr);
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
int amdgpu_vm_free_job(struct amdgpu_job *job);
/* /*
* functions used by amdgpu_encoder.c * functions used by amdgpu_encoder.c
*/ */
......
...@@ -127,30 +127,6 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, ...@@ -127,30 +127,6 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
return 0; return 0;
} }
struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev,
struct drm_file *filp,
struct amdgpu_ctx *ctx,
struct amdgpu_ib *ibs,
uint32_t num_ibs)
{
struct amdgpu_cs_parser *parser;
int i;
parser = kzalloc(sizeof(struct amdgpu_cs_parser), GFP_KERNEL);
if (!parser)
return NULL;
parser->adev = adev;
parser->filp = filp;
parser->ctx = ctx;
parser->ibs = ibs;
parser->num_ibs = num_ibs;
for (i = 0; i < num_ibs; i++)
ibs[i].ctx = ctx;
return parser;
}
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{ {
union drm_amdgpu_cs *cs = data; union drm_amdgpu_cs *cs = data;
...@@ -463,8 +439,18 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a, ...@@ -463,8 +439,18 @@ static int cmp_size_smaller_first(void *priv, struct list_head *a,
return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages; return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
} }
static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int error, bool backoff) /**
* cs_parser_fini() - clean parser states
* @parser: parser structure holding parsing context.
* @error: error number
*
* If error is set than unvalidate buffer, otherwise just free memory
* used by parsing context.
**/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
{ {
unsigned i;
if (!error) { if (!error) {
/* Sort the buffer list from the smallest to largest buffer, /* Sort the buffer list from the smallest to largest buffer,
* which affects the order of buffers in the LRU list. * which affects the order of buffers in the LRU list.
...@@ -479,17 +465,14 @@ static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int err ...@@ -479,17 +465,14 @@ static void amdgpu_cs_parser_fini_early(struct amdgpu_cs_parser *parser, int err
list_sort(NULL, &parser->validated, cmp_size_smaller_first); list_sort(NULL, &parser->validated, cmp_size_smaller_first);
ttm_eu_fence_buffer_objects(&parser->ticket, ttm_eu_fence_buffer_objects(&parser->ticket,
&parser->validated, &parser->validated,
&parser->ibs[parser->num_ibs-1].fence->base); parser->fence);
} else if (backoff) { } else if (backoff) {
ttm_eu_backoff_reservation(&parser->ticket, ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated); &parser->validated);
} }
} fence_put(parser->fence);
static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
{
unsigned i;
if (parser->ctx) if (parser->ctx)
amdgpu_ctx_put(parser->ctx); amdgpu_ctx_put(parser->ctx);
if (parser->bo_list) if (parser->bo_list)
...@@ -499,31 +482,12 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser) ...@@ -499,31 +482,12 @@ static void amdgpu_cs_parser_fini_late(struct amdgpu_cs_parser *parser)
for (i = 0; i < parser->nchunks; i++) for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata); drm_free_large(parser->chunks[i].kdata);
kfree(parser->chunks); kfree(parser->chunks);
if (!amdgpu_enable_scheduler) if (parser->ibs)
{ for (i = 0; i < parser->num_ibs; i++)
if (parser->ibs) amdgpu_ib_free(parser->adev, &parser->ibs[i]);
for (i = 0; i < parser->num_ibs; i++) kfree(parser->ibs);
amdgpu_ib_free(parser->adev, &parser->ibs[i]); if (parser->uf.bo)
kfree(parser->ibs); drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
if (parser->uf.bo)
drm_gem_object_unreference_unlocked(&parser->uf.bo->gem_base);
}
kfree(parser);
}
/**
* cs_parser_fini() - clean parser states
* @parser: parser structure holding parsing context.
* @error: error number
*
* If error is set than unvalidate buffer, otherwise just free memory
* used by parsing context.
**/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
{
amdgpu_cs_parser_fini_early(parser, error, backoff);
amdgpu_cs_parser_fini_late(parser);
} }
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
...@@ -610,15 +574,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, ...@@ -610,15 +574,9 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
} }
r = amdgpu_bo_vm_update_pte(parser, vm); r = amdgpu_bo_vm_update_pte(parser, vm);
if (r) { if (!r)
goto out; amdgpu_cs_sync_rings(parser);
}
amdgpu_cs_sync_rings(parser);
if (!amdgpu_enable_scheduler)
r = amdgpu_ib_schedule(adev, parser->num_ibs, parser->ibs,
parser->filp);
out:
return r; return r;
} }
...@@ -828,36 +786,36 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -828,36 +786,36 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
union drm_amdgpu_cs *cs = data; union drm_amdgpu_cs *cs = data;
struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_cs_parser *parser; struct amdgpu_cs_parser parser = {};
bool reserved_buffers = false; bool reserved_buffers = false;
int i, r; int i, r;
if (!adev->accel_working) if (!adev->accel_working)
return -EBUSY; return -EBUSY;
parser = amdgpu_cs_parser_create(adev, filp, NULL, NULL, 0); parser.adev = adev;
if (!parser) parser.filp = filp;
return -ENOMEM;
r = amdgpu_cs_parser_init(parser, data); r = amdgpu_cs_parser_init(&parser, data);
if (r) { if (r) {
DRM_ERROR("Failed to initialize parser !\n"); DRM_ERROR("Failed to initialize parser !\n");
amdgpu_cs_parser_fini(parser, r, false); amdgpu_cs_parser_fini(&parser, r, false);
r = amdgpu_cs_handle_lockup(adev, r); r = amdgpu_cs_handle_lockup(adev, r);
return r; return r;
} }
mutex_lock(&vm->mutex); mutex_lock(&vm->mutex);
r = amdgpu_cs_parser_relocs(parser); r = amdgpu_cs_parser_relocs(&parser);
if (r == -ENOMEM) if (r == -ENOMEM)
DRM_ERROR("Not enough memory for command submission!\n"); DRM_ERROR("Not enough memory for command submission!\n");
else if (r && r != -ERESTARTSYS) else if (r && r != -ERESTARTSYS)
DRM_ERROR("Failed to process the buffer list %d!\n", r); DRM_ERROR("Failed to process the buffer list %d!\n", r);
else if (!r) { else if (!r) {
reserved_buffers = true; reserved_buffers = true;
r = amdgpu_cs_ib_fill(adev, parser); r = amdgpu_cs_ib_fill(adev, &parser);
} }
if (!r) { if (!r) {
r = amdgpu_cs_dependencies(adev, parser); r = amdgpu_cs_dependencies(adev, &parser);
if (r) if (r)
DRM_ERROR("Failed in the dependencies handling %d!\n", r); DRM_ERROR("Failed in the dependencies handling %d!\n", r);
} }
...@@ -865,62 +823,71 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ...@@ -865,62 +823,71 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (r) if (r)
goto out; goto out;
for (i = 0; i < parser->num_ibs; i++) for (i = 0; i < parser.num_ibs; i++)
trace_amdgpu_cs(parser, i); trace_amdgpu_cs(&parser, i);
r = amdgpu_cs_ib_vm_chunk(adev, parser); r = amdgpu_cs_ib_vm_chunk(adev, &parser);
if (r) if (r)
goto out; goto out;
if (amdgpu_enable_scheduler && parser->num_ibs) { if (amdgpu_enable_scheduler && parser.num_ibs) {
struct amdgpu_ring * ring = parser.ibs->ring;
struct amd_sched_fence *fence;
struct amdgpu_job *job; struct amdgpu_job *job;
struct amdgpu_ring * ring = parser->ibs->ring;
job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
if (!job) { if (!job) {
r = -ENOMEM; r = -ENOMEM;
goto out; goto out;
} }
job->base.sched = &ring->sched; job->base.sched = &ring->sched;
job->base.s_entity = &parser->ctx->rings[ring->idx].entity; job->base.s_entity = &parser.ctx->rings[ring->idx].entity;
job->adev = parser->adev; job->adev = parser.adev;
job->ibs = parser->ibs; job->owner = parser.filp;
job->num_ibs = parser->num_ibs; job->free_job = amdgpu_cs_free_job;
job->base.owner = parser->filp;
mutex_init(&job->job_lock); job->ibs = parser.ibs;
job->num_ibs = parser.num_ibs;
parser.ibs = NULL;
parser.num_ibs = 0;
if (job->ibs[job->num_ibs - 1].user) { if (job->ibs[job->num_ibs - 1].user) {
memcpy(&job->uf, &parser->uf, job->uf = parser.uf;
sizeof(struct amdgpu_user_fence));
job->ibs[job->num_ibs - 1].user = &job->uf; job->ibs[job->num_ibs - 1].user = &job->uf;
parser.uf.bo = NULL;
} }
job->free_job = amdgpu_cs_free_job; fence = amd_sched_fence_create(job->base.s_entity,
mutex_lock(&job->job_lock); parser.filp);
r = amd_sched_entity_push_job(&job->base); if (!fence) {
if (r) { r = -ENOMEM;
mutex_unlock(&job->job_lock);
amdgpu_cs_free_job(job); amdgpu_cs_free_job(job);
kfree(job); kfree(job);
goto out; goto out;
} }
cs->out.handle = job->base.s_fence = fence;
amdgpu_ctx_add_fence(parser->ctx, ring, parser.fence = fence_get(&fence->base);
&job->base.s_fence->base);
parser->ibs[parser->num_ibs - 1].sequence = cs->out.handle;
list_sort(NULL, &parser->validated, cmp_size_smaller_first); cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring,
ttm_eu_fence_buffer_objects(&parser->ticket, &fence->base);
&parser->validated, job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
&job->base.s_fence->base);
mutex_unlock(&job->job_lock); trace_amdgpu_cs_ioctl(job);
amdgpu_cs_parser_fini_late(parser); amd_sched_entity_push_job(&job->base);
mutex_unlock(&vm->mutex);
return 0; } else {
struct amdgpu_fence *fence;
r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs,
parser.filp);
fence = parser.ibs[parser.num_ibs - 1].fence;
parser.fence = fence_get(&fence->base);
cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence;
} }
cs->out.handle = parser->ibs[parser->num_ibs - 1].sequence;
out: out:
amdgpu_cs_parser_fini(parser, r, reserved_buffers); amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
mutex_unlock(&vm->mutex); mutex_unlock(&vm->mutex);
r = amdgpu_cs_handle_lockup(adev, r); r = amdgpu_cs_handle_lockup(adev, r);
return r; return r;
......
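With the rework above, the CS ioctl creates the scheduler fence itself and keeps its own reference to it for fencing the validated buffers, and amd_sched_entity_push_job() can no longer fail. A minimal sketch of the resulting reference flow (illustrative only, error handling trimmed; "entity" and "owner" stand in for the parser's context entity and file pointer):

        struct amd_sched_fence *fence;

        fence = amd_sched_fence_create(entity, owner);  /* ref #1, handed to the job */
        if (!fence)
                return -ENOMEM;
        job->base.s_fence = fence;                      /* dropped by the scheduler once the job ran */

        parser->fence = fence_get(&fence->base);        /* ref #2, kept by the parser */

        amd_sched_entity_push_job(&job->base);          /* returns void now, queueing cannot fail */

        /* amdgpu_cs_parser_fini() later fences the reserved BOs with
         * parser->fence and then drops it with fence_put(). */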
...@@ -47,6 +47,9 @@ ...@@ -47,6 +47,9 @@
* that the the relevant GPU caches have been flushed. * that the the relevant GPU caches have been flushed.
*/ */
static struct kmem_cache *amdgpu_fence_slab;
static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
/** /**
* amdgpu_fence_write - write a fence value * amdgpu_fence_write - write a fence value
* *
...@@ -84,24 +87,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) ...@@ -84,24 +87,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
return seq; return seq;
} }
/**
* amdgpu_fence_schedule_check - schedule lockup check
*
* @ring: pointer to struct amdgpu_ring
*
* Queues a delayed work item to check for lockups.
*/
static void amdgpu_fence_schedule_check(struct amdgpu_ring *ring)
{
/*
* Do not reset the timer here with mod_delayed_work,
* this can livelock in an interaction with TTM delayed destroy.
*/
queue_delayed_work(system_power_efficient_wq,
&ring->fence_drv.lockup_work,
AMDGPU_FENCE_JIFFIES_TIMEOUT);
}
/** /**
* amdgpu_fence_emit - emit a fence on the requested ring * amdgpu_fence_emit - emit a fence on the requested ring
* *
...@@ -118,7 +103,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, ...@@ -118,7 +103,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
/* we are protected by the ring emission mutex */ /* we are protected by the ring emission mutex */
*fence = kmalloc(sizeof(struct amdgpu_fence), GFP_KERNEL); *fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
if ((*fence) == NULL) { if ((*fence) == NULL) {
return -ENOMEM; return -ENOMEM;
} }
...@@ -132,10 +117,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, ...@@ -132,10 +117,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
(*fence)->seq, (*fence)->seq,
AMDGPU_FENCE_FLAG_INT); AMDGPU_FENCE_FLAG_INT);
trace_amdgpu_fence_emit(ring->adev->ddev, ring->idx, (*fence)->seq);
return 0; return 0;
} }
/**
* amdgpu_fence_schedule_fallback - schedule fallback check
*
* @ring: pointer to struct amdgpu_ring
*
* Start a timer as fallback to our interrupts.
*/
static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
{
mod_timer(&ring->fence_drv.fallback_timer,
jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
}
/** /**
* amdgpu_fence_activity - check for fence activity * amdgpu_fence_activity - check for fence activity
* *
...@@ -202,45 +199,38 @@ static bool amdgpu_fence_activity(struct amdgpu_ring *ring) ...@@ -202,45 +199,38 @@ static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq); } while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
if (seq < last_emitted) if (seq < last_emitted)
amdgpu_fence_schedule_check(ring); amdgpu_fence_schedule_fallback(ring);
return wake; return wake;
} }
/** /**
* amdgpu_fence_check_lockup - check for hardware lockup * amdgpu_fence_process - process a fence
* *
* @work: delayed work item * @adev: amdgpu_device pointer
* @ring: ring index the fence is associated with
* *
* Checks for fence activity and if there is none probe * Checks the current fence value and wakes the fence queue
* the hardware if a lockup occured. * if the sequence number has increased (all asics).
*/ */
static void amdgpu_fence_check_lockup(struct work_struct *work) void amdgpu_fence_process(struct amdgpu_ring *ring)
{ {
struct amdgpu_fence_driver *fence_drv;
struct amdgpu_ring *ring;
fence_drv = container_of(work, struct amdgpu_fence_driver,
lockup_work.work);
ring = fence_drv->ring;
if (amdgpu_fence_activity(ring)) if (amdgpu_fence_activity(ring))
wake_up_all(&ring->fence_drv.fence_queue); wake_up_all(&ring->fence_drv.fence_queue);
} }
/** /**
* amdgpu_fence_process - process a fence * amdgpu_fence_fallback - fallback for hardware interrupts
* *
* @adev: amdgpu_device pointer * @work: delayed work item
* @ring: ring index the fence is associated with
* *
* Checks the current fence value and wakes the fence queue * Checks for fence activity.
* if the sequence number has increased (all asics).
*/ */
void amdgpu_fence_process(struct amdgpu_ring *ring) static void amdgpu_fence_fallback(unsigned long arg)
{ {
if (amdgpu_fence_activity(ring)) struct amdgpu_ring *ring = (void *)arg;
wake_up_all(&ring->fence_drv.fence_queue);
amdgpu_fence_process(ring);
} }
/** /**
...@@ -290,7 +280,7 @@ static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq) ...@@ -290,7 +280,7 @@ static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
if (atomic64_read(&ring->fence_drv.last_seq) >= seq) if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
return 0; return 0;
amdgpu_fence_schedule_check(ring); amdgpu_fence_schedule_fallback(ring);
wait_event(ring->fence_drv.fence_queue, ( wait_event(ring->fence_drv.fence_queue, (
(signaled = amdgpu_fence_seq_signaled(ring, seq)))); (signaled = amdgpu_fence_seq_signaled(ring, seq))));
...@@ -491,9 +481,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) ...@@ -491,9 +481,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
atomic64_set(&ring->fence_drv.last_seq, 0); atomic64_set(&ring->fence_drv.last_seq, 0);
ring->fence_drv.initialized = false; ring->fence_drv.initialized = false;
INIT_DELAYED_WORK(&ring->fence_drv.lockup_work, setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
amdgpu_fence_check_lockup); (unsigned long)ring);
ring->fence_drv.ring = ring;
init_waitqueue_head(&ring->fence_drv.fence_queue); init_waitqueue_head(&ring->fence_drv.fence_queue);
...@@ -536,6 +525,13 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) ...@@ -536,6 +525,13 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
*/ */
int amdgpu_fence_driver_init(struct amdgpu_device *adev) int amdgpu_fence_driver_init(struct amdgpu_device *adev)
{ {
if (atomic_inc_return(&amdgpu_fence_slab_ref) == 1) {
amdgpu_fence_slab = kmem_cache_create(
"amdgpu_fence", sizeof(struct amdgpu_fence), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!amdgpu_fence_slab)
return -ENOMEM;
}
if (amdgpu_debugfs_fence_init(adev)) if (amdgpu_debugfs_fence_init(adev))
dev_err(adev->dev, "fence debugfs file creation failed\n"); dev_err(adev->dev, "fence debugfs file creation failed\n");
...@@ -554,9 +550,12 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) ...@@ -554,9 +550,12 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
{ {
int i, r; int i, r;
if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
kmem_cache_destroy(amdgpu_fence_slab);
mutex_lock(&adev->ring_lock); mutex_lock(&adev->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) { for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i]; struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->fence_drv.initialized) if (!ring || !ring->fence_drv.initialized)
continue; continue;
r = amdgpu_fence_wait_empty(ring); r = amdgpu_fence_wait_empty(ring);
...@@ -568,6 +567,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) ...@@ -568,6 +567,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, ring->fence_drv.irq_src, amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type); ring->fence_drv.irq_type);
amd_sched_fini(&ring->sched); amd_sched_fini(&ring->sched);
del_timer_sync(&ring->fence_drv.fallback_timer);
ring->fence_drv.initialized = false; ring->fence_drv.initialized = false;
} }
mutex_unlock(&adev->ring_lock); mutex_unlock(&adev->ring_lock);
...@@ -751,18 +751,25 @@ static bool amdgpu_fence_enable_signaling(struct fence *f) ...@@ -751,18 +751,25 @@ static bool amdgpu_fence_enable_signaling(struct fence *f)
fence->fence_wake.func = amdgpu_fence_check_signaled; fence->fence_wake.func = amdgpu_fence_check_signaled;
__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake); __add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
fence_get(f); fence_get(f);
amdgpu_fence_schedule_check(ring); if (!timer_pending(&ring->fence_drv.fallback_timer))
amdgpu_fence_schedule_fallback(ring);
FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx); FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
return true; return true;
} }
static void amdgpu_fence_release(struct fence *f)
{
struct amdgpu_fence *fence = to_amdgpu_fence(f);
kmem_cache_free(amdgpu_fence_slab, fence);
}
const struct fence_ops amdgpu_fence_ops = { const struct fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name, .get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name, .get_timeline_name = amdgpu_fence_get_timeline_name,
.enable_signaling = amdgpu_fence_enable_signaling, .enable_signaling = amdgpu_fence_enable_signaling,
.signaled = amdgpu_fence_is_signaled, .signaled = amdgpu_fence_is_signaled,
.wait = fence_default_wait, .wait = fence_default_wait,
.release = NULL, .release = amdgpu_fence_release,
}; };
/* /*
......
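The fence driver above drops the delayed-work lockup check in favour of a plain fallback timer that is only armed while a fence is actually being waited on. The general shape of that pattern, as a sketch (placeholder names my_drv, my_fallback and my_check_activity; assumes a struct timer_list fallback_timer member and a MY_TIMEOUT_JIFFIES constant):

        static void my_fallback(unsigned long arg)
        {
                struct my_drv *drv = (struct my_drv *)arg;

                my_check_activity(drv);         /* e.g. re-check fence activity */
        }

        /* init */
        setup_timer(&drv->fallback_timer, my_fallback, (unsigned long)drv);

        /* arm (or re-arm) only when needed, e.g. when signaling is enabled */
        if (!timer_pending(&drv->fallback_timer))
                mod_timer(&drv->fallback_timer, jiffies + MY_TIMEOUT_JIFFIES);

        /* teardown: make sure the handler is not still running */
        del_timer_sync(&drv->fallback_timer);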
...@@ -483,6 +483,9 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, ...@@ -483,6 +483,9 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
if (domain == AMDGPU_GEM_DOMAIN_CPU) if (domain == AMDGPU_GEM_DOMAIN_CPU)
goto error_unreserve; goto error_unreserve;
} }
r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
if (r)
goto error_unreserve;
r = amdgpu_vm_clear_freed(adev, bo_va->vm); r = amdgpu_vm_clear_freed(adev, bo_va->vm);
if (r) if (r)
...@@ -512,6 +515,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, ...@@ -512,6 +515,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_bo *rbo; struct amdgpu_bo *rbo;
struct amdgpu_bo_va *bo_va; struct amdgpu_bo_va *bo_va;
struct ttm_validate_buffer tv, tv_pd;
struct ww_acquire_ctx ticket;
struct list_head list, duplicates;
uint32_t invalid_flags, va_flags = 0; uint32_t invalid_flags, va_flags = 0;
int r = 0; int r = 0;
...@@ -549,7 +555,18 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, ...@@ -549,7 +555,18 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -ENOENT; return -ENOENT;
mutex_lock(&fpriv->vm.mutex); mutex_lock(&fpriv->vm.mutex);
rbo = gem_to_amdgpu_bo(gobj); rbo = gem_to_amdgpu_bo(gobj);
r = amdgpu_bo_reserve(rbo, false); INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&duplicates);
tv.bo = &rbo->tbo;
tv.shared = true;
list_add(&tv.head, &list);
if (args->operation == AMDGPU_VA_OP_MAP) {
tv_pd.bo = &fpriv->vm.page_directory->tbo;
tv_pd.shared = true;
list_add(&tv_pd.head, &list);
}
r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
if (r) { if (r) {
mutex_unlock(&fpriv->vm.mutex); mutex_unlock(&fpriv->vm.mutex);
drm_gem_object_unreference_unlocked(gobj); drm_gem_object_unreference_unlocked(gobj);
...@@ -558,7 +575,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, ...@@ -558,7 +575,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
bo_va = amdgpu_vm_bo_find(&fpriv->vm, rbo); bo_va = amdgpu_vm_bo_find(&fpriv->vm, rbo);
if (!bo_va) { if (!bo_va) {
amdgpu_bo_unreserve(rbo); ttm_eu_backoff_reservation(&ticket, &list);
drm_gem_object_unreference_unlocked(gobj);
mutex_unlock(&fpriv->vm.mutex); mutex_unlock(&fpriv->vm.mutex);
return -ENOENT; return -ENOENT;
} }
...@@ -581,7 +599,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, ...@@ -581,7 +599,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
default: default:
break; break;
} }
ttm_eu_backoff_reservation(&ticket, &list);
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE)) if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
amdgpu_gem_va_update_vm(adev, bo_va, args->operation); amdgpu_gem_va_update_vm(adev, bo_va, args->operation);
mutex_unlock(&fpriv->vm.mutex); mutex_unlock(&fpriv->vm.mutex);
......
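The VA ioctl now reserves both the GEM object and, for map operations, the VM page directory in one pass through the TTM execbuf-util helpers instead of a bare amdgpu_bo_reserve(). A condensed sketch of that reservation pattern (illustrative; bo and pd_bo are placeholders, error handling trimmed):

        struct ttm_validate_buffer tv, tv_pd;
        struct ww_acquire_ctx ticket;
        struct list_head list, duplicates;
        int r;

        INIT_LIST_HEAD(&list);
        INIT_LIST_HEAD(&duplicates);

        tv.bo = &bo->tbo;
        tv.shared = true;
        list_add(&tv.head, &list);

        tv_pd.bo = &pd_bo->tbo;
        tv_pd.shared = true;
        list_add(&tv_pd.head, &list);

        /* reserve every BO on the list under one ww_mutex acquire context */
        r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
        if (r)
                return r;

        /* ... update page tables / mappings while everything is reserved ... */

        ttm_eu_backoff_reservation(&ticket, &list);     /* drop all reservations */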
...@@ -62,7 +62,7 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, ...@@ -62,7 +62,7 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
int r; int r;
if (size) { if (size) {
r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo, r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
&ib->sa_bo, size, 256); &ib->sa_bo, size, 256);
if (r) { if (r) {
dev_err(adev->dev, "failed to get a new IB (%d)\n", r); dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
...@@ -216,7 +216,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, ...@@ -216,7 +216,7 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
} }
if (ib->vm) if (ib->vm)
amdgpu_vm_fence(adev, ib->vm, ib->fence); amdgpu_vm_fence(adev, ib->vm, &ib->fence->base);
amdgpu_ring_unlock_commit(ring); amdgpu_ring_unlock_commit(ring);
return 0; return 0;
......
...@@ -189,10 +189,9 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev, ...@@ -189,10 +189,9 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager); struct amdgpu_sa_manager *sa_manager);
int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev, int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager); struct amdgpu_sa_manager *sa_manager);
int amdgpu_sa_bo_new(struct amdgpu_device *adev, int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
struct amdgpu_sa_manager *sa_manager, struct amdgpu_sa_bo **sa_bo,
struct amdgpu_sa_bo **sa_bo, unsigned size, unsigned align);
unsigned size, unsigned align);
void amdgpu_sa_bo_free(struct amdgpu_device *adev, void amdgpu_sa_bo_free(struct amdgpu_device *adev,
struct amdgpu_sa_bo **sa_bo, struct amdgpu_sa_bo **sa_bo,
struct fence *fence); struct fence *fence);
......
...@@ -311,8 +311,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, ...@@ -311,8 +311,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
return false; return false;
} }
int amdgpu_sa_bo_new(struct amdgpu_device *adev, int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
struct amdgpu_sa_manager *sa_manager,
struct amdgpu_sa_bo **sa_bo, struct amdgpu_sa_bo **sa_bo,
unsigned size, unsigned align) unsigned size, unsigned align)
{ {
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <drm/drmP.h> #include <drm/drmP.h>
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_trace.h"
static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job) static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
{ {
...@@ -44,11 +45,8 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job) ...@@ -44,11 +45,8 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
return NULL; return NULL;
} }
job = to_amdgpu_job(sched_job); job = to_amdgpu_job(sched_job);
mutex_lock(&job->job_lock); trace_amdgpu_sched_run_job(job);
r = amdgpu_ib_schedule(job->adev, r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner);
job->num_ibs,
job->ibs,
job->base.owner);
if (r) { if (r) {
DRM_ERROR("Error scheduling IBs (%d)\n", r); DRM_ERROR("Error scheduling IBs (%d)\n", r);
goto err; goto err;
...@@ -61,8 +59,6 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job) ...@@ -61,8 +59,6 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
if (job->free_job) if (job->free_job)
job->free_job(job); job->free_job(job);
mutex_unlock(&job->job_lock);
fence_put(&job->base.s_fence->base);
kfree(job); kfree(job);
return fence ? &fence->base : NULL; return fence ? &fence->base : NULL;
} }
...@@ -88,21 +84,19 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, ...@@ -88,21 +84,19 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
return -ENOMEM; return -ENOMEM;
job->base.sched = &ring->sched; job->base.sched = &ring->sched;
job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity; job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
if (!job->base.s_fence) {
kfree(job);
return -ENOMEM;
}
*f = fence_get(&job->base.s_fence->base);
job->adev = adev; job->adev = adev;
job->ibs = ibs; job->ibs = ibs;
job->num_ibs = num_ibs; job->num_ibs = num_ibs;
job->base.owner = owner; job->owner = owner;
mutex_init(&job->job_lock);
job->free_job = free_job; job->free_job = free_job;
mutex_lock(&job->job_lock); amd_sched_entity_push_job(&job->base);
r = amd_sched_entity_push_job(&job->base);
if (r) {
mutex_unlock(&job->job_lock);
kfree(job);
return r;
}
*f = fence_get(&job->base.s_fence->base);
mutex_unlock(&job->job_lock);
} else { } else {
r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
if (r) if (r)
......
...@@ -40,7 +40,7 @@ int amdgpu_semaphore_create(struct amdgpu_device *adev, ...@@ -40,7 +40,7 @@ int amdgpu_semaphore_create(struct amdgpu_device *adev,
if (*semaphore == NULL) { if (*semaphore == NULL) {
return -ENOMEM; return -ENOMEM;
} }
r = amdgpu_sa_bo_new(adev, &adev->ring_tmp_bo, r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
&(*semaphore)->sa_bo, 8, 8); &(*semaphore)->sa_bo, 8, 8);
if (r) { if (r) {
kfree(*semaphore); kfree(*semaphore);
......
...@@ -302,8 +302,14 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync, ...@@ -302,8 +302,14 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
return -EINVAL; return -EINVAL;
} }
if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores || if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) {
(count >= AMDGPU_NUM_SYNCS)) { r = fence_wait(&fence->base, true);
if (r)
return r;
continue;
}
if (count >= AMDGPU_NUM_SYNCS) {
/* not enough room, wait manually */ /* not enough room, wait manually */
r = fence_wait(&fence->base, false); r = fence_wait(&fence->base, false);
if (r) if (r)
......
...@@ -48,6 +48,57 @@ TRACE_EVENT(amdgpu_cs, ...@@ -48,6 +48,57 @@ TRACE_EVENT(amdgpu_cs,
__entry->fences) __entry->fences)
); );
TRACE_EVENT(amdgpu_cs_ioctl,
TP_PROTO(struct amdgpu_job *job),
TP_ARGS(job),
TP_STRUCT__entry(
__field(struct amdgpu_device *, adev)
__field(struct amd_sched_job *, sched_job)
__field(struct amdgpu_ib *, ib)
__field(struct fence *, fence)
__field(char *, ring_name)
__field(u32, num_ibs)
),
TP_fast_assign(
__entry->adev = job->adev;
__entry->sched_job = &job->base;
__entry->ib = job->ibs;
__entry->fence = &job->base.s_fence->base;
__entry->ring_name = job->ibs[0].ring->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
__entry->adev, __entry->sched_job, __entry->ib,
__entry->fence, __entry->ring_name, __entry->num_ibs)
);
TRACE_EVENT(amdgpu_sched_run_job,
TP_PROTO(struct amdgpu_job *job),
TP_ARGS(job),
TP_STRUCT__entry(
__field(struct amdgpu_device *, adev)
__field(struct amd_sched_job *, sched_job)
__field(struct amdgpu_ib *, ib)
__field(struct fence *, fence)
__field(char *, ring_name)
__field(u32, num_ibs)
),
TP_fast_assign(
__entry->adev = job->adev;
__entry->sched_job = &job->base;
__entry->ib = job->ibs;
__entry->fence = &job->base.s_fence->base;
__entry->ring_name = job->ibs[0].ring->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
__entry->adev, __entry->sched_job, __entry->ib,
__entry->fence, __entry->ring_name, __entry->num_ibs)
);
TRACE_EVENT(amdgpu_vm_grab_id, TRACE_EVENT(amdgpu_vm_grab_id,
TP_PROTO(unsigned vmid, int ring), TP_PROTO(unsigned vmid, int ring),
TP_ARGS(vmid, ring), TP_ARGS(vmid, ring),
...@@ -196,49 +247,6 @@ TRACE_EVENT(amdgpu_bo_list_set, ...@@ -196,49 +247,6 @@ TRACE_EVENT(amdgpu_bo_list_set,
TP_printk("list=%p, bo=%p", __entry->list, __entry->bo) TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
); );
DECLARE_EVENT_CLASS(amdgpu_fence_request,
TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
TP_ARGS(dev, ring, seqno),
TP_STRUCT__entry(
__field(u32, dev)
__field(int, ring)
__field(u32, seqno)
),
TP_fast_assign(
__entry->dev = dev->primary->index;
__entry->ring = ring;
__entry->seqno = seqno;
),
TP_printk("dev=%u, ring=%d, seqno=%u",
__entry->dev, __entry->ring, __entry->seqno)
);
DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_emit,
TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
TP_ARGS(dev, ring, seqno)
);
DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_begin,
TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
TP_ARGS(dev, ring, seqno)
);
DEFINE_EVENT(amdgpu_fence_request, amdgpu_fence_wait_end,
TP_PROTO(struct drm_device *dev, int ring, u32 seqno),
TP_ARGS(dev, ring, seqno)
);
DECLARE_EVENT_CLASS(amdgpu_semaphore_request, DECLARE_EVENT_CLASS(amdgpu_semaphore_request,
TP_PROTO(int ring, struct amdgpu_semaphore *sem), TP_PROTO(int ring, struct amdgpu_semaphore *sem),
......
...@@ -1073,10 +1073,10 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data) ...@@ -1073,10 +1073,10 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
ret = drm_mm_dump_table(m, mm); ret = drm_mm_dump_table(m, mm);
spin_unlock(&glob->lru_lock); spin_unlock(&glob->lru_lock);
if (ttm_pl == TTM_PL_VRAM) if (ttm_pl == TTM_PL_VRAM)
seq_printf(m, "man size:%llu pages, ram usage:%luMB, vis usage:%luMB\n", seq_printf(m, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n",
adev->mman.bdev.man[ttm_pl].size, adev->mman.bdev.man[ttm_pl].size,
atomic64_read(&adev->vram_usage) >> 20, (u64)atomic64_read(&adev->vram_usage) >> 20,
atomic64_read(&adev->vram_vis_usage) >> 20); (u64)atomic64_read(&adev->vram_vis_usage) >> 20);
return ret; return ret;
} }
......
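The format-string change above is needed because atomic64_read() does not return the same C type everywhere (long on most 64-bit builds, long long in the generic 32-bit implementation), so %lu is not portable; casting to u64 and printing with %llu is. Sketch:

        seq_printf(m, "ram usage:%lluMB\n",
                   (u64)atomic64_read(&adev->vram_usage) >> 20);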
...@@ -6569,12 +6569,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev, ...@@ -6569,12 +6569,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev,
switch (state) { switch (state) {
case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_DISABLE:
cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
break; break;
case AMDGPU_IRQ_STATE_ENABLE: case AMDGPU_IRQ_STATE_ENABLE:
cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK; cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;
WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
break; break;
default: default:
...@@ -6586,12 +6586,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev, ...@@ -6586,12 +6586,12 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev,
switch (state) { switch (state) {
case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_DISABLE:
cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
break; break;
case AMDGPU_IRQ_STATE_ENABLE: case AMDGPU_IRQ_STATE_ENABLE:
cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT); cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK; cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTL_MASK_MASK;
WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int); WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);
break; break;
default: default:
......
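The Bonaire thermal fix swaps the set/clear operations because THERM_INTH_MASK/THERM_INTL_MASK are interrupt mask bits: writing a 1 blocks the interrupt, so "disable" must set the bit and "enable" must clear it. The corrected read-modify-write, sketched for the high-threshold bit (the low-threshold case is symmetric):

        cg_thermal_int = RREG32_SMC(ixCG_THERMAL_INT);
        if (enable)
                cg_thermal_int &= ~CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;  /* unmask */
        else
                cg_thermal_int |= CG_THERMAL_INT_CTRL__THERM_INTH_MASK_MASK;   /* mask */
        WREG32_SMC(ixCG_THERMAL_INT, cg_thermal_int);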
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev); static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev);
static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev); static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
MODULE_FIRMWARE("radeon/boniare_mc.bin"); MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin"); MODULE_FIRMWARE("radeon/hawaii_mc.bin");
/** /**
...@@ -501,6 +501,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) ...@@ -501,6 +501,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
WREG32(mmVM_L2_CNTL, tmp); WREG32(mmVM_L2_CNTL, tmp);
tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(0, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
...@@ -960,12 +961,10 @@ static int gmc_v7_0_sw_init(void *handle) ...@@ -960,12 +961,10 @@ static int gmc_v7_0_sw_init(void *handle)
static int gmc_v7_0_sw_fini(void *handle) static int gmc_v7_0_sw_fini(void *handle)
{ {
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) { if (adev->vm_manager.enabled) {
for (i = 0; i < AMDGPU_NUM_VM; ++i) amdgpu_vm_manager_fini(adev);
fence_put(adev->vm_manager.active[i]);
gmc_v7_0_vm_fini(adev); gmc_v7_0_vm_fini(adev);
adev->vm_manager.enabled = false; adev->vm_manager.enabled = false;
} }
...@@ -1010,12 +1009,10 @@ static int gmc_v7_0_hw_fini(void *handle) ...@@ -1010,12 +1009,10 @@ static int gmc_v7_0_hw_fini(void *handle)
static int gmc_v7_0_suspend(void *handle) static int gmc_v7_0_suspend(void *handle)
{ {
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) { if (adev->vm_manager.enabled) {
for (i = 0; i < AMDGPU_NUM_VM; ++i) amdgpu_vm_manager_fini(adev);
fence_put(adev->vm_manager.active[i]);
gmc_v7_0_vm_fini(adev); gmc_v7_0_vm_fini(adev);
adev->vm_manager.enabled = false; adev->vm_manager.enabled = false;
} }
......
...@@ -629,6 +629,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) ...@@ -629,6 +629,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, EFFECTIVE_L2_QUEUE_SIZE, 7);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
WREG32(mmVM_L2_CNTL, tmp); WREG32(mmVM_L2_CNTL, tmp);
tmp = RREG32(mmVM_L2_CNTL2); tmp = RREG32(mmVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
...@@ -979,12 +980,10 @@ static int gmc_v8_0_sw_init(void *handle) ...@@ -979,12 +980,10 @@ static int gmc_v8_0_sw_init(void *handle)
static int gmc_v8_0_sw_fini(void *handle) static int gmc_v8_0_sw_fini(void *handle)
{ {
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) { if (adev->vm_manager.enabled) {
for (i = 0; i < AMDGPU_NUM_VM; ++i) amdgpu_vm_manager_fini(adev);
fence_put(adev->vm_manager.active[i]);
gmc_v8_0_vm_fini(adev); gmc_v8_0_vm_fini(adev);
adev->vm_manager.enabled = false; adev->vm_manager.enabled = false;
} }
...@@ -1031,12 +1030,10 @@ static int gmc_v8_0_hw_fini(void *handle) ...@@ -1031,12 +1030,10 @@ static int gmc_v8_0_hw_fini(void *handle)
static int gmc_v8_0_suspend(void *handle) static int gmc_v8_0_suspend(void *handle)
{ {
int i;
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->vm_manager.enabled) { if (adev->vm_manager.enabled) {
for (i = 0; i < AMDGPU_NUM_VM; ++i) amdgpu_vm_manager_fini(adev);
fence_put(adev->vm_manager.active[i]);
gmc_v8_0_vm_fini(adev); gmc_v8_0_vm_fini(adev);
adev->vm_manager.enabled = false; adev->vm_manager.enabled = false;
} }
......
...@@ -16,6 +16,8 @@ TRACE_EVENT(amd_sched_job, ...@@ -16,6 +16,8 @@ TRACE_EVENT(amd_sched_job,
TP_ARGS(sched_job), TP_ARGS(sched_job),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(struct amd_sched_entity *, entity) __field(struct amd_sched_entity *, entity)
__field(struct amd_sched_job *, sched_job)
__field(struct fence *, fence)
__field(const char *, name) __field(const char *, name)
__field(u32, job_count) __field(u32, job_count)
__field(int, hw_job_count) __field(int, hw_job_count)
...@@ -23,16 +25,32 @@ TRACE_EVENT(amd_sched_job, ...@@ -23,16 +25,32 @@ TRACE_EVENT(amd_sched_job,
TP_fast_assign( TP_fast_assign(
__entry->entity = sched_job->s_entity; __entry->entity = sched_job->s_entity;
__entry->sched_job = sched_job;
__entry->fence = &sched_job->s_fence->base;
__entry->name = sched_job->sched->name; __entry->name = sched_job->sched->name;
__entry->job_count = kfifo_len( __entry->job_count = kfifo_len(
&sched_job->s_entity->job_queue) / sizeof(sched_job); &sched_job->s_entity->job_queue) / sizeof(sched_job);
__entry->hw_job_count = atomic_read( __entry->hw_job_count = atomic_read(
&sched_job->sched->hw_rq_count); &sched_job->sched->hw_rq_count);
), ),
TP_printk("entity=%p, ring=%s, job count:%u, hw job count:%d", TP_printk("entity=%p, sched job=%p, fence=%p, ring=%s, job count:%u, hw job count:%d",
__entry->entity, __entry->name, __entry->job_count, __entry->entity, __entry->sched_job, __entry->fence, __entry->name,
__entry->hw_job_count) __entry->job_count, __entry->hw_job_count)
); );
TRACE_EVENT(amd_sched_process_job,
TP_PROTO(struct amd_sched_fence *fence),
TP_ARGS(fence),
TP_STRUCT__entry(
__field(struct fence *, fence)
),
TP_fast_assign(
__entry->fence = &fence->base;
),
TP_printk("fence=%p signaled", __entry->fence)
);
#endif #endif
/* This part must be outside protection */ /* This part must be outside protection */
......
...@@ -34,6 +34,9 @@ static struct amd_sched_job * ...@@ -34,6 +34,9 @@ static struct amd_sched_job *
amd_sched_entity_pop_job(struct amd_sched_entity *entity); amd_sched_entity_pop_job(struct amd_sched_entity *entity);
static void amd_sched_wakeup(struct amd_gpu_scheduler *sched); static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
struct kmem_cache *sched_fence_slab;
atomic_t sched_fence_slab_ref = ATOMIC_INIT(0);
/* Initialize a given run queue struct */ /* Initialize a given run queue struct */
static void amd_sched_rq_init(struct amd_sched_rq *rq) static void amd_sched_rq_init(struct amd_sched_rq *rq)
{ {
...@@ -273,22 +276,13 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job) ...@@ -273,22 +276,13 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
* *
* Returns 0 for success, negative error code otherwise. * Returns 0 for success, negative error code otherwise.
*/ */
int amd_sched_entity_push_job(struct amd_sched_job *sched_job) void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
{ {
struct amd_sched_entity *entity = sched_job->s_entity; struct amd_sched_entity *entity = sched_job->s_entity;
struct amd_sched_fence *fence = amd_sched_fence_create(
entity, sched_job->owner);
if (!fence)
return -ENOMEM;
fence_get(&fence->base);
sched_job->s_fence = fence;
wait_event(entity->sched->job_scheduled, wait_event(entity->sched->job_scheduled,
amd_sched_entity_in(sched_job)); amd_sched_entity_in(sched_job));
trace_amd_sched_job(sched_job); trace_amd_sched_job(sched_job);
return 0;
} }
/** /**
...@@ -343,6 +337,7 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb) ...@@ -343,6 +337,7 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
list_del_init(&s_fence->list); list_del_init(&s_fence->list);
spin_unlock_irqrestore(&sched->fence_list_lock, flags); spin_unlock_irqrestore(&sched->fence_list_lock, flags);
} }
trace_amd_sched_process_job(s_fence);
fence_put(&s_fence->base); fence_put(&s_fence->base);
wake_up_interruptible(&sched->wake_up_worker); wake_up_interruptible(&sched->wake_up_worker);
} }
...@@ -450,6 +445,13 @@ int amd_sched_init(struct amd_gpu_scheduler *sched, ...@@ -450,6 +445,13 @@ int amd_sched_init(struct amd_gpu_scheduler *sched,
init_waitqueue_head(&sched->wake_up_worker); init_waitqueue_head(&sched->wake_up_worker);
init_waitqueue_head(&sched->job_scheduled); init_waitqueue_head(&sched->job_scheduled);
atomic_set(&sched->hw_rq_count, 0); atomic_set(&sched->hw_rq_count, 0);
if (atomic_inc_return(&sched_fence_slab_ref) == 1) {
sched_fence_slab = kmem_cache_create(
"amd_sched_fence", sizeof(struct amd_sched_fence), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!sched_fence_slab)
return -ENOMEM;
}
/* Each scheduler will run on a seperate kernel thread */ /* Each scheduler will run on a seperate kernel thread */
sched->thread = kthread_run(amd_sched_main, sched, sched->name); sched->thread = kthread_run(amd_sched_main, sched, sched->name);
...@@ -470,4 +472,6 @@ void amd_sched_fini(struct amd_gpu_scheduler *sched) ...@@ -470,4 +472,6 @@ void amd_sched_fini(struct amd_gpu_scheduler *sched)
{ {
if (sched->thread) if (sched->thread)
kthread_stop(sched->thread); kthread_stop(sched->thread);
if (atomic_dec_and_test(&sched_fence_slab_ref))
kmem_cache_destroy(sched_fence_slab);
} }
...@@ -30,6 +30,9 @@ ...@@ -30,6 +30,9 @@
struct amd_gpu_scheduler; struct amd_gpu_scheduler;
struct amd_sched_rq; struct amd_sched_rq;
extern struct kmem_cache *sched_fence_slab;
extern atomic_t sched_fence_slab_ref;
/** /**
* A scheduler entity is a wrapper around a job queue or a group * A scheduler entity is a wrapper around a job queue or a group
* of other entities. Entities take turns emitting jobs from their * of other entities. Entities take turns emitting jobs from their
...@@ -76,7 +79,6 @@ struct amd_sched_job { ...@@ -76,7 +79,6 @@ struct amd_sched_job {
struct amd_gpu_scheduler *sched; struct amd_gpu_scheduler *sched;
struct amd_sched_entity *s_entity; struct amd_sched_entity *s_entity;
struct amd_sched_fence *s_fence; struct amd_sched_fence *s_fence;
void *owner;
}; };
extern const struct fence_ops amd_sched_fence_ops; extern const struct fence_ops amd_sched_fence_ops;
...@@ -128,7 +130,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched, ...@@ -128,7 +130,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
uint32_t jobs); uint32_t jobs);
void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
struct amd_sched_entity *entity); struct amd_sched_entity *entity);
int amd_sched_entity_push_job(struct amd_sched_job *sched_job); void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
struct amd_sched_fence *amd_sched_fence_create( struct amd_sched_fence *amd_sched_fence_create(
struct amd_sched_entity *s_entity, void *owner); struct amd_sched_entity *s_entity, void *owner);
......
...@@ -32,7 +32,7 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity ...@@ -32,7 +32,7 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity
struct amd_sched_fence *fence = NULL; struct amd_sched_fence *fence = NULL;
unsigned seq; unsigned seq;
fence = kzalloc(sizeof(struct amd_sched_fence), GFP_KERNEL); fence = kmem_cache_zalloc(sched_fence_slab, GFP_KERNEL);
if (fence == NULL) if (fence == NULL)
return NULL; return NULL;
fence->owner = owner; fence->owner = owner;
...@@ -71,11 +71,17 @@ static bool amd_sched_fence_enable_signaling(struct fence *f) ...@@ -71,11 +71,17 @@ static bool amd_sched_fence_enable_signaling(struct fence *f)
return true; return true;
} }
static void amd_sched_fence_release(struct fence *f)
{
struct amd_sched_fence *fence = to_amd_sched_fence(f);
kmem_cache_free(sched_fence_slab, fence);
}
const struct fence_ops amd_sched_fence_ops = { const struct fence_ops amd_sched_fence_ops = {
.get_driver_name = amd_sched_fence_get_driver_name, .get_driver_name = amd_sched_fence_get_driver_name,
.get_timeline_name = amd_sched_fence_get_timeline_name, .get_timeline_name = amd_sched_fence_get_timeline_name,
.enable_signaling = amd_sched_fence_enable_signaling, .enable_signaling = amd_sched_fence_enable_signaling,
.signaled = NULL, .signaled = NULL,
.wait = fence_default_wait, .wait = fence_default_wait,
.release = NULL, .release = amd_sched_fence_release,
}; };
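Both the amdgpu fence and the scheduler fence are now carved out of dedicated slab caches, with an atomic reference count guarding cache creation/destruction and a custom .release callback returning objects to the cache. The generic shape of that pattern (sketch; my_obj, my_slab, my_init and my_fini are placeholder names):

        static struct kmem_cache *my_slab;
        static atomic_t my_slab_ref = ATOMIC_INIT(0);

        int my_init(void)
        {
                if (atomic_inc_return(&my_slab_ref) == 1) {
                        my_slab = kmem_cache_create("my_obj",
                                                    sizeof(struct my_obj), 0,
                                                    SLAB_HWCACHE_ALIGN, NULL);
                        if (!my_slab)
                                return -ENOMEM;
                }
                return 0;
        }

        void my_fini(void)
        {
                if (atomic_dec_and_test(&my_slab_ref))
                        kmem_cache_destroy(my_slab);
        }

        /* allocation, and the matching free from a fence .release callback */
        struct my_obj *obj = kmem_cache_zalloc(my_slab, GFP_KERNEL);
        ...
        kmem_cache_free(my_slab, obj);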
...@@ -221,11 +221,17 @@ int radeon_bo_create(struct radeon_device *rdev, ...@@ -221,11 +221,17 @@ int radeon_bo_create(struct radeon_device *rdev,
if (!(rdev->flags & RADEON_IS_PCIE)) if (!(rdev->flags & RADEON_IS_PCIE))
bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
/* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx
* See https://bugs.freedesktop.org/show_bug.cgi?id=91268
*/
if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635)
bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
* See https://bugs.freedesktop.org/show_bug.cgi?id=84627 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
*/ */
bo->flags &= ~RADEON_GEM_GTT_WC; bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) #elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
/* Don't try to enable write-combining when it can't work, or things /* Don't try to enable write-combining when it can't work, or things
* may be slow * may be slow
...@@ -235,9 +241,10 @@ int radeon_bo_create(struct radeon_device *rdev, ...@@ -235,9 +241,10 @@ int radeon_bo_create(struct radeon_device *rdev,
#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ #warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
thanks to write-combining thanks to write-combining
DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " if (bo->flags & RADEON_GEM_GTT_WC)
"better performance thanks to write-combining\n"); DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
bo->flags &= ~RADEON_GEM_GTT_WC; "better performance thanks to write-combining\n");
bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#endif #endif
radeon_ttm_placement_from_domain(bo, domain); radeon_ttm_placement_from_domain(bo, domain);
......
...@@ -1542,8 +1542,7 @@ int radeon_pm_late_init(struct radeon_device *rdev) ...@@ -1542,8 +1542,7 @@ int radeon_pm_late_init(struct radeon_device *rdev)
ret = device_create_file(rdev->dev, &dev_attr_power_method); ret = device_create_file(rdev->dev, &dev_attr_power_method);
if (ret) if (ret)
DRM_ERROR("failed to create device file for power method\n"); DRM_ERROR("failed to create device file for power method\n");
if (!ret) rdev->pm.sysfs_initialized = true;
rdev->pm.sysfs_initialized = true;
} }
mutex_lock(&rdev->pm.mutex); mutex_lock(&rdev->pm.mutex);
......
...@@ -2927,7 +2927,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = { ...@@ -2927,7 +2927,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = {
{ PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 },
{ PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x174b, 0xe271, 0, 120000 },
{ PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 }, { PCI_VENDOR_ID_ATI, 0x6810, 0x174b, 0xe271, 85000, 90000 },
{ PCI_VENDOR_ID_ATI, 0x6811, 0x1762, 0x2015, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 },
{ PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 }, { PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 },
{ 0, 0, 0, 0 }, { 0, 0, 0, 0 },
}; };
......