Commit 2d55e45a authored by Christian König, committed by Alex Deucher

drm/amdgpu: use SDMA round robin for VM updates v3

Distribute the load on both rings.

v2: use a loop for the initialization
v3: agd: rebase on upstream
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
parent 3ee94136
drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -942,7 +942,9 @@ struct amdgpu_vm_manager {
 	bool enabled;
 	/* vm pte handling */
 	const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
-	struct amdgpu_ring *vm_pte_funcs_ring;
+	struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS];
+	unsigned vm_pte_num_rings;
+	atomic_t vm_pte_next_ring;
 };
 
 void amdgpu_vm_manager_init(struct amdgpu_device *adev);
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1403,7 +1403,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->mman.buffer_funcs = NULL;
 	adev->mman.buffer_funcs_ring = NULL;
 	adev->vm_manager.vm_pte_funcs = NULL;
-	adev->vm_manager.vm_pte_funcs_ring = NULL;
+	adev->vm_manager.vm_pte_num_rings = 0;
 	adev->gart.gart_funcs = NULL;
 	adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -325,13 +325,15 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 			      struct amdgpu_vm *vm,
 			      struct amdgpu_bo *bo)
 {
-	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
+	struct amdgpu_ring *ring;
 	struct fence *fence = NULL;
 	struct amdgpu_job *job;
 	unsigned entries;
 	uint64_t addr;
 	int r;
 
+	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
 	r = reservation_object_reserve_shared(bo->tbo.resv);
 	if (r)
 		return r;
@@ -413,7 +415,7 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 				    struct amdgpu_vm *vm)
 {
-	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
+	struct amdgpu_ring *ring;
 	struct amdgpu_bo *pd = vm->page_directory;
 	uint64_t pd_addr = amdgpu_bo_gpu_offset(pd);
 	uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
@@ -425,6 +427,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	int r;
 
+	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
 	/* padding, etc. */
 	ndw = 64;
@@ -670,7 +674,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				       uint32_t flags, uint64_t addr,
 				       struct fence **fence)
 {
-	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
+	struct amdgpu_ring *ring;
 	void *owner = AMDGPU_FENCE_OWNER_VM;
 	unsigned nptes, ncmds, ndw;
 	struct amdgpu_job *job;
@@ -678,6 +682,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	struct fence *f = NULL;
 	int r;
 
+	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
 	/* sync to everything on unmapping */
 	if (!(flags & AMDGPU_PTE_VALID))
 		owner = AMDGPU_FENCE_OWNER_UNDEFINED;
@@ -1269,10 +1275,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
  */
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
-	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT * 8);
 	unsigned pd_size, pd_entries;
+	unsigned ring_instance;
+	struct amdgpu_ring *ring;
 	struct amd_sched_rq *rq;
 	int i, r;
@@ -1298,6 +1305,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	}
 
 	/* create scheduler entity for page table updates */
+	ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
+	ring_instance %= adev->vm_manager.vm_pte_num_rings;
+	ring = adev->vm_manager.vm_pte_rings[ring_instance];
+
 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
 	r = amd_sched_entity_init(&ring->sched, &vm->entity,
 				  rq, amdgpu_sched_jobs);
@@ -1345,11 +1356,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  */
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
-	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
 	int i;
 
-	amd_sched_entity_fini(&ring->sched, &vm->entity);
+	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
 
 	if (!RB_EMPTY_ROOT(&vm->va)) {
 		dev_err(adev->dev, "still active bo inside vm\n");
@@ -1397,6 +1407,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	for (i = 1; i < adev->vm_manager.num_ids; ++i)
 		list_add_tail(&adev->vm_manager.ids[i].list,
 			      &adev->vm_manager.ids_lru);
+
+	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 }
 
 /**
drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -1371,8 +1371,14 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
 
 static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+	unsigned i;
+
 	if (adev->vm_manager.vm_pte_funcs == NULL) {
 		adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
-		adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
+		for (i = 0; i < adev->sdma.num_instances; i++)
+			adev->vm_manager.vm_pte_rings[i] =
+				&adev->sdma.instance[i].ring;
+
+		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
 	}
 }
drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -1376,8 +1376,14 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
 
 static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+	unsigned i;
+
 	if (adev->vm_manager.vm_pte_funcs == NULL) {
 		adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
-		adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
+		for (i = 0; i < adev->sdma.num_instances; i++)
+			adev->vm_manager.vm_pte_rings[i] =
+				&adev->sdma.instance[i].ring;
+
+		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
 	}
 }
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -1643,8 +1643,14 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
 
 static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
 {
+	unsigned i;
+
 	if (adev->vm_manager.vm_pte_funcs == NULL) {
 		adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
-		adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
+		for (i = 0; i < adev->sdma.num_instances; i++)
+			adev->vm_manager.vm_pte_rings[i] =
+				&adev->sdma.instance[i].ring;
+
+		adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
 	}
 }
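
Taken together, the hunks above replace the single vm_pte_funcs_ring pointer with an array of SDMA rings plus an atomic counter: amdgpu_vm_init() bumps the counter, takes it modulo the number of rings, and binds the VM's scheduler entity to the resulting ring, while the other call sites recover that ring again via container_of(vm->entity.sched, struct amdgpu_ring, sched). As a purely illustrative aside, here is a hypothetical, stand-alone user-space sketch of the same round-robin idea; pick_ring() and NUM_RINGS are made-up names for illustration, and C11 atomics stand in for the kernel's atomic_inc_return():

/* Hypothetical sketch of round-robin assignment with an atomic counter.
 * Each new consumer (a VM in the patch above) gets the next ring in turn,
 * so the load is spread evenly across all rings without any locking. */
#include <stdatomic.h>
#include <stdio.h>

#define NUM_RINGS 2	/* e.g. two SDMA instances */

static atomic_uint next_ring;

static unsigned pick_ring(void)
{
	/* atomic_fetch_add returns the previous value; the kernel patch uses
	 * atomic_inc_return (the new value) instead - either way the modulo
	 * wrap distributes consumers evenly across the rings. */
	return atomic_fetch_add(&next_ring, 1) % NUM_RINGS;
}

int main(void)
{
	for (int vm = 0; vm < 5; vm++)
		printf("VM %d -> ring %u\n", vm, pick_ring());
	return 0;
}

Because each VM's scheduler entity remembers the ring it was bound to, only amdgpu_vm_init() needs the round-robin step; amdgpu_vm_clear_bo(), amdgpu_vm_update_page_directory() and amdgpu_vm_bo_update_mapping() simply look the ring back up from the entity.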