Commit 90a51a32 authored by Christian König, committed by Alex Deucher

drm/radeon: allocate page tables on demand v4

Based on Dmitry's work, but splitting the code into page
directory and page table handling makes it far more
readable and (hopefully) more reliable.

Allocations of page tables are made from the SA on demand,
that should still work fine since all page tables are of
the same size.

Also use the fact that allocations from the SA are mostly
contiguous (except for end-of-buffer wraps and under very
high memory pressure) to group updates sent to the chipset
specific code into larger chunks.

v3: mostly a rewrite of Dmitry's previous patch.
v4: fix some typos and coding style
Signed-off-by: Dmitry Cherkasov <Dmitrii.Cherkasov@amd.com>
Signed-off-by: Christian König <deathsimple@vodafone.de>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 23d4f1f2
...@@ -1580,7 +1580,7 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) ...@@ -1580,7 +1580,7 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
radeon_ring_write(ring, 0); radeon_ring_write(ring, 0);
radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (vm->id << 2), 0)); radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (vm->id << 2), 0));
radeon_ring_write(ring, vm->last_pfn); radeon_ring_write(ring, rdev->vm_manager.max_pfn);
radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0)); radeon_ring_write(ring, PACKET0(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0));
radeon_ring_write(ring, vm->pd_gpu_addr >> 12); radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
......
...@@ -663,9 +663,14 @@ struct radeon_vm { ...@@ -663,9 +663,14 @@ struct radeon_vm {
struct list_head list; struct list_head list;
struct list_head va; struct list_head va;
unsigned id; unsigned id;
unsigned last_pfn;
u64 pd_gpu_addr; /* contains the page directory */
struct radeon_sa_bo *sa_bo; struct radeon_sa_bo *page_directory;
uint64_t pd_gpu_addr;
/* array of page tables, one for each page directory entry */
struct radeon_sa_bo **page_tables;
struct mutex mutex; struct mutex mutex;
/* last fence for cs using this vm */ /* last fence for cs using this vm */
struct radeon_fence *fence; struct radeon_fence *fence;
......
...@@ -422,6 +422,18 @@ void radeon_gart_fini(struct radeon_device *rdev) ...@@ -422,6 +422,18 @@ void radeon_gart_fini(struct radeon_device *rdev)
* TODO bind a default page at vm initialization for default address * TODO bind a default page at vm initialization for default address
*/ */
/**
 * radeon_vm_num_pdes - return the number of page directory entries
 *
 * @rdev: radeon_device pointer
 *
 * Derives the page directory entry count from the VM manager's
 * max_pfn by shifting it right by RADEON_VM_BLOCK_SIZE (cayman+).
 */
static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
{
	const unsigned block_shift = RADEON_VM_BLOCK_SIZE;

	return rdev->vm_manager.max_pfn >> block_shift;
}
/** /**
* radeon_vm_directory_size - returns the size of the page directory in bytes * radeon_vm_directory_size - returns the size of the page directory in bytes
* *
...@@ -431,7 +443,7 @@ void radeon_gart_fini(struct radeon_device *rdev) ...@@ -431,7 +443,7 @@ void radeon_gart_fini(struct radeon_device *rdev)
*/ */
static unsigned radeon_vm_directory_size(struct radeon_device *rdev) static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
{ {
return (rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE) * 8; return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
} }
/** /**
...@@ -451,11 +463,11 @@ int radeon_vm_manager_init(struct radeon_device *rdev) ...@@ -451,11 +463,11 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
if (!rdev->vm_manager.enabled) { if (!rdev->vm_manager.enabled) {
/* allocate enough for 2 full VM pts */ /* allocate enough for 2 full VM pts */
size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev)); size = radeon_vm_directory_size(rdev);
size += RADEON_GPU_PAGE_ALIGN(rdev->vm_manager.max_pfn * 8); size += rdev->vm_manager.max_pfn * 8;
size *= 2; size *= 2;
r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
size, RADEON_GPU_PAGE_ALIGN(size),
RADEON_GEM_DOMAIN_VRAM); RADEON_GEM_DOMAIN_VRAM);
if (r) { if (r) {
dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
...@@ -476,7 +488,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev) ...@@ -476,7 +488,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
/* restore page table */ /* restore page table */
list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
if (vm->sa_bo == NULL) if (vm->page_directory == NULL)
continue; continue;
list_for_each_entry(bo_va, &vm->va, vm_list) { list_for_each_entry(bo_va, &vm->va, vm_list) {
...@@ -500,16 +512,25 @@ static void radeon_vm_free_pt(struct radeon_device *rdev, ...@@ -500,16 +512,25 @@ static void radeon_vm_free_pt(struct radeon_device *rdev,
struct radeon_vm *vm) struct radeon_vm *vm)
{ {
struct radeon_bo_va *bo_va; struct radeon_bo_va *bo_va;
int i;
if (!vm->sa_bo) if (!vm->page_directory)
return; return;
list_del_init(&vm->list); list_del_init(&vm->list);
radeon_sa_bo_free(rdev, &vm->sa_bo, vm->fence); radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
list_for_each_entry(bo_va, &vm->va, vm_list) { list_for_each_entry(bo_va, &vm->va, vm_list) {
bo_va->valid = false; bo_va->valid = false;
} }
if (vm->page_tables == NULL)
return;
for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);
kfree(vm->page_tables);
} }
/** /**
...@@ -545,6 +566,35 @@ void radeon_vm_manager_fini(struct radeon_device *rdev) ...@@ -545,6 +566,35 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
rdev->vm_manager.enabled = false; rdev->vm_manager.enabled = false;
} }
/**
* radeon_vm_evict - evict page table to make room for new one
*
* @rdev: radeon_device pointer
* @vm: VM we want to allocate something for
*
* Evict a VM from the lru, making sure that it isn't @vm. (cayman+).
* Returns 0 for success, -ENOMEM for failure.
*
* Global and local mutex must be locked!
*/
int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
{
struct radeon_vm *vm_evict;
if (list_empty(&rdev->vm_manager.lru_vm))
return -ENOMEM;
vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
struct radeon_vm, list);
if (vm_evict == vm)
return -ENOMEM;
mutex_lock(&vm_evict->mutex);
radeon_vm_free_pt(rdev, vm_evict);
mutex_unlock(&vm_evict->mutex);
return 0;
}
/** /**
* radeon_vm_alloc_pt - allocates a page table for a VM * radeon_vm_alloc_pt - allocates a page table for a VM
* *
...@@ -559,20 +609,15 @@ void radeon_vm_manager_fini(struct radeon_device *rdev) ...@@ -559,20 +609,15 @@ void radeon_vm_manager_fini(struct radeon_device *rdev)
*/ */
int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
{ {
struct radeon_vm *vm_evict; unsigned pd_size, pts_size;
int r;
u64 *pd_addr; u64 *pd_addr;
int tables_size; int r;
if (vm == NULL) { if (vm == NULL) {
return -EINVAL; return -EINVAL;
} }
/* allocate enough to cover the current VM size */ if (vm->page_directory != NULL) {
tables_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
tables_size += RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8);
if (vm->sa_bo != NULL) {
/* update lru */ /* update lru */
list_del_init(&vm->list); list_del_init(&vm->list);
list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
...@@ -580,25 +625,34 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm) ...@@ -580,25 +625,34 @@ int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
} }
retry: retry:
r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo, pd_size = RADEON_GPU_PAGE_ALIGN(radeon_vm_directory_size(rdev));
tables_size, RADEON_GPU_PAGE_SIZE, false); r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
&vm->page_directory, pd_size,
RADEON_GPU_PAGE_SIZE, false);
if (r == -ENOMEM) { if (r == -ENOMEM) {
if (list_empty(&rdev->vm_manager.lru_vm)) { r = radeon_vm_evict(rdev, vm);
if (r)
return r; return r;
}
vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
mutex_lock(&vm_evict->mutex);
radeon_vm_free_pt(rdev, vm_evict);
mutex_unlock(&vm_evict->mutex);
goto retry; goto retry;
} else if (r) { } else if (r) {
return r; return r;
} }
pd_addr = radeon_sa_bo_cpu_addr(vm->sa_bo); vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);
vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->sa_bo);
memset(pd_addr, 0, tables_size); /* Initially clear the page directory */
pd_addr = radeon_sa_bo_cpu_addr(vm->page_directory);
memset(pd_addr, 0, pd_size);
pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
if (vm->page_tables == NULL) {
DRM_ERROR("Cannot allocate memory for page table array\n");
radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
return -ENOMEM;
}
list_add_tail(&vm->list, &rdev->vm_manager.lru_vm); list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo, return radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo,
...@@ -793,20 +847,6 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, ...@@ -793,20 +847,6 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
} }
mutex_lock(&vm->mutex); mutex_lock(&vm->mutex);
if (last_pfn > vm->last_pfn) {
/* release mutex and lock in right order */
mutex_unlock(&vm->mutex);
mutex_lock(&rdev->vm_manager.lock);
mutex_lock(&vm->mutex);
/* and check again */
if (last_pfn > vm->last_pfn) {
/* grow va space 32M by 32M */
unsigned align = ((32 << 20) >> 12) - 1;
radeon_vm_free_pt(rdev, vm);
vm->last_pfn = (last_pfn + align) & ~align;
}
mutex_unlock(&rdev->vm_manager.lock);
}
head = &vm->va; head = &vm->va;
last_offset = 0; last_offset = 0;
list_for_each_entry(tmp, &vm->va, vm_list) { list_for_each_entry(tmp, &vm->va, vm_list) {
...@@ -864,6 +904,155 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) ...@@ -864,6 +904,155 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
return result; return result;
} }
/**
 * radeon_vm_update_pdes - make sure that page directory is valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @start: start of GPU address range
 * @end: end of GPU address range (exclusive, matching the walk in
 *       radeon_vm_update_ptes)
 *
 * Allocates new page tables if necessary
 * and updates the page directory (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
static int radeon_vm_update_pdes(struct radeon_device *rdev,
				 struct radeon_vm *vm,
				 uint64_t start, uint64_t end)
{
	static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;

	uint64_t last_pde = ~0, last_pt = ~0;
	unsigned count = 0;
	uint64_t pt_idx;
	int r;

	/* convert the byte range into GPU page numbers */
	start = start / RADEON_GPU_PAGE_SIZE;
	end = end / RADEON_GPU_PAGE_SIZE;

	/* an empty range needs no page table at all */
	if (end <= start)
		return 0;

	/*
	 * @end is exclusive, so the last page table needed is the one
	 * holding page (end - 1).  Using end itself would allocate one
	 * table too many whenever the range stops on a table boundary
	 * and would index past the page_tables array at the very top of
	 * the address space.
	 */
	start >>= RADEON_VM_BLOCK_SIZE;
	end = (end - 1) >> RADEON_VM_BLOCK_SIZE;

	/* walk over the address space and update the page directory */
	for (pt_idx = start; pt_idx <= end; ++pt_idx) {
		uint64_t pde, pt;

		/* table already present, its directory entry is not rewritten */
		if (vm->page_tables[pt_idx])
			continue;

retry:
		r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
				     &vm->page_tables[pt_idx],
				     RADEON_VM_PTE_COUNT * 8,
				     RADEON_GPU_PAGE_SIZE, false);

		if (r == -ENOMEM) {
			/* make room by evicting another VM, then try again */
			r = radeon_vm_evict(rdev, vm);
			if (r)
				return r;

			goto retry;
		} else if (r) {
			return r;
		}

		pde = vm->pd_gpu_addr + pt_idx * 8;

		pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);

		/*
		 * Batch directory entries: extend the current run only if
		 * both the entry address and the table address continue it.
		 */
		if (((last_pde + 8 * count) != pde) ||
		    ((last_pt + incr * count) != pt)) {

			if (count) {
				radeon_asic_vm_set_page(rdev, last_pde,
							last_pt, count, incr,
							RADEON_VM_PAGE_VALID);
			}

			count = 1;
			last_pde = pde;
			last_pt = pt;
		} else {
			++count;
		}
	}

	/* flush the final run */
	if (count) {
		radeon_asic_vm_set_page(rdev, last_pde, last_pt, count,
					incr, RADEON_VM_PAGE_VALID);
	}

	return 0;
}
/**
 * radeon_vm_update_ptes - make sure that page tables are valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Walks the pages in [@start, @end) one page table at a time and
 * hands runs of adjacent entries to radeon_asic_vm_set_page (cayman+).
 * A run is closed when the next entry address does not follow the
 * previous one or when it would grow beyond 1 << 11 entries.
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_update_ptes(struct radeon_device *rdev,
				  struct radeon_vm *vm,
				  uint64_t start, uint64_t end,
				  uint64_t dst, uint32_t flags)
{
	static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;

	uint64_t run_pte = ~0, run_dst = ~0;
	unsigned run_len = 0;
	uint64_t pfn;

	/* work in units of GPU pages from here on */
	start /= RADEON_GPU_PAGE_SIZE;
	end /= RADEON_GPU_PAGE_SIZE;

	pfn = start;
	while (pfn < end) {
		uint64_t slot = pfn & mask;
		unsigned chunk;
		uint64_t pte;

		/* a chunk never crosses into the next page table */
		if ((pfn & ~mask) != (end & ~mask))
			chunk = RADEON_VM_PTE_COUNT - slot;
		else
			chunk = end - pfn;

		/* address of the first entry of this chunk */
		pte = radeon_sa_bo_gpu_addr(
			vm->page_tables[pfn >> RADEON_VM_BLOCK_SIZE]);
		pte += slot * 8;

		if (((run_pte + 8 * run_len) == pte) &&
		    ((run_len + chunk) <= (1 << 11))) {
			/* chunk continues the current run */
			run_len += chunk;
		} else {
			/* emit what was collected and start a new run */
			if (run_len) {
				radeon_asic_vm_set_page(rdev, run_pte,
							run_dst, run_len,
							RADEON_GPU_PAGE_SIZE,
							flags);
			}

			run_len = chunk;
			run_pte = pte;
			run_dst = dst;
		}

		pfn += chunk;
		dst += chunk * RADEON_GPU_PAGE_SIZE;
	}

	/* emit the trailing run, if any */
	if (run_len) {
		radeon_asic_vm_set_page(rdev, run_pte, run_dst, run_len,
					RADEON_GPU_PAGE_SIZE, flags);
	}
}
/** /**
* radeon_vm_bo_update_pte - map a bo into the vm page table * radeon_vm_bo_update_pte - map a bo into the vm page table
* *
...@@ -887,12 +1076,11 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, ...@@ -887,12 +1076,11 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
struct radeon_semaphore *sem = NULL; struct radeon_semaphore *sem = NULL;
struct radeon_bo_va *bo_va; struct radeon_bo_va *bo_va;
unsigned nptes, npdes, ndw; unsigned nptes, npdes, ndw;
uint64_t pe, addr; uint64_t addr;
uint64_t pfn;
int r; int r;
/* nothing to do if vm isn't bound */ /* nothing to do if vm isn't bound */
if (vm->sa_bo == NULL) if (vm->page_directory == NULL)
return 0; return 0;
bo_va = radeon_vm_bo_find(vm, bo); bo_va = radeon_vm_bo_find(vm, bo);
...@@ -939,25 +1127,29 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, ...@@ -939,25 +1127,29 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
} }
} }
/* estimate number of dw needed */
/* reserve space for 32-bit padding */
ndw = 32;
nptes = radeon_bo_ngpu_pages(bo); nptes = radeon_bo_ngpu_pages(bo);
pfn = (bo_va->soffset / RADEON_GPU_PAGE_SIZE); /* assume two extra pdes in case the mapping overlaps the borders */
npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
/* estimate number of dw needed */
/* semaphore, fence and padding */
ndw = 32;
/* handle cases where a bo spans several pdes */ if (RADEON_VM_BLOCK_SIZE > 11)
npdes = (ALIGN(pfn + nptes, RADEON_VM_PTE_COUNT) - /* reserve space for one header for every 2k dwords */
(pfn & ~(RADEON_VM_PTE_COUNT - 1))) >> RADEON_VM_BLOCK_SIZE; ndw += (nptes >> 11) * 3;
else
/* reserve space for one header for
every (1 << BLOCK_SIZE) entries */
ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 3;
/* reserve space for one header for every 2k dwords */
ndw += (nptes >> 11) * 3;
/* reserve space for pte addresses */ /* reserve space for pte addresses */
ndw += nptes * 2; ndw += nptes * 2;
/* reserve space for one header for every 2k dwords */ /* reserve space for one header for every 2k dwords */
ndw += (npdes >> 11) * 3; ndw += (npdes >> 11) * 3;
/* reserve space for pde addresses */ /* reserve space for pde addresses */
ndw += npdes * 2; ndw += npdes * 2;
...@@ -971,22 +1163,14 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, ...@@ -971,22 +1163,14 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
radeon_fence_note_sync(vm->fence, ridx); radeon_fence_note_sync(vm->fence, ridx);
} }
/* update page table entries */ r = radeon_vm_update_pdes(rdev, vm, bo_va->soffset, bo_va->eoffset);
pe = vm->pd_gpu_addr; if (r) {
pe += radeon_vm_directory_size(rdev); radeon_ring_unlock_undo(rdev, ring);
pe += (bo_va->soffset / RADEON_GPU_PAGE_SIZE) * 8; return r;
}
radeon_asic_vm_set_page(rdev, pe, addr, nptes,
RADEON_GPU_PAGE_SIZE, bo_va->flags);
/* update page directory entries */
addr = pe;
pe = vm->pd_gpu_addr;
pe += ((bo_va->soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE) * 8;
radeon_asic_vm_set_page(rdev, pe, addr, npdes, radeon_vm_update_ptes(rdev, vm, bo_va->soffset, bo_va->eoffset,
RADEON_VM_PTE_COUNT * 8, RADEON_VM_PAGE_VALID); addr, bo_va->flags);
radeon_fence_unref(&vm->fence); radeon_fence_unref(&vm->fence);
r = radeon_fence_emit(rdev, &vm->fence, ridx); r = radeon_fence_emit(rdev, &vm->fence, ridx);
...@@ -997,6 +1181,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, ...@@ -997,6 +1181,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
radeon_ring_unlock_commit(rdev, ring); radeon_ring_unlock_commit(rdev, ring);
radeon_semaphore_free(rdev, &sem, vm->fence); radeon_semaphore_free(rdev, &sem, vm->fence);
radeon_fence_unref(&vm->last_flush); radeon_fence_unref(&vm->last_flush);
return 0; return 0;
} }
...@@ -1068,7 +1253,6 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) ...@@ -1068,7 +1253,6 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
vm->id = 0; vm->id = 0;
vm->fence = NULL; vm->fence = NULL;
vm->last_pfn = 0;
mutex_init(&vm->mutex); mutex_init(&vm->mutex);
INIT_LIST_HEAD(&vm->list); INIT_LIST_HEAD(&vm->list);
INIT_LIST_HEAD(&vm->va); INIT_LIST_HEAD(&vm->va);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment