Commit 284710fa authored by Christian König, committed by Alex Deucher

drm/amdgpu: add basic PRT support (v2)

Future hardware generations can handle PRT flags on a per-page basis,
but current hardware can only turn it on globally.

Add the basic handling for both: a global callback to enable/disable PRT,
triggered by setting a per-mapping flag.

v2: agd: rebase fixes
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent a5f6b5b1
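For illustration, here is a minimal, hypothetical sketch of how an ASIC-specific GMC backend could implement and register the set_prt callback this patch introduces. The names gmc_vX_0_set_prt, gmc_vX_0_write_prt_regs and gmc_vX_0_gart_funcs are placeholders and are not part of this patch:

/*
 * Illustrative sketch only: gmc_vX_0_set_prt(), gmc_vX_0_write_prt_regs()
 * and gmc_vX_0_gart_funcs are hypothetical names.  A real backend would
 * program its ASIC-specific VM/L2 registers here.
 */
static void gmc_vX_0_set_prt(struct amdgpu_device *adev, bool enable)
{
	/*
	 * Called from amdgpu_vm_update_prt_state() while holding
	 * vm_manager.prt_lock with interrupts disabled, so it must not sleep.
	 */
	gmc_vX_0_write_prt_regs(adev, enable);
}

static const struct amdgpu_gart_funcs gmc_vX_0_gart_funcs = {
	/* ... existing set_pte_pde and other callbacks ... */
	.set_prt = gmc_vX_0_set_prt,
};

Leaving .set_prt unset signals that the ASIC has no PRT support; amdgpu_vm_bo_map() in the diff below then rejects mappings that request AMDGPU_PTE_PRT with -EINVAL.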
@@ -294,6 +294,8 @@ struct amdgpu_gart_funcs {
 			    uint32_t gpu_page_idx, /* pte/pde to update */
 			    uint64_t addr, /* addr to write into pte/pde */
 			    uint32_t flags); /* access flags */
+	/* enable/disable PRT support */
+	void (*set_prt)(struct amdgpu_device *adev, bool enable);
 };
 
 /* provided by the ih block */
......
@@ -69,6 +69,12 @@ struct amdgpu_pte_update_params {
 	bool shadow;
 };
 
+/* Helper to disable partial resident texture feature from a fence callback */
+struct amdgpu_prt_cb {
+	struct amdgpu_device *adev;
+	struct dma_fence_cb cb;
+};
+
 /**
  * amdgpu_vm_num_pde - return the number of page directory entries
  *
@@ -989,11 +995,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		goto error_free;
 
 	amdgpu_bo_fence(vm->page_directory, f, true);
-	if (fence) {
-		dma_fence_put(*fence);
-		*fence = dma_fence_get(f);
-	}
-	dma_fence_put(f);
+	dma_fence_put(*fence);
+	*fence = f;
 	return 0;
 
 error_free:
@@ -1176,6 +1179,61 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	return 0;
 }
 
+/**
+ * amdgpu_vm_update_prt_state - update the global PRT state
+ */
+static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
+{
+	unsigned long flags;
+	bool enable;
+
+	spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
+	enable = !!atomic_read(&adev->vm_manager.num_prt_mappings);
+	adev->gart.gart_funcs->set_prt(adev, enable);
+	spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
+}
+
+/**
+ * amdgpu_vm_prt_cb - callback for updating the PRT status
+ */
+static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
+{
+	struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);
+
+	amdgpu_vm_update_prt_state(cb->adev);
+	kfree(cb);
+}
+
+/**
+ * amdgpu_vm_free_mapping - free a mapping
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @mapping: mapping to be freed
+ * @fence: fence of the unmap operation
+ *
+ * Free a mapping and make sure we decrease the PRT usage count if applicable.
+ */
+static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
+				   struct amdgpu_vm *vm,
+				   struct amdgpu_bo_va_mapping *mapping,
+				   struct dma_fence *fence)
+{
+	if ((mapping->flags & AMDGPU_PTE_PRT) &&
+	    atomic_dec_return(&adev->vm_manager.num_prt_mappings) == 0) {
+		struct amdgpu_prt_cb *cb = kmalloc(sizeof(struct amdgpu_prt_cb),
+						   GFP_KERNEL);
+
+		cb->adev = adev;
+		if (!fence || dma_fence_add_callback(fence, &cb->cb,
+						     amdgpu_vm_prt_cb)) {
+			amdgpu_vm_update_prt_state(adev);
+			kfree(cb);
+		}
+	}
+	kfree(mapping);
+}
+
 /**
  * amdgpu_vm_clear_freed - clear freed BOs in the PT
  *
@@ -1191,6 +1249,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm)
 {
 	struct amdgpu_bo_va_mapping *mapping;
+	struct dma_fence *fence = NULL;
 	int r;
 
 	while (!list_empty(&vm->freed)) {
@@ -1199,12 +1258,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		list_del(&mapping->list);
 
 		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping,
-					       0, 0, NULL);
-		kfree(mapping);
-		if (r)
+					       0, 0, &fence);
+		amdgpu_vm_free_mapping(adev, vm, mapping, fence);
+		if (r) {
+			dma_fence_put(fence);
 			return r;
+		}
 	}
+	dma_fence_put(fence);
 	return 0;
 }
@@ -1314,6 +1376,15 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	    size == 0 || size & AMDGPU_GPU_PAGE_MASK)
 		return -EINVAL;
 
+	if (flags & AMDGPU_PTE_PRT) {
+		/* Check if we have PRT hardware support */
+		if (!adev->gart.gart_funcs->set_prt)
+			return -EINVAL;
+
+		if (atomic_inc_return(&adev->vm_manager.num_prt_mappings) == 1)
+			amdgpu_vm_update_prt_state(adev);
+	}
+
 	/* make sure object fit at this offset */
 	eaddr = saddr + size - 1;
 	if (saddr >= eaddr ||
@@ -1400,7 +1471,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	list_del(&mapping->list);
 	interval_tree_remove(&mapping->it, &vm->va);
 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
-	kfree(mapping);
+	amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
 
 error:
 	return r;
@@ -1452,7 +1523,8 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 	if (valid)
 		list_add(&mapping->list, &vm->freed);
 	else
-		kfree(mapping);
+		amdgpu_vm_free_mapping(adev, vm, mapping,
+				       bo_va->last_pt_update);
 
 	return 0;
 }
@@ -1488,7 +1560,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
 		list_del(&mapping->list);
 		interval_tree_remove(&mapping->it, &vm->va);
-		kfree(mapping);
+		amdgpu_vm_free_mapping(adev, vm, mapping,
+				       bo_va->last_pt_update);
 	}
 
 	dma_fence_put(bo_va->last_pt_update);
@@ -1625,9 +1698,13 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		kfree(mapping);
 	}
 	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
+		if (mapping->flags & AMDGPU_PTE_PRT)
+			continue;
+
 		list_del(&mapping->list);
 		kfree(mapping);
 	}
+	amdgpu_vm_clear_freed(adev, vm);
 
 	for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) {
 		struct amdgpu_bo *pt = vm->page_tables[i].bo;
@@ -1673,6 +1750,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 	atomic64_set(&adev->vm_manager.client_counter, 0);
 
+	spin_lock_init(&adev->vm_manager.prt_lock);
+	atomic_set(&adev->vm_manager.num_prt_mappings, 0);
 }
 
 /**
......
@@ -65,6 +65,8 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_PTE_FRAG(x)	((x & 0x1f) << 7)
 
+#define AMDGPU_PTE_PRT		(1UL << 63)
+
 /* How to programm VM fault handling */
 #define AMDGPU_VM_FAULT_STOP_NEVER	0
 #define AMDGPU_VM_FAULT_STOP_FIRST	1
@@ -159,6 +161,10 @@ struct amdgpu_vm_manager {
 	atomic_t				vm_pte_next_ring;
 	/* client id counter */
 	atomic64_t				client_counter;
+
+	/* partial resident texture handling */
+	spinlock_t				prt_lock;
+	atomic_t				num_prt_mappings;
 };
 
 void amdgpu_vm_manager_init(struct amdgpu_device *adev);
......