Commit b6c4f90b authored by Shashank Sharma's avatar Shashank Sharma Committed by Alex Deucher

drm/amdgpu: sync page table freeing with tlb flush

The idea behind this patch is to delay the freeing of PT entry objects
until the TLB flush is done.

This patch:
- Adds a tlb_flush_waitlist in amdgpu_vm_update_params which will keep the
  objects that need to be freed after tlb_flush.
- Adds PT entries in this list in amdgpu_vm_ptes_update after finding
  the PT entry.
- Changes functionality of amdgpu_vm_pt_free_dfs from (df_search + free)
  to simply freeing of the BOs, also renames it to
  amdgpu_vm_pt_free_list to reflect this same.
- Exports function amdgpu_vm_pt_free_list to be called directly.
- Calls amdgpu_vm_pt_free_list directly from amdgpu_vm_update_range.

V2: rebase
V4: Addressed review comments from Christian
    - add only locked PTEs entries in TLB flush waitlist.
    - do not create a separate function for list flush.
    - do not create a new lock for TLB flush.
    - there is no need to wait on tlb_flush_fence exclusively.

V5: Addressed review comments from Christian
    - change the amdgpu_vm_pt_free_dfs's functionality to simple freeing
      of the objects and rename it.
    - add all the PTE objects in params->tlb_flush_waitlist
    - let amdgpu_vm_pt_free_root handle the freeing of BOs independently
    - call amdgpu_vm_pt_free_list directly

V6: Rebase
V7: Rebase
V8: Added a NULL check to fix this backtrace issue:
[  415.351447] BUG: kernel NULL pointer dereference, address: 0000000000000008
[  415.359245] #PF: supervisor write access in kernel mode
[  415.365081] #PF: error_code(0x0002) - not-present page
[  415.370817] PGD 101259067 P4D 101259067 PUD 10125a067 PMD 0
[  415.377140] Oops: 0002 [#1] PREEMPT SMP NOPTI
[  415.382004] CPU: 0 PID: 25481 Comm: test_with_MPI.e Tainted: G           OE     5.18.2-mi300-build-140423-ubuntu-22.04+ #24
[  415.394437] Hardware name: AMD Corporation Sh51p/Sh51p, BIOS RMO1001AS 02/21/2024
[  415.402797] RIP: 0010:amdgpu_vm_ptes_update+0x6fd/0xa10 [amdgpu]
[  415.409648] Code: 4c 89 ff 4d 8d 66 30 e8 f1 ed ff ff 48 85 db 74 42 48 39 5d a0 74 40 48 8b 53 20 48 8b 4b 18 48 8d 43 18 48 8d 75 b0 4c 89 ff <48
> 89 51 08 48 89 0a 49 8b 56 30 48 89 42 08 48 89 53 18 4c 89 63
[  415.430621] RSP: 0018:ffffc9000401f990 EFLAGS: 00010287
[  415.436456] RAX: ffff888147bb82f0 RBX: ffff888147bb82d8 RCX: 0000000000000000
[  415.444426] RDX: 0000000000000000 RSI: ffffc9000401fa30 RDI: ffff888161f80000
[  415.452397] RBP: ffffc9000401fa80 R08: 0000000000000000 R09: ffffc9000401fa00
[  415.460368] R10: 00000007f0cc0000 R11: 00000007f0c85000 R12: ffffc9000401fb20
[  415.468340] R13: 00000007f0d00000 R14: ffffc9000401faf0 R15: ffff888161f80000
[  415.476312] FS:  00007f132ff89840(0000) GS:ffff889f87c00000(0000) knlGS:0000000000000000
[  415.485350] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  415.491767] CR2: 0000000000000008 CR3: 0000000161d46003 CR4: 0000000000770ef0
[  415.499738] PKRU: 55555554
[  415.502750] Call Trace:
[  415.505482]  <TASK>
[  415.507825]  amdgpu_vm_update_range+0x32a/0x880 [amdgpu]
[  415.513869]  amdgpu_vm_clear_freed+0x117/0x250 [amdgpu]
[  415.519814]  amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu+0x18c/0x250 [amdgpu]
[  415.527729]  kfd_ioctl_unmap_memory_from_gpu+0xed/0x340 [amdgpu]
[  415.534551]  kfd_ioctl+0x3b6/0x510 [amdgpu]

V9: Addressed review comments from Christian
    - No NULL check reqd for root PT freeing
    - Free PT list regardless of needs_flush
    - Move adding BOs in list in a separate function

V10: Added Christian's RB
V11: squash in list fix

Cc: Christian König <Christian.Koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Felix Kuehling <felix.kuehling@amd.com>
Cc: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Acked-by: default avatarFelix Kuehling <felix.kuehling@amd.com>
Acked-by: default avatarRajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Reviewed-by: default avatarChristian König <Christian.Koenig@amd.com>
Tested-by: default avatarRajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: default avatarShashank Sharma <shashank.sharma@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent fb734632
...@@ -988,6 +988,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, ...@@ -988,6 +988,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
params.unlocked = unlocked; params.unlocked = unlocked;
params.needs_flush = flush_tlb; params.needs_flush = flush_tlb;
params.allow_override = allow_override; params.allow_override = allow_override;
INIT_LIST_HEAD(&params.tlb_flush_waitlist);
/* Implicitly sync to command submissions in the same VM before /* Implicitly sync to command submissions in the same VM before
* unmapping. Sync to moving fences before mapping. * unmapping. Sync to moving fences before mapping.
...@@ -1078,6 +1079,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, ...@@ -1078,6 +1079,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
tlb_cb = NULL; tlb_cb = NULL;
} }
amdgpu_vm_pt_free_list(adev, &params);
error_free: error_free:
kfree(tlb_cb); kfree(tlb_cb);
amdgpu_vm_eviction_unlock(vm); amdgpu_vm_eviction_unlock(vm);
......
...@@ -266,6 +266,11 @@ struct amdgpu_vm_update_params { ...@@ -266,6 +266,11 @@ struct amdgpu_vm_update_params {
* to be overridden for NUMA local memory. * to be overridden for NUMA local memory.
*/ */
bool allow_override; bool allow_override;
/**
* @tlb_flush_waitlist: temporary storage for BOs until tlb_flush
*/
struct list_head tlb_flush_waitlist;
}; };
struct amdgpu_vm_update_funcs { struct amdgpu_vm_update_funcs {
...@@ -547,6 +552,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, ...@@ -547,6 +552,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
uint64_t start, uint64_t end, uint64_t start, uint64_t end,
uint64_t dst, uint64_t flags); uint64_t dst, uint64_t flags);
void amdgpu_vm_pt_free_work(struct work_struct *work); void amdgpu_vm_pt_free_work(struct work_struct *work);
void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
struct amdgpu_vm_update_params *params);
#if defined(CONFIG_DEBUG_FS) #if defined(CONFIG_DEBUG_FS)
void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
......
...@@ -622,40 +622,58 @@ void amdgpu_vm_pt_free_work(struct work_struct *work) ...@@ -622,40 +622,58 @@ void amdgpu_vm_pt_free_work(struct work_struct *work)
} }
/** /**
* amdgpu_vm_pt_free_dfs - free PD/PT levels * amdgpu_vm_pt_free_list - free PD/PT levels
* *
* @adev: amdgpu device structure * @adev: amdgpu device structure
* @vm: amdgpu vm structure * @params: see amdgpu_vm_update_params definition
* @start: optional cursor where to start freeing PDs/PTs
* @unlocked: vm resv unlock status
* *
* Free the page directory or page table level and all sub levels. * Free the page directory objects saved in the flush list
*/ */
static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
struct amdgpu_vm *vm, struct amdgpu_vm_update_params *params)
struct amdgpu_vm_pt_cursor *start,
bool unlocked)
{ {
struct amdgpu_vm_pt_cursor cursor; struct amdgpu_vm_bo_base *entry, *next;
struct amdgpu_vm_bo_base *entry; struct amdgpu_vm *vm = params->vm;
bool unlocked = params->unlocked;
if (list_empty(&params->tlb_flush_waitlist))
return;
if (unlocked) { if (unlocked) {
spin_lock(&vm->status_lock); spin_lock(&vm->status_lock);
for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) list_splice_init(&params->tlb_flush_waitlist, &vm->pt_freed);
list_move(&entry->vm_status, &vm->pt_freed);
if (start)
list_move(&start->entry->vm_status, &vm->pt_freed);
spin_unlock(&vm->status_lock); spin_unlock(&vm->status_lock);
schedule_work(&vm->pt_free_work); schedule_work(&vm->pt_free_work);
return; return;
} }
for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) list_for_each_entry_safe(entry, next, &params->tlb_flush_waitlist, vm_status)
amdgpu_vm_pt_free(entry); amdgpu_vm_pt_free(entry);
}
if (start) /**
amdgpu_vm_pt_free(start->entry); * amdgpu_vm_pt_add_list - add PD/PT level to the flush list
*
* @params: parameters for the update
* @cursor: first PT entry to start DF search from, non NULL
*
* This list will be freed after TLB flush.
*/
static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,
struct amdgpu_vm_pt_cursor *cursor)
{
struct amdgpu_vm_pt_cursor seek;
struct amdgpu_vm_bo_base *entry;
spin_lock(&params->vm->status_lock);
for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) {
if (entry && entry->bo)
list_move(&entry->vm_status, &params->tlb_flush_waitlist);
}
/* enter start node now */
list_move(&cursor->entry->vm_status, &params->tlb_flush_waitlist);
spin_unlock(&params->vm->status_lock);
} }
/** /**
...@@ -667,7 +685,11 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, ...@@ -667,7 +685,11 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
*/ */
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{ {
amdgpu_vm_pt_free_dfs(adev, vm, NULL, false); struct amdgpu_vm_pt_cursor cursor;
struct amdgpu_vm_bo_base *entry;
for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
amdgpu_vm_pt_free(entry);
} }
/** /**
...@@ -973,9 +995,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, ...@@ -973,9 +995,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
/* Make sure previous mapping is freed */ /* Make sure previous mapping is freed */
if (cursor.entry->bo) { if (cursor.entry->bo) {
params->needs_flush = true; params->needs_flush = true;
amdgpu_vm_pt_free_dfs(adev, params->vm, amdgpu_vm_pt_add_list(params, &cursor);
&cursor,
params->unlocked);
} }
amdgpu_vm_pt_next(adev, &cursor); amdgpu_vm_pt_next(adev, &cursor);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment