Commit 6849d47c authored by Roger He, committed by Alex Deucher

drm/amdgpu: handle all fragment sizes v4

This can improve performance in some cases.

v2 (chk): handle all sizes, simplify the patch quite a bit
v3 (chk): adjust dw estimation as well
v4 (chk): use single loop, make end mask 64bit
Signed-off-by: Roger He <Hongbo.He@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Roger He <Hongbo.He@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 2b9bdfa7
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1420,8 +1420,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
                                uint64_t start, uint64_t end,
                                uint64_t dst, uint64_t flags)
 {
-        int r;
-
         /**
          * The MC L1 TLB supports variable sized pages, based on a fragment
          * field in the PTE. When this field is set to a non-zero value, page
@@ -1440,39 +1438,38 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
          * Userspace can support this by aligning virtual base address and
          * allocation size to the fragment size.
          */
-        unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
-        uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
-        uint64_t frag_align = 1 << pages_per_frag;
-
-        uint64_t frag_start = ALIGN(start, frag_align);
-        uint64_t frag_end = end & ~(frag_align - 1);
+        unsigned max_frag = params->adev->vm_manager.fragment_size;
+        int r;
 
         /* system pages are non continuously */
-        if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-            (frag_start >= frag_end))
+        if (params->src || !(flags & AMDGPU_PTE_VALID))
                 return amdgpu_vm_update_ptes(params, start, end, dst, flags);
 
-        /* handle the 4K area at the beginning */
-        if (start != frag_start) {
-                r = amdgpu_vm_update_ptes(params, start, frag_start,
-                                          dst, flags);
+        while (start != end) {
+                uint64_t frag_flags, frag_end;
+                unsigned frag;
+
+                /* This intentionally wraps around if no bit is set */
+                frag = min((unsigned)ffs(start) - 1,
+                           (unsigned)fls64(end - start) - 1);
+                if (frag >= max_frag) {
+                        frag_flags = AMDGPU_PTE_FRAG(max_frag);
+                        frag_end = end & ~((1ULL << max_frag) - 1);
+                } else {
+                        frag_flags = AMDGPU_PTE_FRAG(frag);
+                        frag_end = start + (1 << frag);
+                }
+
+                r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
+                                          flags | frag_flags);
                 if (r)
                         return r;
-                dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
-        }
 
-        /* handle the area in the middle */
-        r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
-                                  flags | frag_flags);
-        if (r)
-                return r;
-
-        /* handle the 4K area at the end */
-        if (frag_end != end) {
-                dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-                r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+                dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+                start = frag_end;
         }
 
-        return r;
+        return 0;
 }
 
 /**
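
For readers following along, below is a standalone userspace sketch (not part of the patch) of the new fragment-selection loop. The helpers ffs64()/fls64_() stand in for the kernel's ffs()/fls64(), and the start/end/max_frag values are made-up example inputs:

/* sketch.c - illustrative only; mirrors the loop in the hunk above */
#include <stdint.h>
#include <stdio.h>

/* userspace stand-ins for the kernel's ffs()/fls64() */
static unsigned ffs64(uint64_t x)  { return __builtin_ffsll(x); }
static unsigned fls64_(uint64_t x) { return x ? 64 - __builtin_clzll(x) : 0; }

int main(void)
{
        uint64_t start = 3, end = 23;   /* page numbers, example only */
        unsigned max_frag = 2;          /* pretend HW max: 4-page fragments */

        while (start != end) {
                /* largest power-of-two block that is aligned to start and
                 * no larger than the remaining range; wraps around
                 * (harmlessly) when start is 0, just as in the patch */
                unsigned frag = ffs64(start) - 1;
                unsigned len  = fls64_(end - start) - 1;
                uint64_t frag_end;

                if (len < frag)
                        frag = len;

                if (frag >= max_frag) {         /* clamp to the HW maximum */
                        frag = max_frag;
                        frag_end = end & ~((1ULL << max_frag) - 1);
                } else {
                        frag_end = start + (1ULL << frag);
                }

                printf("pages [%llu, %llu) -> fragment field %u\n",
                       (unsigned long long)start,
                       (unsigned long long)frag_end, frag);
                start = frag_end;
        }
        return 0;
}

Built with gcc, this splits the example page range [3, 23) into [3, 4) frag 0, [4, 20) frag 2, [20, 22) frag 1 and [22, 23) frag 0: the single loop emits the largest aligned fragment at every step, which is the v4 "handle all sizes" behaviour.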
@@ -1562,8 +1559,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
                 /* set page commands needed */
                 ndw += ncmds * 10;
 
-                /* two extra commands for begin/end of fragment */
-                ndw += 2 * 10;
+                /* extra commands for begin/end fragments */
+                ndw += 2 * 10 * adev->vm_manager.fragment_size;
 
                 params.func = amdgpu_vm_do_set_ptes;
         }
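
As a quick sanity check on the v3 estimate change, assuming a fragment_size of 9 (the usual default, i.e. 2MB fragments with 4K pages; the value is not stated in this patch): the reservation grows from the old flat 2 * 10 = 20 dwords to 2 * 10 * 9 = 180 dwords, i.e. one begin/end command pair per possible fragment size instead of one pair total.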