Commit a546a276 authored by Xiaogang Chen's avatar Xiaogang Chen Committed by Alex Deucher

drm/amdkfd: Use partial migrations/mapping for GPU/CPU page faults in SVM

This patch implements partial migration/mapping for gpu/cpu page faults in SVM
according to migration granularity(default 2MB). A svm range may include pages
from both system ram and vram of one gpu now. These chagnes are expected to
improve migration performance and reduce mmu callback and TLB flush workloads.
Signed-off-by: default avatarXiaogang Chen <xiaogang.chen@amd.com>
Reviewed-by: default avatarPhilip Yang <Philip.Yang@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent fa745b55
This diff is collapsed.
......@@ -41,9 +41,13 @@ enum MIGRATION_COPY_DIR {
};
int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
unsigned long start, unsigned long last,
struct mm_struct *mm, uint32_t trigger);
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
unsigned long start, unsigned long last,
uint32_t trigger, struct page *fault_page);
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
......
This diff is collapsed.
......@@ -78,6 +78,7 @@ struct svm_work_list_item {
* @update_list:link list node used to add to update_list
* @mapping: bo_va mapping structure to create and update GPU page table
* @npages: number of pages
* @vram_pages: vram pages number in this svm_range
* @dma_addr: dma mapping address on each GPU for system memory physical page
* @ttm_res: vram ttm resource map
* @offset: range start offset within mm_nodes
......@@ -88,7 +89,9 @@ struct svm_work_list_item {
* @flags: flags defined as KFD_IOCTL_SVM_FLAG_*
* @perferred_loc: perferred location, 0 for CPU, or GPU id
* @perfetch_loc: last prefetch location, 0 for CPU, or GPU id
* @actual_loc: the actual location, 0 for CPU, or GPU id
* @actual_loc: this svm_range location. 0: all pages are from sys ram;
* GPU id: this svm_range may include vram pages from GPU with
* id actual_loc.
* @granularity:migration granularity, log2 num pages
* @invalid: not 0 means cpu page table is invalidated
* @validate_timestamp: system timestamp when range is validated
......@@ -112,6 +115,7 @@ struct svm_range {
struct list_head list;
struct list_head update_list;
uint64_t npages;
uint64_t vram_pages;
dma_addr_t *dma_addr[MAX_GPU_INSTANCE];
struct ttm_resource *ttm_res;
uint64_t offset;
......@@ -168,9 +172,6 @@ struct kfd_node *svm_range_get_node_by_id(struct svm_range *prange,
int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
bool clear);
void svm_range_vram_node_free(struct svm_range *prange);
int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
unsigned long addr, struct svm_range *parent,
struct svm_range *prange);
int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
uint32_t vmid, uint32_t node_id, uint64_t addr,
bool write_fault);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment