drm/xe: Port Xe to GPUVA

Rather than open coding VM binds and VMA tracking, use the GPUVA library. GPUVA provides a common infrastructure for VM binds to use mmap / munmap semantics and support for VK sparse bindings. The concepts are: 1) xe_vm inherits from drm_gpuva_manager 2) xe_vma inherits from drm_gpuva 3) xe_vma_op inherits from drm_gpuva_op 4) VM bind operations (MAP, UNMAP, PREFETCH, UNMAP_ALL) call into the GPUVA code to generate an VMA operations list which is parsed, committed, and executed. v2 (CI): Add break after default in case statement. v3: Rebase v4: Fix some error handling v5: Use unlocked version VMA in error paths v6: Rebase, address some review feedback mainly Thomas H v7: Fix compile error in xe_vma_op_unwind, address checkpatch Signed-off-by: Matthew Brost <matthew.brost@intel.com> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

drm/xe: Port Xe to GPUVA
Rather than open coding VM binds and VMA tracking, use the GPUVA library. GPUVA provides a common infrastructure for VM binds to use mmap / munmap semantics and support for VK sparse bindings. The concepts are: 1) xe_vm inherits from drm_gpuva_manager 2) xe_vma inherits from drm_gpuva 3) xe_vma_op inherits from drm_gpuva_op 4) VM bind operations (MAP, UNMAP, PREFETCH, UNMAP_ALL) call into the GPUVA code to generate an VMA operations list which is parsed, committed, and executed. v2 (CI): Add break after default in case statement. v3: Rebase v4: Fix some error handling v5: Use unlocked version VMA in error paths v6: Rebase, address some review feedback mainly Thomas H v7: Fix compile error in xe_vma_op_unwind, address checkpatch Signed-off-by: Matthew Brost <matthew.brost@intel.com> Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
b06d47be · Matthew Brost · Rodrigo Vivi · 5cecdd0b · b06d47be · b06d47be
Commit b06d47be authored Jul 07, 2023 by Matthew Brost Committed by Rodrigo Vivi Dec 21, 2023
14 changed files
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -21,6 +21,7 @@ config DRM_XE
 	select VMAP_PFN
 	select DRM_TTM
 	select DRM_TTM_HELPER
+	select DRM_GPUVM
 	select DRM_SCHED
 	select MMU_NOTIFIER
 	select WANT_DEV_COREDUMP

--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -300,7 +300,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
 	/* First part of the test, are we updating our pagetable bo with a new entry? */
 	xe_map_wr(xe, &bo->vmap, XE_PAGE_SIZE * (NUM_KERNEL_PDE - 1), u64,
 		  0xdeaddeadbeefbeef);
-	expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0, 0);
+	expected = xe_pte_encode(NULL, pt, 0, XE_CACHE_WB, 0);
 	if (m->eng->vm->flags & XE_VM_FLAGS_64K)
 		expected |= XE_PTE_PS64;
 	if (xe_bo_is_vram(pt))

--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -412,7 +412,9 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 {
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	struct xe_vma *vma;
+	struct drm_gpuva *gpuva;
+	struct drm_gem_object *obj = &bo->ttm.base;
+	struct drm_gpuvm_bo *vm_bo;
 	int ret = 0;

 	dma_resv_assert_held(bo->ttm.base.resv);
@@ -425,10 +427,12 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 		dma_resv_iter_end(&cursor);
 	}

-	list_for_each_entry(vma, &bo->vmas, bo_link) {
-		struct xe_vm *vm = xe_vma_vm(vma);
+	drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+		drm_gpuvm_bo_for_each_va(gpuva, vm_bo) {
+			struct xe_vma *vma = gpuva_to_vma(gpuva);
+			struct xe_vm *vm = xe_vma_vm(vma);

-		trace_xe_vma_evict(vma);
+			trace_xe_vma_evict(vma);

 		if (xe_vm_in_fault_mode(vm)) {
 			/* Wait for pending binds / unbinds. */
@@ -454,7 +458,6 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,

 		} else {
 			bool vm_resv_locked = false;
-			struct xe_vm *vm = xe_vma_vm(vma);

 			/*
 			 * We need to put the vma on the vm's rebind_list,
@@ -462,9 +465,9 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 			 * that we indeed have it locked, put the vma an the
 			 * vm's notifier.rebind_list instead and scoop later.
 			 */
-			if (dma_resv_trylock(&vm->resv))
+			if (dma_resv_trylock(xe_vm_resv(vm)))
 				vm_resv_locked = true;
-			else if (ctx->resv != &vm->resv) {
+			else if (ctx->resv != xe_vm_resv(vm)) {
 				spin_lock(&vm->notifier.list_lock);
 				list_move_tail(&vma->notifier.rebind_link,
 					       &vm->notifier.rebind_list);
@@ -477,7 +480,8 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
 				list_add_tail(&vma->rebind_link, &vm->rebind_list);

 			if (vm_resv_locked)
-				dma_resv_unlock(&vm->resv);
+				dma_resv_unlock(xe_vm_resv(vm));
+		}
 		}
 	}

@@ -1285,7 +1289,7 @@ xe_bo_create_locked_range(struct xe_device *xe,
 		}
 	}

-	bo = __xe_bo_create_locked(xe, bo, tile, vm ? &vm->resv : NULL,
+	bo = __xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
 				   vm && !xe_vm_in_fault_mode(vm) &&
 				   flags & XE_BO_CREATE_USER_BIT ?
 				   &vm->lru_bulk_move : NULL, size,
@@ -1293,6 +1297,13 @@ xe_bo_create_locked_range(struct xe_device *xe,
 	if (IS_ERR(bo))
 		return bo;

+	/*
+	 * Note that instead of taking a reference no the drm_gpuvm_resv_bo(),
+	 * to ensure the shared resv doesn't disappear under the bo, the bo
+	 * will keep a reference to the vm, and avoid circular references
+	 * by having all the vm's bo refereferences released at vm close
+	 * time.
+	 */
 	if (vm && xe_bo_is_user(bo))
 		xe_vm_get(vm);
 	bo->vm = vm;
@@ -1600,7 +1611,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
 		xe_vm_assert_held(vm);

 		ctx.allow_res_evict = allow_res_evict;
-		ctx.resv = &vm->resv;
+		ctx.resv = xe_vm_resv(vm);
 	}

 	return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);

--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -9,6 +9,15 @@
 #include "xe_bo_types.h"
 #include "xe_macros.h"
 #include "xe_vm_types.h"
+#include "xe_vm.h"
+
+/**
+ * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held.
+ * @vm: The vm
+ */
+#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
+
+

 #define XE_DEFAULT_GTT_SIZE_MB          3072ULL /* 3GB by default */

@@ -149,7 +158,7 @@ void xe_bo_unlock(struct xe_bo *bo, struct ww_acquire_ctx *ww);
 static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
 {
 	if (bo) {
-		XE_BUG_ON(bo->vm && bo->ttm.base.resv != &bo->vm->resv);
+		XE_BUG_ON(bo->vm && bo->ttm.base.resv != xe_vm_resv(bo->vm));
 		if (bo->vm)
 			xe_vm_assert_held(bo->vm);
 		else

--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -134,7 +134,7 @@ static struct drm_driver driver = {
 	.driver_features =
 	    DRIVER_GEM |
 	    DRIVER_RENDER | DRIVER_SYNCOBJ |
-	    DRIVER_SYNCOBJ_TIMELINE,
+	    DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
 	.open = xe_file_open,
 	.postclose = xe_file_close,


--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -350,7 +350,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	/* Wait behind munmap style rebinds */
 	if (!xe_vm_no_dma_fences(vm)) {
 		err = drm_sched_job_add_resv_dependencies(&job->drm,
-							  &vm->resv,
+							  xe_vm_resv(vm),
 							  DMA_RESV_USAGE_KERNEL);
 		if (err)
 			goto err_put_job;
@@ -378,7 +378,7 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	xe_sched_job_arm(job);
 	if (!xe_vm_no_dma_fences(vm)) {
 		/* Block userptr invalidations / BO eviction */
-		dma_resv_add_fence(&vm->resv,
+		dma_resv_add_fence(xe_vm_resv(vm),
 				   &job->drm.s_fence->finished,
 				   DMA_RESV_USAGE_BOOKKEEP);


--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -75,10 +75,10 @@ static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma)
 		!(BIT(tile->id) & vma->usm.tile_invalidated);
 }

-static bool vma_matches(struct xe_vma *vma, struct xe_vma *lookup)
+static bool vma_matches(struct xe_vma *vma, u64 page_addr)
 {
-	if (xe_vma_start(lookup) > xe_vma_end(vma) - 1 ||
-	    xe_vma_end(lookup) - 1 < xe_vma_start(vma))
+	if (page_addr > xe_vma_end(vma) - 1 ||
+	    page_addr + SZ_4K - 1 < xe_vma_start(vma))
 		return false;

 	return true;
@@ -91,16 +91,14 @@ static bool only_needs_bo_lock(struct xe_bo *bo)

 static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
 {
-	struct xe_vma *vma = NULL, lookup;
+	struct xe_vma *vma = NULL;

-	lookup.start = page_addr;
-	lookup.end = lookup.start + SZ_4K - 1;
 	if (vm->usm.last_fault_vma) {   /* Fast lookup */
-		if (vma_matches(vm->usm.last_fault_vma, &lookup))
+		if (vma_matches(vm->usm.last_fault_vma, page_addr))
 			vma = vm->usm.last_fault_vma;
 	}
 	if (!vma)
-		vma = xe_vm_find_overlapping_vma(vm, &lookup);
+		vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);

 	return vma;
 }
@@ -489,12 +487,8 @@ static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
 {
 	u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) *
 		sub_granularity_in_byte(acc->granularity);
-	struct xe_vma lookup;
-
-	lookup.start = page_va;
-	lookup.end = lookup.start + SZ_4K - 1;

-	return xe_vm_find_overlapping_vma(vm, &lookup);
+	return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K);
 }

 static int handle_acc(struct xe_gt *gt, struct acc *acc)

--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -197,7 +197,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 	/* Map the entire BO in our level 0 pt */
 	for (i = 0, level = 0; i < num_entries; level++) {
 		entry = xe_pte_encode(NULL, bo, i * XE_PAGE_SIZE,
-				      XE_CACHE_WB, 0, 0);
+				      XE_CACHE_WB, 0);

 		xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64, entry);

@@ -216,7 +216,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 		     i += vm->flags & XE_VM_FLAGS_64K ? XE_64K_PAGE_SIZE :
 		     XE_PAGE_SIZE) {
 			entry = xe_pte_encode(NULL, batch, i,
-					      XE_CACHE_WB, 0, 0);
+					      XE_CACHE_WB, 0);

 			xe_map_wr(xe, &bo->vmap, map_ofs + level * 8, u64,
 				  entry);
@@ -1068,7 +1068,7 @@ xe_migrate_update_pgtables_cpu(struct xe_migrate *m,
 					  DMA_RESV_USAGE_KERNEL))
 		return ERR_PTR(-ETIME);

-	if (wait_vm && !dma_resv_test_signaled(&vm->resv,
+	if (wait_vm && !dma_resv_test_signaled(xe_vm_resv(vm),
 					       DMA_RESV_USAGE_BOOKKEEP))
 		return ERR_PTR(-ETIME);

@@ -1159,7 +1159,8 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	u64 addr;
 	int err = 0;
 	bool usm = !eng && xe->info.supports_usm;
-	bool first_munmap_rebind = vma && vma->first_munmap_rebind;
+	bool first_munmap_rebind = vma &&
+		vma->gpuva.flags & XE_VMA_FIRST_REBIND;
 	struct xe_engine *eng_override = !eng ? m->eng : eng;

 	/* Use the CPU if no in syncs and engine is idle */
@@ -1232,8 +1233,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,

 			BUG_ON(pt_bo->size != SZ_4K);

-			addr = xe_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB,
-					     0, 0);
+			addr = xe_pte_encode(NULL, pt_bo, 0, XE_CACHE_WB, 0);
 			bb->cs[bb->len++] = lower_32_bits(addr);
 			bb->cs[bb->len++] = upper_32_bits(addr);
 		}
@@ -1281,7 +1281,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
 	 * trigger preempts before moving forward
 	 */
 	if (first_munmap_rebind) {
-		err = job_add_deps(job, &vm->resv,
+		err = job_add_deps(job, xe_vm_resv(vm),
 				   DMA_RESV_USAGE_BOOKKEEP);
 		if (err)
 			goto err_job;

--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -100,15 +100,15 @@ static dma_addr_t vma_addr(struct xe_vma *vma, u64 offset,
 	}
 }

-static u64 __pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
-			u32 pt_level)
+static u64 __pte_encode(u64 pte, enum xe_cache_level cache,
+			struct xe_vma *vma, u32 pt_level)
 {
 	pte |= XE_PAGE_PRESENT | XE_PAGE_RW;

-	if (unlikely(flags & XE_PTE_FLAG_READ_ONLY))
+	if (unlikely(vma && xe_vma_read_only(vma)))
 		pte &= ~XE_PAGE_RW;

-	if (unlikely(flags & XE_PTE_FLAG_NULL))
+	if (unlikely(vma && xe_vma_is_null(vma)))
 		pte |= XE_PTE_NULL;

 	/* FIXME: I don't think the PPAT handling is correct for MTL */
@@ -142,7 +142,6 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
 * @bo: If @vma is NULL, representing the memory to point to.
 * @offset: The offset into @vma or @bo.
 * @cache: The cache level indicating
- * @flags: Currently only supports PTE_READ_ONLY for read-only access.
 * @pt_level: The page-table level of the page-table into which the entry
 * is to be inserted.
 *
@@ -150,7 +149,7 @@ static u64 __pte_encode(u64 pte, enum xe_cache_level cache, u32 flags,
 */
 u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
 		  u64 offset, enum xe_cache_level cache,
-		  u32 flags, u32 pt_level)
+		  u32 pt_level)
 {
 	u64 pte;
 	bool is_vram;
@@ -162,11 +161,11 @@ u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,

 	if (is_vram) {
 		pte |= XE_PPGTT_PTE_LM;
-		if (vma && vma->use_atomic_access_pte_bit)
+		if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT)
 			pte |= XE_USM_PPGTT_PTE_AE;
 	}

-	return __pte_encode(pte, cache, flags, pt_level);
+	return __pte_encode(pte, cache, vma, pt_level);
 }

 static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
@@ -179,7 +178,7 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,

 	if (level == 0) {
 		u64 empty = xe_pte_encode(NULL, vm->scratch_bo[id], 0,
-					  XE_CACHE_WB, 0, 0);
+					  XE_CACHE_WB, 0);

 		return empty;
 	} else {
@@ -424,10 +423,9 @@ struct xe_pt_stage_bind_walk {
 	 */
 	bool needs_64K;
 	/**
-	 * @pte_flags: Flags determining PTE setup. These are not flags
-	 * encoded directly in the PTE. See @default_pte for those.
+	 * @vma: VMA being mapped
 	 */
-	u32 pte_flags;
+	struct xe_vma *vma;

 	/* Also input, but is updated during the walk*/
 	/** @curs: The DMA address cursor. */
@@ -564,7 +562,7 @@ static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level,
 		return false;

 	/* null VMA's do not have dma addresses */
-	if (xe_walk->pte_flags & XE_PTE_FLAG_NULL)
+	if (xe_vma_is_null(xe_walk->vma))
 		return true;

 	/* Is the DMA address huge PTE size aligned? */
@@ -590,7 +588,7 @@ xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk)
 		return false;

 	/* null VMA's do not have dma addresses */
-	if (xe_walk->pte_flags & XE_PTE_FLAG_NULL)
+	if (xe_vma_is_null(xe_walk->vma))
 		return true;

 	xe_res_next(&curs, addr - xe_walk->va_curs_start);
@@ -643,14 +641,13 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 	/* Is this a leaf entry ?*/
 	if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) {
 		struct xe_res_cursor *curs = xe_walk->curs;
-		bool is_null = xe_walk->pte_flags & XE_PTE_FLAG_NULL;
+		bool is_null = xe_vma_is_null(xe_walk->vma);

 		XE_WARN_ON(xe_walk->va_curs_start != addr);

 		pte = __pte_encode(is_null ? 0 :
 				   xe_res_dma(curs) + xe_walk->dma_offset,
-				   xe_walk->cache, xe_walk->pte_flags,
-				   level);
+				   xe_walk->cache, xe_walk->vma, level);
 		pte |= xe_walk->default_pte;

 		/*
@@ -762,7 +759,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
 		.tile = tile,
 		.curs = &curs,
 		.va_curs_start = xe_vma_start(vma),
-		.pte_flags = vma->pte_flags,
+		.vma = vma,
 		.wupd.entries = entries,
 		.needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAGS_64K) && is_vram,
 	};
@@ -771,7 +768,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,

 	if (is_vram) {
 		xe_walk.default_pte = XE_PPGTT_PTE_LM;
-		if (vma && vma->use_atomic_access_pte_bit)
+		if (vma && vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT)
 			xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE;
 		xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource);
 		xe_walk.cache = XE_CACHE_WB;
@@ -990,7 +987,7 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma)
 	else if (!xe_vma_is_null(vma))
 		dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv);

-	dma_resv_assert_held(&vm->resv);
+	xe_vm_assert_held(vm);
 }

 static void xe_pt_commit_bind(struct xe_vma *vma,
@@ -1343,6 +1340,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 					   syncs, num_syncs,
 					   &bind_pt_update.base);
 	if (!IS_ERR(fence)) {
+		bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND;
 		LLIST_HEAD(deferred);

 		/* TLB invalidation must be done before signaling rebind */
@@ -1358,8 +1356,8 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 		}

 		/* add shared fence now for pagetable delayed destroy */
-		dma_resv_add_fence(&vm->resv, fence, !rebind &&
-				   vma->last_munmap_rebind ?
+		dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind &&
+				   last_munmap_rebind ?
 				   DMA_RESV_USAGE_KERNEL :
 				   DMA_RESV_USAGE_BOOKKEEP);

@@ -1377,7 +1375,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 			up_read(&vm->userptr.notifier_lock);
 			xe_bo_put_commit(&deferred);
 		}
-		if (!rebind && vma->last_munmap_rebind &&
+		if (!rebind && last_munmap_rebind &&
 		    xe_vm_in_compute_mode(vm))
 			queue_work(vm->xe->ordered_wq,
 				   &vm->preempt.rebind_work);
@@ -1676,7 +1674,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e
 		fence = &ifence->base.base;

 		/* add shared fence now for pagetable delayed destroy */
-		dma_resv_add_fence(&vm->resv, fence,
+		dma_resv_add_fence(xe_vm_resv(vm), fence,
 				   DMA_RESV_USAGE_BOOKKEEP);

 		/* This fence will be installed by caller when doing eviction */

--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -50,5 +50,5 @@ u64 xe_pde_encode(struct xe_bo *bo, u64 bo_offset,

 u64 xe_pte_encode(struct xe_vma *vma, struct xe_bo *bo,
 		  u64 offset, enum xe_cache_level cache,
-		  u32 flags, u32 pt_level);
+		  u32 pt_level);
 #endif
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -6,6 +6,7 @@
 #ifndef _XE_VM_H_
 #define _XE_VM_H_

+#include "xe_bo_types.h"
 #include "xe_macros.h"
 #include "xe_map.h"
 #include "xe_vm_types.h"
@@ -22,20 +23,19 @@ struct xe_file;
 struct xe_sync_entry;

 struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags);
-void xe_vm_free(struct kref *ref);

 struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id);
 int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node);

 static inline struct xe_vm *xe_vm_get(struct xe_vm *vm)
 {
-	kref_get(&vm->refcount);
+	drm_gpuvm_get(&vm->gpuvm);
 	return vm;
 }

 static inline void xe_vm_put(struct xe_vm *vm)
 {
-	kref_put(&vm->refcount, xe_vm_free);
+	drm_gpuvm_put(&vm->gpuvm);
 }

 int xe_vm_lock(struct xe_vm *vm, struct ww_acquire_ctx *ww,
@@ -61,7 +61,22 @@ static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm)
 }

 struct xe_vma *
-xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma);
+xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range);
+
+static inline struct xe_vm *gpuva_to_vm(struct drm_gpuva *gpuva)
+{
+	return container_of(gpuva->vm, struct xe_vm, gpuvm);
+}
+
+static inline struct xe_vma *gpuva_to_vma(struct drm_gpuva *gpuva)
+{
+	return container_of(gpuva, struct xe_vma, gpuva);
+}
+
+static inline struct xe_vma_op *gpuva_op_to_vma_op(struct drm_gpuva_op *op)
+{
+	return container_of(op, struct xe_vma_op, base);
+}

 /**
 * DOC: Provide accessors for vma members to facilitate easy change of
@@ -69,12 +84,12 @@ xe_vm_find_overlapping_vma(struct xe_vm *vm, struct xe_vma *vma);
 */
 static inline u64 xe_vma_start(struct xe_vma *vma)
 {
-	return vma->start;
+	return vma->gpuva.va.addr;
 }

 static inline u64 xe_vma_size(struct xe_vma *vma)
 {
-	return vma->end - vma->start + 1;
+	return vma->gpuva.va.range;
 }

 static inline u64 xe_vma_end(struct xe_vma *vma)
@@ -84,32 +99,33 @@ static inline u64 xe_vma_end(struct xe_vma *vma)

 static inline u64 xe_vma_bo_offset(struct xe_vma *vma)
 {
-	return vma->bo_offset;
+	return vma->gpuva.gem.offset;
 }

 static inline struct xe_bo *xe_vma_bo(struct xe_vma *vma)
 {
-	return vma->bo;
+	return !vma->gpuva.gem.obj ? NULL :
+		container_of(vma->gpuva.gem.obj, struct xe_bo, ttm.base);
 }

 static inline struct xe_vm *xe_vma_vm(struct xe_vma *vma)
 {
-	return vma->vm;
+	return container_of(vma->gpuva.vm, struct xe_vm, gpuvm);
 }

 static inline bool xe_vma_read_only(struct xe_vma *vma)
 {
-	return vma->pte_flags & XE_PTE_FLAG_READ_ONLY;
+	return vma->gpuva.flags & XE_VMA_READ_ONLY;
 }

 static inline u64 xe_vma_userptr(struct xe_vma *vma)
 {
-	return vma->userptr.ptr;
+	return vma->gpuva.gem.offset;
 }

 static inline bool xe_vma_is_null(struct xe_vma *vma)
 {
-	return vma->pte_flags & XE_PTE_FLAG_NULL;
+	return vma->gpuva.flags & DRM_GPUVA_SPARSE;
 }

 static inline bool xe_vma_has_no_bo(struct xe_vma *vma)
@@ -122,8 +138,6 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma)
 	return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma);
 }

-#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
-
 u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile);

 int xe_vm_create_ioctl(struct drm_device *dev, void *data,
@@ -218,6 +232,23 @@ void xe_vm_fence_all_extobjs(struct xe_vm *vm, struct dma_fence *fence,

 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);

+/**
+ * xe_vm_resv() - Return's the vm's reservation object
+ * @vm: The vm
+ *
+ * Return: Pointer to the vm's reservation object.
+ */
+static inline struct dma_resv *xe_vm_resv(struct xe_vm *vm)
+{
+	return drm_gpuvm_resv(&vm->gpuvm);
+}
+
+/**
+ * xe_vm_assert_held(vm) - Assert that the vm's reservation object is held.
+ * @vm: The vm
+ */
+#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
+
 #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
 #define vm_dbg drm_dbg
 #else

--- a/drivers/gpu/drm/xe/xe_vm_madvise.c
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -210,19 +210,12 @@ static const madvise_func madvise_funcs[] = {
 	[DRM_XE_VM_MADVISE_PIN] = madvise_pin,
 };

-static struct xe_vma *node_to_vma(const struct rb_node *node)
-{
-	BUILD_BUG_ON(offsetof(struct xe_vma, vm_node) != 0);
-	return (struct xe_vma *)node;
-}
-
 static struct xe_vma **
 get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range)
 {
-	struct xe_vma **vmas;
-	struct xe_vma *vma, *__vma, lookup;
+	struct xe_vma **vmas, **__vmas;
+	struct drm_gpuva *gpuva;
 	int max_vmas = 8;
-	struct rb_node *node;

 	lockdep_assert_held(&vm->lock);

@@ -230,62 +223,23 @@ get_vmas(struct xe_vm *vm, int *num_vmas, u64 addr, u64 range)
 	if (!vmas)
 		return NULL;

-	lookup.start = addr;
-	lookup.end = addr + range - 1;
+	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
+		struct xe_vma *vma = gpuva_to_vma(gpuva);

-	vma = xe_vm_find_overlapping_vma(vm, &lookup);
-	if (!vma)
-		return vmas;
+		if (xe_vma_is_userptr(vma))
+			continue;

-	if (!xe_vma_is_userptr(vma)) {
-		vmas[*num_vmas] = vma;
-		*num_vmas += 1;
-	}
-
-	node = &vma->vm_node;
-	while ((node = rb_next(node))) {
-		if (!xe_vma_cmp_vma_cb(&lookup, node)) {
-			__vma = node_to_vma(node);
-			if (xe_vma_is_userptr(__vma))
-				continue;
-
-			if (*num_vmas == max_vmas) {
-				struct xe_vma **__vmas =
-					krealloc(vmas, max_vmas * sizeof(*vmas),
-						 GFP_KERNEL);
-
-				if (!__vmas)
-					return NULL;
-				vmas = __vmas;
-			}
-			vmas[*num_vmas] = __vma;
-			*num_vmas += 1;
-		} else {
-			break;
+		if (*num_vmas == max_vmas) {
+			max_vmas <<= 1;
+			__vmas = krealloc(vmas, max_vmas * sizeof(*vmas),
+					  GFP_KERNEL);
+			if (!__vmas)
+				return NULL;
+			vmas = __vmas;
 		}
-	}

-	node = &vma->vm_node;
-	while ((node = rb_prev(node))) {
-		if (!xe_vma_cmp_vma_cb(&lookup, node)) {
-			__vma = node_to_vma(node);
-			if (xe_vma_is_userptr(__vma))
-				continue;
-
-			if (*num_vmas == max_vmas) {
-				struct xe_vma **__vmas =
-					krealloc(vmas, max_vmas * sizeof(*vmas),
-						 GFP_KERNEL);
-
-				if (!__vmas)
-					return NULL;
-				vmas = __vmas;
-			}
-			vmas[*num_vmas] = __vma;
-			*num_vmas += 1;
-		} else {
-			break;
-		}
+		vmas[*num_vmas] = vma;
+		*num_vmas += 1;
 	}

 	return vmas;

--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -6,6 +6,8 @@
 #ifndef _XE_VM_TYPES_H_
 #define _XE_VM_TYPES_H_

+#include <drm/drm_gpuvm.h>
+
 #include <linux/dma-resv.h>
 #include <linux/kref.h>
 #include <linux/mmu_notifier.h>
@@ -14,30 +16,23 @@
 #include "xe_device_types.h"
 #include "xe_pt_types.h"

+struct async_op_fence;
 struct xe_bo;
+struct xe_sync_entry;
 struct xe_vm;

-struct xe_vma {
-	struct rb_node vm_node;
-	/** @vm: VM which this VMA belongs to */
-	struct xe_vm *vm;
+#define TEST_VM_ASYNC_OPS_ERROR
+#define FORCE_ASYNC_OP_ERROR	BIT(31)

-	/**
-	 * @start: start address of this VMA within its address domain, end -
-	 * start + 1 == VMA size
-	 */
-	u64 start;
-	/** @end: end address of this VMA within its address domain */
-	u64 end;
-	/** @pte_flags: pte flags for this VMA */
-#define XE_PTE_FLAG_READ_ONLY		BIT(0)
-#define XE_PTE_FLAG_NULL		BIT(1)
-	u32 pte_flags;
-
-	/** @bo: BO if not a userptr, must be NULL is userptr */
-	struct xe_bo *bo;
-	/** @bo_offset: offset into BO if not a userptr, unused for userptr */
-	u64 bo_offset;
+#define XE_VMA_READ_ONLY	DRM_GPUVA_USERBITS
+#define XE_VMA_DESTROYED	(DRM_GPUVA_USERBITS << 1)
+#define XE_VMA_ATOMIC_PTE_BIT	(DRM_GPUVA_USERBITS << 2)
+#define XE_VMA_FIRST_REBIND	(DRM_GPUVA_USERBITS << 3)
+#define XE_VMA_LAST_REBIND	(DRM_GPUVA_USERBITS << 4)
+
+struct xe_vma {
+	/** @gpuva: Base GPUVA object */
+	struct drm_gpuva gpuva;

 	/** @tile_mask: Tile mask of where to create binding for this VMA */
 	u64 tile_mask;
@@ -51,40 +46,8 @@ struct xe_vma {
 	 */
 	u64 tile_present;

-	/**
-	 * @destroyed: VMA is destroyed, in the sense that it shouldn't be
-	 * subject to rebind anymore. This field must be written under
-	 * the vm lock in write mode and the userptr.notifier_lock in
-	 * either mode. Read under the vm lock or the userptr.notifier_lock in
-	 * write mode.
-	 */
-	bool destroyed;
-
-	/**
-	 * @first_munmap_rebind: VMA is first in a sequence of ops that triggers
-	 * a rebind (munmap style VM unbinds). This indicates the operation
-	 * using this VMA must wait on all dma-resv slots (wait for pending jobs
-	 * / trigger preempt fences).
-	 */
-	bool first_munmap_rebind;
-
-	/**
-	 * @last_munmap_rebind: VMA is first in a sequence of ops that triggers
-	 * a rebind (munmap style VM unbinds). This indicates the operation
-	 * using this VMA must install itself into kernel dma-resv slot (blocks
-	 * future jobs) and kick the rebind work in compute mode.
-	 */
-	bool last_munmap_rebind;
-
-	/** @use_atomic_access_pte_bit: Set atomic access bit in PTE */
-	bool use_atomic_access_pte_bit;
-
-	union {
-		/** @bo_link: link into BO if not a userptr */
-		struct list_head bo_link;
-		/** @userptr_link: link into VM repin list if userptr */
-		struct list_head userptr_link;
-	};
+	/** @userptr_link: link into VM repin list if userptr */
+	struct list_head userptr_link;

 	/**
 	 * @rebind_link: link into VM if this VMA needs rebinding, and
@@ -107,8 +70,6 @@ struct xe_vma {

 	/** @userptr: user pointer state */
 	struct {
-		/** @ptr: user pointer */
-		uintptr_t ptr;
 		/** @invalidate_link: Link for the vm::userptr.invalidated list */
 		struct list_head invalidate_link;
 		/**
@@ -153,24 +114,19 @@ struct xe_vma {

 struct xe_device;

-#define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
-
 struct xe_vm {
-	struct xe_device *xe;
+	/** @gpuvm: base GPUVM used to track VMAs */
+	struct drm_gpuvm gpuvm;

-	struct kref refcount;
+	struct xe_device *xe;

 	/* engine used for (un)binding vma's */
 	struct xe_engine *eng[XE_MAX_TILES_PER_DEVICE];

-	/** Protects @rebind_list and the page-table structures */
-	struct dma_resv resv;
-
 	/** @lru_bulk_move: Bulk LRU move list for this VM's BOs */
 	struct ttm_lru_bulk_move lru_bulk_move;

 	u64 size;
-	struct rb_root vmas;

 	struct xe_pt *pt_root[XE_MAX_TILES_PER_DEVICE];
 	struct xe_bo *scratch_bo[XE_MAX_TILES_PER_DEVICE];
@@ -351,4 +307,99 @@ struct xe_vm {
 	bool batch_invalidate_tlb;
 };

+/** struct xe_vma_op_map - VMA map operation */
+struct xe_vma_op_map {
+	/** @vma: VMA to map */
+	struct xe_vma *vma;
+	/** @immediate: Immediate bind */
+	bool immediate;
+	/** @read_only: Read only */
+	bool read_only;
+	/** @is_null: is NULL binding */
+	bool is_null;
+};
+
+/** struct xe_vma_op_unmap - VMA unmap operation */
+struct xe_vma_op_unmap {
+	/** @start: start of the VMA unmap */
+	u64 start;
+	/** @range: range of the VMA unmap */
+	u64 range;
+};
+
+/** struct xe_vma_op_remap - VMA remap operation */
+struct xe_vma_op_remap {
+	/** @prev: VMA preceding part of a split mapping */
+	struct xe_vma *prev;
+	/** @next: VMA subsequent part of a split mapping */
+	struct xe_vma *next;
+	/** @start: start of the VMA unmap */
+	u64 start;
+	/** @range: range of the VMA unmap */
+	u64 range;
+	/** @unmap_done: unmap operation in done */
+	bool unmap_done;
+};
+
+/** struct xe_vma_op_prefetch - VMA prefetch operation */
+struct xe_vma_op_prefetch {
+	/** @region: memory region to prefetch to */
+	u32 region;
+};
+
+/** enum xe_vma_op_flags - flags for VMA operation */
+enum xe_vma_op_flags {
+	/** @XE_VMA_OP_FIRST: first VMA operation for a set of syncs */
+	XE_VMA_OP_FIRST		= (0x1 << 0),
+	/** @XE_VMA_OP_LAST: last VMA operation for a set of syncs */
+	XE_VMA_OP_LAST		= (0x1 << 1),
+	/** @XE_VMA_OP_COMMITTED: VMA operation committed */
+	XE_VMA_OP_COMMITTED	= (0x1 << 2),
+};
+
+/** struct xe_vma_op - VMA operation */
+struct xe_vma_op {
+	/** @base: GPUVA base operation */
+	struct drm_gpuva_op base;
+	/**
+	 * @ops: GPUVA ops, when set call drm_gpuva_ops_free after this
+	 * operations is processed
+	 */
+	struct drm_gpuva_ops *ops;
+	/** @engine: engine for this operation */
+	struct xe_engine *engine;
+	/**
+	 * @syncs: syncs for this operation, only used on first and last
+	 * operation
+	 */
+	struct xe_sync_entry *syncs;
+	/** @num_syncs: number of syncs */
+	u32 num_syncs;
+	/** @link: async operation link */
+	struct list_head link;
+	/**
+	 * @fence: async operation fence, signaled on last operation complete
+	 */
+	struct async_op_fence *fence;
+	/** @tile_mask: gt mask for this operation */
+	u64 tile_mask;
+	/** @flags: operation flags */
+	enum xe_vma_op_flags flags;
+
+#ifdef TEST_VM_ASYNC_OPS_ERROR
+	/** @inject_error: inject error to test async op error handling */
+	bool inject_error;
+#endif
+
+	union {
+		/** @map: VMA map operation specific data */
+		struct xe_vma_op_map map;
+		/** @unmap: VMA unmap operation specific data */
+		struct xe_vma_op_unmap unmap;
+		/** @remap: VMA remap operation specific data */
+		struct xe_vma_op_remap remap;
+		/** @prefetch: VMA prefetch operation specific data */
+		struct xe_vma_op_prefetch prefetch;
+	};
+};
 #endif