Commit 4c2be3c5 authored by Chris Wilson

drm/i915/gtt: Recursive ppgtt clear for gen8

With an explicit level, we can refactor the separate clear functions
as a simple recursive function. The additional knowledge of the level
allows us to spot when we can free an entire subtree at once.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190712112725.2892-3-chris@chris-wilson.co.uk
parent 1eda701e
...@@ -94,6 +94,20 @@ config DRM_I915_TRACE_GEM ...@@ -94,6 +94,20 @@ config DRM_I915_TRACE_GEM
If in doubt, say "N". If in doubt, say "N".
config DRM_I915_TRACE_GTT
bool "Insert extra ftrace output from the GTT internals"
depends on DRM_I915_DEBUG_GEM
select TRACING
default n
help
Enable additional and verbose debugging output that will spam
ordinary tests, but may be vital for post-mortem debugging when
used with /proc/sys/kernel/ftrace_dump_on_oops
Recommended for driver developers only.
If in doubt, say "N".
config DRM_I915_SW_FENCE_DEBUG_OBJECTS config DRM_I915_SW_FENCE_DEBUG_OBJECTS
bool "Enable additional driver debugging for fence objects" bool "Enable additional driver debugging for fence objects"
depends on DRM_I915 depends on DRM_I915
......
...@@ -46,6 +46,12 @@ ...@@ -46,6 +46,12 @@
#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
#define DBG(...) trace_printk(__VA_ARGS__)
#else
#define DBG(...)
#endif
/** /**
* DOC: Global GTT views * DOC: Global GTT views
* *
...@@ -796,6 +802,9 @@ release_pd_entry(struct i915_page_directory * const pd, ...@@ -796,6 +802,9 @@ release_pd_entry(struct i915_page_directory * const pd,
{ {
bool free = false; bool free = false;
if (atomic_add_unless(&pt->used, -1, 1))
return false;
spin_lock(&pd->lock); spin_lock(&pd->lock);
if (atomic_dec_and_test(&pt->used)) { if (atomic_dec_and_test(&pt->used)) {
clear_pd_entry(pd, idx, scratch); clear_pd_entry(pd, idx, scratch);
...@@ -927,86 +936,101 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) ...@@ -927,86 +936,101 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
free_scratch(vm); free_scratch(vm);
} }
/* Removes entries from a single page table, releasing it if it's empty. static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
* Caller can use the return value to update higher-level entries. struct i915_page_directory * const pd,
*/ u64 start, const u64 end, int lvl)
static void gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
struct i915_page_table *pt,
u64 start, u64 length)
{ {
const unsigned int num_entries = gen8_pte_count(start, length); const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
gen8_pte_t *vaddr; unsigned int idx, len;
len = gen8_pd_range(start, end, lvl--, &idx);
DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n",
__func__, vm, lvl + 1, start, end,
idx, len, atomic_read(px_used(pd)));
GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
do {
struct i915_page_table *pt = pd->entry[idx];
if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
gen8_pd_contains(start, end, lvl)) {
DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
__func__, vm, lvl + 1, idx, start, end);
clear_pd_entry(pd, idx, scratch);
__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
start += (u64)I915_PDES << gen8_pd_shift(lvl);
continue;
}
if (lvl) {
start = __gen8_ppgtt_clear(vm, as_pd(pt),
start, end, lvl);
} else {
unsigned int count;
u64 *vaddr;
count = gen8_pt_count(start, end);
DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d} removing pte\n",
__func__, vm, lvl, start, end,
gen8_pd_index(start, 0), count,
atomic_read(&pt->used));
GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
vaddr = kmap_atomic_px(pt); vaddr = kmap_atomic_px(pt);
memset64(vaddr + gen8_pte_index(start), memset64(vaddr + gen8_pd_index(start, 0),
vm->scratch[0].encode, vm->scratch[0].encode,
num_entries); count);
kunmap_atomic(vaddr); kunmap_atomic(vaddr);
GEM_BUG_ON(num_entries > atomic_read(&pt->used)); atomic_sub(count, &pt->used);
start += count;
}
atomic_sub(num_entries, &pt->used); if (release_pd_entry(pd, idx, pt, scratch))
free_px(vm, pt);
} while (idx++, --len);
return start;
} }
static void gen8_ppgtt_clear_pd(struct i915_address_space *vm, static void gen8_ppgtt_clear(struct i915_address_space *vm,
struct i915_page_directory *pd,
u64 start, u64 length) u64 start, u64 length)
{ {
struct i915_page_table *pt; GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
u32 pde; GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
gen8_for_each_pde(pt, pd, start, length, pde) { start >>= GEN8_PTE_SHIFT;
atomic_inc(&pt->used); length >>= GEN8_PTE_SHIFT;
gen8_ppgtt_clear_pt(vm, pt, start, length); GEM_BUG_ON(length == 0);
if (release_pd_entry(pd, pde, pt, &vm->scratch[1]))
free_px(vm, pt); __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
} start, start + length, vm->top);
} }
/* Removes entries from a single page dir pointer, releasing it if it's empty. static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
* Caller can use the return value to update higher-level entries struct i915_page_directory *pd,
*/
static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
struct i915_page_directory * const pdp,
u64 start, u64 length) u64 start, u64 length)
{ {
struct i915_page_directory *pd; GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
unsigned int pdpe; GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { start >>= GEN8_PTE_SHIFT;
atomic_inc(px_used(pd)); length >>= GEN8_PTE_SHIFT;
gen8_ppgtt_clear_pd(vm, pd, start, length);
if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2]))
free_px(vm, pd);
}
}
static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, __gen8_ppgtt_clear(vm, pd, start, start + length, 1);
u64 start, u64 length)
{
gen8_ppgtt_clear_pdp(vm, i915_vm_to_ppgtt(vm)->pd, start, length);
} }
/* Removes entries from a single pml4. static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
* This is the top-level structure in 4-level page tables used on gen8+. struct i915_page_directory * const pdp,
* Empty entries are always scratch pml4e.
*/
static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
u64 start, u64 length) u64 start, u64 length)
{ {
struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
struct i915_page_directory * const pml4 = ppgtt->pd; GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
struct i915_page_directory *pdp;
unsigned int pml4e;
GEM_BUG_ON(!i915_vm_is_4lvl(vm)); start >>= GEN8_PTE_SHIFT;
length >>= GEN8_PTE_SHIFT;
gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { __gen8_ppgtt_clear(vm, pdp, start, start + length, 2);
atomic_inc(px_used(pdp));
gen8_ppgtt_clear_pdp(vm, pdp, start, length);
if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
free_px(vm, pdp);
}
} }
static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
...@@ -1171,7 +1195,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, ...@@ -1171,7 +1195,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3])) if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3]))
free_px(vm, pdp); free_px(vm, pdp);
unwind: unwind:
gen8_ppgtt_clear_4lvl(vm, from, start - from); gen8_ppgtt_clear(vm, from, start - from);
out: out:
if (alloc) if (alloc)
free_px(vm, alloc); free_px(vm, alloc);
...@@ -1484,6 +1508,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) ...@@ -1484,6 +1508,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
fill_px(pd, vm->scratch[1].encode); fill_px(pd, vm->scratch[1].encode);
set_pd_entry(pdp, pdpe, pd); set_pd_entry(pdp, pdpe, pd);
atomic_inc(px_used(pd)); /* keep pinned */
} }
return 0; return 0;
...@@ -1524,6 +1549,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm) ...@@ -1524,6 +1549,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
} }
fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count); fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
atomic_inc(px_used(pd)); /* mark as pinned */
return pd; return pd;
} }
...@@ -1573,7 +1599,6 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) ...@@ -1573,7 +1599,6 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
if (i915_vm_is_4lvl(&ppgtt->vm)) { if (i915_vm_is_4lvl(&ppgtt->vm)) {
ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl; ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl; ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
} else { } else {
if (intel_vgpu_active(i915)) { if (intel_vgpu_active(i915)) {
err = gen8_preallocate_top_level_pdp(ppgtt); err = gen8_preallocate_top_level_pdp(ppgtt);
...@@ -1583,9 +1608,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) ...@@ -1583,9 +1608,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl; ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl; ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl;
} }
ppgtt->vm.clear_range = gen8_ppgtt_clear;
if (intel_vgpu_active(i915)) if (intel_vgpu_active(i915))
gen8_ppgtt_notify_vgt(ppgtt, true); gen8_ppgtt_notify_vgt(ppgtt, true);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment