Commit 731f74c5 authored by Dave Gordon's avatar Dave Gordon Committed by Tvrtko Ursulin

drm/i915: tweak gen6_for_{each_pde, all_pdes} macros

Gen8 versions of these macros were updated a few months ago
(e8ebd8e2 drm/i915: eliminate 'temp' in gen8_for_each macros)
originally because at least one iterator could generate an
out of bounds access, but also because eliminating the 'temp'
parameter generated smaller and faster code.

Matthew Auld recently noticed the same problem with the gen6
versions and provided a patch
https://lists.freedesktop.org/archives/intel-gfx/2016-June/099334.html
but while we're changing these, we might as well make them as
much like the gen8 versions as possible, including the style
of using "&& (..., true)" rather than ": (..., 1) : 0", and
of course eliminating the redundant 'temp'.

Furthermore, the "all_pdes" version is only used in one place,
so we can improve code efficiency by changing both the macro
parameters and the calling code to reduce extra dereferences.
Signed-off-by: default avatarDave Gordon <david.s.gordon@intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: default avatarMatthew Auld <matthew.auld@intel.com>
Signed-off-by: default avatarTvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1466793466-23500-1-git-send-email-david.s.gordon@intel.com
parent 883445d4
...@@ -1570,13 +1570,13 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) ...@@ -1570,13 +1570,13 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
struct i915_page_table *unused; struct i915_page_table *unused;
gen6_pte_t scratch_pte; gen6_pte_t scratch_pte;
uint32_t pd_entry; uint32_t pd_entry;
uint32_t pte, pde, temp; uint32_t pte, pde;
uint32_t start = ppgtt->base.start, length = ppgtt->base.total; uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
scratch_pte = vm->pte_encode(px_dma(vm->scratch_page), scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
I915_CACHE_LLC, true, 0); I915_CACHE_LLC, true, 0);
gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) { gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
u32 expected; u32 expected;
gen6_pte_t *pt_vaddr; gen6_pte_t *pt_vaddr;
const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]); const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
...@@ -1640,9 +1640,9 @@ static void gen6_write_page_range(struct drm_i915_private *dev_priv, ...@@ -1640,9 +1640,9 @@ static void gen6_write_page_range(struct drm_i915_private *dev_priv,
{ {
struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct i915_page_table *pt; struct i915_page_table *pt;
uint32_t pde, temp; uint32_t pde;
gen6_for_each_pde(pt, pd, start, length, temp, pde) gen6_for_each_pde(pt, pd, start, length, pde)
gen6_write_pde(pd, pde, pt); gen6_write_pde(pd, pde, pt);
/* Make sure write is complete before other code can use this page /* Make sure write is complete before other code can use this page
...@@ -1875,7 +1875,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, ...@@ -1875,7 +1875,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
struct i915_page_table *pt; struct i915_page_table *pt;
uint32_t start, length, start_save, length_save; uint32_t start, length, start_save, length_save;
uint32_t pde, temp; uint32_t pde;
int ret; int ret;
if (WARN_ON(start_in + length_in > ppgtt->base.total)) if (WARN_ON(start_in + length_in > ppgtt->base.total))
...@@ -1891,7 +1891,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, ...@@ -1891,7 +1891,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
* need allocation. The second stage marks use ptes within the page * need allocation. The second stage marks use ptes within the page
* tables. * tables.
*/ */
gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) { gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
if (pt != vm->scratch_pt) { if (pt != vm->scratch_pt) {
WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
continue; continue;
...@@ -1916,7 +1916,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, ...@@ -1916,7 +1916,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
start = start_save; start = start_save;
length = length_save; length = length_save;
gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) { gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
bitmap_zero(tmp_bitmap, GEN6_PTES); bitmap_zero(tmp_bitmap, GEN6_PTES);
...@@ -1985,15 +1985,16 @@ static void gen6_free_scratch(struct i915_address_space *vm) ...@@ -1985,15 +1985,16 @@ static void gen6_free_scratch(struct i915_address_space *vm)
static void gen6_ppgtt_cleanup(struct i915_address_space *vm) static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{ {
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
struct i915_page_directory *pd = &ppgtt->pd;
struct drm_device *dev = vm->dev;
struct i915_page_table *pt; struct i915_page_table *pt;
uint32_t pde; uint32_t pde;
drm_mm_remove_node(&ppgtt->node); drm_mm_remove_node(&ppgtt->node);
gen6_for_all_pdes(pt, ppgtt, pde) { gen6_for_all_pdes(pt, pd, pde)
if (pt != vm->scratch_pt) if (pt != vm->scratch_pt)
free_pt(ppgtt->base.dev, pt); free_pt(dev, pt);
}
gen6_free_scratch(vm); gen6_free_scratch(vm);
} }
...@@ -2059,9 +2060,9 @@ static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, ...@@ -2059,9 +2060,9 @@ static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
uint64_t start, uint64_t length) uint64_t start, uint64_t length)
{ {
struct i915_page_table *unused; struct i915_page_table *unused;
uint32_t pde, temp; uint32_t pde;
gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde)
ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
} }
......
...@@ -390,27 +390,27 @@ struct i915_hw_ppgtt { ...@@ -390,27 +390,27 @@ struct i915_hw_ppgtt {
void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
}; };
/* For each pde iterates over every pde between from start until start + length. /*
* If start, and start+length are not perfectly divisible, the macro will round * gen6_for_each_pde() iterates over every pde from start until start+length.
* down, and up as needed. The macro modifies pde, start, and length. Dev is * If start and start+length are not perfectly divisible, the macro will round
* only used to differentiate shift values. Temp is temp. On gen6/7, start = 0, * down and up as needed. Start=0 and length=2G effectively iterates over
* and length = 2G effectively iterates over every PDE in the system. * every PDE in the system. The macro modifies ALL its parameters except 'pd',
* * so each of the other parameters should preferably be a simple variable, or
* XXX: temp is not actually needed, but it saves doing the ALIGN operation. * at most an lvalue with no side-effects!
*/ */
#define gen6_for_each_pde(pt, pd, start, length, temp, iter) \ #define gen6_for_each_pde(pt, pd, start, length, iter) \
for (iter = gen6_pde_index(start); \ for (iter = gen6_pde_index(start); \
length > 0 && iter < I915_PDES ? \ length > 0 && iter < I915_PDES && \
(pt = (pd)->page_table[iter]), 1 : 0; \ (pt = (pd)->page_table[iter], true); \
iter++, \ ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT); \
temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT) - start, \ temp = min(temp - start, length); \
temp = min_t(unsigned, temp, length), \ start += temp, length -= temp; }), ++iter)
start += temp, length -= temp)
#define gen6_for_all_pdes(pt, pd, iter) \
#define gen6_for_all_pdes(pt, ppgtt, iter) \ for (iter = 0; \
for (iter = 0; \ iter < I915_PDES && \
pt = ppgtt->pd.page_table[iter], iter < I915_PDES; \ (pt = (pd)->page_table[iter], true); \
iter++) ++iter)
static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift) static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment