Commit bea5faf7 authored by Chris Wilson

drm/i915/gtt: Tidy up ppgtt insertion for gen8

Apply the new radix shift helpers to extract the multi-level indices
cleanly when inserting pte into the gtt tree.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190712112725.2892-5-chris@chris-wilson.co.uk
parent 8a98e839
...@@ -1131,47 +1131,28 @@ static inline struct sgt_dma { ...@@ -1131,47 +1131,28 @@ static inline struct sgt_dma {
return (struct sgt_dma) { sg, addr, addr + sg->length }; return (struct sgt_dma) { sg, addr, addr + sg->length };
} }
struct gen8_insert_pte { static __always_inline u64
u16 pml4e;
u16 pdpe;
u16 pde;
u16 pte;
};
static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
{
return (struct gen8_insert_pte) {
gen8_pml4e_index(start),
gen8_pdpe_index(start),
gen8_pde_index(start),
gen8_pte_index(start),
};
}
static __always_inline bool
gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt, gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
struct i915_page_directory *pdp, struct i915_page_directory *pdp,
struct sgt_dma *iter, struct sgt_dma *iter,
struct gen8_insert_pte *idx, u64 idx,
enum i915_cache_level cache_level, enum i915_cache_level cache_level,
u32 flags) u32 flags)
{ {
struct i915_page_directory *pd; struct i915_page_directory *pd;
const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
gen8_pte_t *vaddr; gen8_pte_t *vaddr;
bool ret;
GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
pd = i915_pd_entry(pdp, idx->pdpe); vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
do { do {
vaddr[idx->pte] = pte_encode | iter->dma; vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
iter->dma += I915_GTT_PAGE_SIZE; iter->dma += I915_GTT_PAGE_SIZE;
if (iter->dma >= iter->max) { if (iter->dma >= iter->max) {
iter->sg = __sg_next(iter->sg); iter->sg = __sg_next(iter->sg);
if (!iter->sg) { if (!iter->sg) {
ret = false; idx = 0;
break; break;
} }
...@@ -1179,30 +1160,22 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt, ...@@ -1179,30 +1160,22 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
iter->max = iter->dma + iter->sg->length; iter->max = iter->dma + iter->sg->length;
} }
if (++idx->pte == GEN8_PTES) { if (gen8_pd_index(++idx, 0) == 0) {
idx->pte = 0; if (gen8_pd_index(idx, 1) == 0) {
if (++idx->pde == I915_PDES) {
idx->pde = 0;
/* Limited by sg length for 3lvl */ /* Limited by sg length for 3lvl */
if (++idx->pdpe == GEN8_PML4ES_PER_PML4) { if (gen8_pd_index(idx, 2) == 0)
idx->pdpe = 0;
ret = true;
break; break;
}
GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); pd = pdp->entry[gen8_pd_index(idx, 2)];
pd = pdp->entry[idx->pdpe];
} }
kunmap_atomic(vaddr); kunmap_atomic(vaddr);
vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde)); vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
} }
} while (1); } while (1);
kunmap_atomic(vaddr); kunmap_atomic(vaddr);
return ret; return idx;
} }
static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
...@@ -1212,9 +1185,9 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, ...@@ -1212,9 +1185,9 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
{ {
struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
struct sgt_dma iter = sgt_dma(vma); struct sgt_dma iter = sgt_dma(vma);
struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, &idx, gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter,
vma->node.start >> GEN8_PTE_SHIFT,
cache_level, flags); cache_level, flags);
vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
...@@ -1231,39 +1204,38 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, ...@@ -1231,39 +1204,38 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
dma_addr_t rem = iter->sg->length; dma_addr_t rem = iter->sg->length;
do { do {
struct gen8_insert_pte idx = gen8_insert_pte(start);
struct i915_page_directory *pdp = struct i915_page_directory *pdp =
i915_pdp_entry(pml4, idx.pml4e); i915_pd_entry(pml4, __gen8_pte_index(start, 3));
struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe); struct i915_page_directory *pd =
unsigned int page_size; i915_pd_entry(pdp, __gen8_pte_index(start, 2));
bool maybe_64K = false;
gen8_pte_t encode = pte_encode; gen8_pte_t encode = pte_encode;
unsigned int maybe_64K = -1;
unsigned int page_size;
gen8_pte_t *vaddr; gen8_pte_t *vaddr;
u16 index, max; u16 index;
if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M && if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) { rem >= I915_GTT_PAGE_SIZE_2M &&
index = idx.pde; !__gen8_pte_index(start, 0)) {
max = I915_PDES; index = __gen8_pte_index(start, 1);
page_size = I915_GTT_PAGE_SIZE_2M;
encode |= GEN8_PDE_PS_2M; encode |= GEN8_PDE_PS_2M;
page_size = I915_GTT_PAGE_SIZE_2M;
vaddr = kmap_atomic_px(pd); vaddr = kmap_atomic_px(pd);
} else { } else {
struct i915_page_table *pt = i915_pt_entry(pd, idx.pde); struct i915_page_table *pt =
i915_pt_entry(pd, __gen8_pte_index(start, 1));
index = idx.pte; index = __gen8_pte_index(start, 0);
max = GEN8_PTES;
page_size = I915_GTT_PAGE_SIZE; page_size = I915_GTT_PAGE_SIZE;
if (!index && if (!index &&
vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
(IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
rem >= (max - index) * I915_GTT_PAGE_SIZE)) rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
maybe_64K = true; maybe_64K = __gen8_pte_index(start, 1);
vaddr = kmap_atomic_px(pt); vaddr = kmap_atomic_px(pt);
} }
...@@ -1284,16 +1256,16 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, ...@@ -1284,16 +1256,16 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
iter->dma = sg_dma_address(iter->sg); iter->dma = sg_dma_address(iter->sg);
iter->max = iter->dma + rem; iter->max = iter->dma + rem;
if (maybe_64K && index < max && if (maybe_64K != -1 && index < I915_PDES &&
!(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
(IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
rem >= (max - index) * I915_GTT_PAGE_SIZE))) rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
maybe_64K = false; maybe_64K = -1;
if (unlikely(!IS_ALIGNED(iter->dma, page_size))) if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
break; break;
} }
} while (rem >= page_size && index < max); } while (rem >= page_size && index < I915_PDES);
kunmap_atomic(vaddr); kunmap_atomic(vaddr);
...@@ -1303,14 +1275,14 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, ...@@ -1303,14 +1275,14 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
* it and have reached the end of the sg table and we have * it and have reached the end of the sg table and we have
* enough padding. * enough padding.
*/ */
if (maybe_64K && if (maybe_64K != -1 &&
(index == max || (index == I915_PDES ||
(i915_vm_has_scratch_64K(vma->vm) && (i915_vm_has_scratch_64K(vma->vm) &&
!iter->sg && IS_ALIGNED(vma->node.start + !iter->sg && IS_ALIGNED(vma->node.start +
vma->node.size, vma->node.size,
I915_GTT_PAGE_SIZE_2M)))) { I915_GTT_PAGE_SIZE_2M)))) {
vaddr = kmap_atomic_px(pd); vaddr = kmap_atomic_px(pd);
vaddr[idx.pde] |= GEN8_PDE_IPS_64K; vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
kunmap_atomic(vaddr); kunmap_atomic(vaddr);
page_size = I915_GTT_PAGE_SIZE_64K; page_size = I915_GTT_PAGE_SIZE_64K;
...@@ -1327,8 +1299,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, ...@@ -1327,8 +1299,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
u16 i; u16 i;
encode = vma->vm->scratch[0].encode; encode = vma->vm->scratch[0].encode;
vaddr = kmap_atomic_px(i915_pt_entry(pd, vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
idx.pde));
for (i = 1; i < index; i += 16) for (i = 1; i < index; i += 16)
memset64(vaddr + i, encode, 15); memset64(vaddr + i, encode, 15);
...@@ -1354,13 +1325,13 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, ...@@ -1354,13 +1325,13 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level, gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level,
flags); flags);
} else { } else {
struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
while (gen8_ppgtt_insert_pte_entries(ppgtt, while ((idx = gen8_ppgtt_insert_pte_entries(ppgtt,
i915_pdp_entry(pml4, idx.pml4e++), i915_pd_entry(pml4, gen8_pd_index(idx, 3)),
&iter, &idx, cache_level, &iter, idx, cache_level,
flags)) flags)))
GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); ;
vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
} }
......
...@@ -115,29 +115,18 @@ typedef u64 gen8_pte_t; ...@@ -115,29 +115,18 @@ typedef u64 gen8_pte_t;
#define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0)) #define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0))
#define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) #define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr)
/* GEN8 32b style address is defined as a 3 level page table: /*
* GEN8 32b style address is defined as a 3 level page table:
* 31:30 | 29:21 | 20:12 | 11:0 * 31:30 | 29:21 | 20:12 | 11:0
* PDPE | PDE | PTE | offset * PDPE | PDE | PTE | offset
* The difference as compared to normal x86 3 level page table is the PDPEs are * The difference as compared to normal x86 3 level page table is the PDPEs are
* programmed via register. * programmed via register.
*/ *
#define GEN8_3LVL_PDPES 4 * GEN8 48b style address is defined as a 4 level page table:
#define GEN8_PDE_SHIFT 21
#define GEN8_PDE_MASK 0x1ff
#define GEN8_PTE_MASK 0x1ff
#define GEN8_PTES I915_PTES(sizeof(gen8_pte_t))
/* GEN8 48b style address is defined as a 4 level page table:
* 47:39 | 38:30 | 29:21 | 20:12 | 11:0 * 47:39 | 38:30 | 29:21 | 20:12 | 11:0
* PML4E | PDPE | PDE | PTE | offset * PML4E | PDPE | PDE | PTE | offset
*/ */
#define GEN8_PML4ES_PER_PML4 512 #define GEN8_3LVL_PDPES 4
#define GEN8_PML4E_SHIFT 39
#define GEN8_PML4E_MASK (GEN8_PML4ES_PER_PML4 - 1)
#define GEN8_PDPE_SHIFT 30
/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
* tables */
#define GEN8_PDPE_MASK 0x1ff
#define PPAT_UNCACHED (_PAGE_PWT | _PAGE_PCD) #define PPAT_UNCACHED (_PAGE_PWT | _PAGE_PCD)
#define PPAT_CACHED_PDE 0 /* WB LLC */ #define PPAT_CACHED_PDE 0 /* WB LLC */
...@@ -521,15 +510,6 @@ static inline u32 gen6_pde_index(u32 addr) ...@@ -521,15 +510,6 @@ static inline u32 gen6_pde_index(u32 addr)
return i915_pde_index(addr, GEN6_PDE_SHIFT); return i915_pde_index(addr, GEN6_PDE_SHIFT);
} }
static inline unsigned int
i915_pdpes_per_pdp(const struct i915_address_space *vm)
{
if (i915_vm_is_4lvl(vm))
return GEN8_PML4ES_PER_PML4;
return GEN8_3LVL_PDPES;
}
static inline struct i915_page_table * static inline struct i915_page_table *
i915_pt_entry(const struct i915_page_directory * const pd, i915_pt_entry(const struct i915_page_directory * const pd,
const unsigned short n) const unsigned short n)
...@@ -544,66 +524,6 @@ i915_pd_entry(const struct i915_page_directory * const pdp, ...@@ -544,66 +524,6 @@ i915_pd_entry(const struct i915_page_directory * const pdp,
return pdp->entry[n]; return pdp->entry[n];
} }
static inline struct i915_page_directory *
i915_pdp_entry(const struct i915_page_directory * const pml4,
const unsigned short n)
{
return pml4->entry[n];
}
/* Equivalent to the gen6 version, For each pde iterates over every pde
* between from start until start + length. On gen8+ it simply iterates
* over every page directory entry in a page directory.
*/
#define gen8_for_each_pde(pt, pd, start, length, iter) \
for (iter = gen8_pde_index(start); \
length > 0 && iter < I915_PDES && \
(pt = i915_pt_entry(pd, iter), true); \
({ u64 temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT); \
temp = min(temp - start, length); \
start += temp, length -= temp; }), ++iter)
#define gen8_for_each_pdpe(pd, pdp, start, length, iter) \
for (iter = gen8_pdpe_index(start); \
length > 0 && iter < i915_pdpes_per_pdp(vm) && \
(pd = i915_pd_entry(pdp, iter), true); \
({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT); \
temp = min(temp - start, length); \
start += temp, length -= temp; }), ++iter)
#define gen8_for_each_pml4e(pdp, pml4, start, length, iter) \
for (iter = gen8_pml4e_index(start); \
length > 0 && iter < GEN8_PML4ES_PER_PML4 && \
(pdp = i915_pdp_entry(pml4, iter), true); \
({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT); \
temp = min(temp - start, length); \
start += temp, length -= temp; }), ++iter)
static inline u32 gen8_pte_index(u64 address)
{
return i915_pte_index(address, GEN8_PDE_SHIFT);
}
static inline u32 gen8_pde_index(u64 address)
{
return i915_pde_index(address, GEN8_PDE_SHIFT);
}
static inline u32 gen8_pdpe_index(u64 address)
{
return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
}
static inline u32 gen8_pml4e_index(u64 address)
{
return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
}
static inline u64 gen8_pte_count(u64 address, u64 length)
{
return i915_pte_count(address, length, GEN8_PDE_SHIFT);
}
static inline dma_addr_t static inline dma_addr_t
i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment