Commit e5815a2e authored by Michel Thierry, committed by Daniel Vetter

drm/i915/gen8: Split out mappings

When we do dynamic page table allocations for gen8, we'll need to have
more control over how and when we map page tables, similar to gen6.
In particular, DMA mappings for page directories/tables occur at allocation
time.
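
As a rough illustration (not the driver code itself): mapping at allocation time
boils down to DMA-mapping the backing page as soon as a page directory/table is
allocated and recording the bus address for later PDE/PDPE encoding. The helper
name example_map_px below is made up for illustration; dma_map_page() and
dma_mapping_error() are the standard kernel DMA APIs, and the real helpers used
in the diff are i915_dma_map_single()/i915_dma_unmap_single().

    /* Illustrative sketch only, not the patch's implementation. */
    static int example_map_px(struct device *dev, struct page *page,
                              dma_addr_t *daddr)
    {
            dma_addr_t addr;

            addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
            if (dma_mapping_error(dev, addr))
                    return -ENOMEM;

            *daddr = addr;  /* reused later when encoding PDEs/PDPEs */
            return 0;
    }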

This patch adds the functionality and calls it at init, so there should be
no functional change.

The PDPEs are still a special case for now. We'll need a function for
that in the future as well.

v2: Handle renamed unmap_and_free_page functions.
v3: Updated after teardown_va logic was removed.
v4: Rebase after s/page_tables/page_table/.
v5: No longer allocate all PDPs in GEN8+ systems with less than 4GB of
memory, and update populate_lr_context to handle this new case (proper
tracking will be added later in the patch series).
v6: Assign lrc page directory pointer addresses using a macro. (Mika)
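
For reference, the fallback the new ASSIGN_CTX_PDP() macro implements (see the
intel_lrc.c hunk below) amounts to the following; example_pdp_addr is an
illustrative name only, not a function in the driver:

    static u64 example_pdp_addr(struct i915_hw_ppgtt *ppgtt, int n)
    {
            /* Point a missing PDP at the scratch page directory instead. */
            return ppgtt->pdp.page_directory[n] ?
                   ppgtt->pdp.page_directory[n]->daddr :
                   ppgtt->scratch_pd->daddr;
    }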

Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
parent c488dbba
@@ -333,7 +333,7 @@ static void unmap_and_free_pt(struct i915_page_table *pt,
 }
 
 static void gen8_initialize_pt(struct i915_address_space *vm,
                               struct i915_page_table *pt)
 {
        gen8_pte_t *pt_vaddr, scratch_pte;
        int i;
@@ -431,17 +431,20 @@ static int alloc_pt_range(struct i915_page_directory *pd, uint16_t pde, size_t c
        return ret;
 }
 
-static void unmap_and_free_pd(struct i915_page_directory *pd)
+static void unmap_and_free_pd(struct i915_page_directory *pd,
+                              struct drm_device *dev)
 {
        if (pd->page) {
+               i915_dma_unmap_single(pd, dev);
                __free_page(pd->page);
                kfree(pd);
        }
 }
 
-static struct i915_page_directory *alloc_pd_single(void)
+static struct i915_page_directory *alloc_pd_single(struct drm_device *dev)
 {
        struct i915_page_directory *pd;
+       int ret;
 
        pd = kzalloc(sizeof(*pd), GFP_KERNEL);
        if (!pd)
@@ -453,6 +456,13 @@ static struct i915_page_directory *alloc_pd_single(void)
                return ERR_PTR(-ENOMEM);
        }
 
+       ret = i915_dma_map_single(pd, dev);
+       if (ret) {
+               __free_page(pd->page);
+               kfree(pd);
+               return ERR_PTR(ret);
+       }
+
        return pd;
 }
@@ -637,6 +647,27 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
        kunmap_atomic(page_directory);
 }
 
+/* It's likely we'll map more than one pagetable at a time. This function will
+ * save us unnecessary kmap calls, but do no more functionally than multiple
+ * calls to map_pt. */
+static void gen8_map_pagetable_range(struct i915_page_directory *pd,
+                                    uint64_t start,
+                                    uint64_t length,
+                                    struct drm_device *dev)
+{
+       gen8_pde_t *page_directory = kmap_atomic(pd->page);
+       struct i915_page_table *pt;
+       uint64_t temp, pde;
+
+       gen8_for_each_pde(pt, pd, start, length, temp, pde)
+               __gen8_do_map_pt(page_directory + pde, pt, dev);
+
+       if (!HAS_LLC(dev))
+               drm_clflush_virt_range(page_directory, PAGE_SIZE);
+
+       kunmap_atomic(page_directory);
+}
+
 static void gen8_free_page_tables(struct i915_page_directory *pd, struct drm_device *dev)
 {
        int i;
@@ -662,10 +693,10 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
                        continue;
 
                gen8_free_page_tables(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
-               unmap_and_free_pd(ppgtt->pdp.page_directory[i]);
+               unmap_and_free_pd(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
        }
 
-       unmap_and_free_pd(ppgtt->scratch_pd);
+       unmap_and_free_pd(ppgtt->scratch_pd, ppgtt->base.dev);
        unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
 }
@@ -677,41 +708,30 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
        gen8_ppgtt_free(ppgtt);
 }
 
-static int gen8_ppgtt_alloc_pagetabs(struct i915_page_directory *pd,
+static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt,
+                                    struct i915_page_directory *pd,
                                     uint64_t start,
-                                    uint64_t length,
-                                    struct i915_address_space *vm)
+                                    uint64_t length)
 {
+       struct drm_device *dev = ppgtt->base.dev;
        struct i915_page_table *unused;
        uint64_t temp;
        uint32_t pde;
 
        gen8_for_each_pde(unused, pd, start, length, temp, pde) {
                WARN_ON(unused);
-               pd->page_table[pde] = alloc_pt_single(vm->dev);
+               pd->page_table[pde] = alloc_pt_single(dev);
                if (IS_ERR(pd->page_table[pde]))
                        goto unwind_out;
 
-               gen8_initialize_pt(vm, pd->page_table[pde]);
-       }
-
-       /* XXX: Still alloc all page tables in systems with less than
-        * 4GB of memory. This won't be needed after a subsequent patch.
-        */
-       while (pde < I915_PDES) {
-               pd->page_table[pde] = alloc_pt_single(vm->dev);
-               if (IS_ERR(pd->page_table[pde]))
-                       goto unwind_out;
-
-               gen8_initialize_pt(vm, pd->page_table[pde]);
-               pde++;
+               gen8_initialize_pt(&ppgtt->base, pd->page_table[pde]);
        }
 
        return 0;
 
 unwind_out:
        while (pde--)
-               unmap_and_free_pt(pd->page_table[pde], vm->dev);
+               unmap_and_free_pt(pd->page_table[pde], dev);
 
        return -ENOMEM;
 }
@@ -721,6 +741,7 @@ static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt,
                                             uint64_t start,
                                             uint64_t length)
 {
+       struct drm_device *dev = ppgtt->base.dev;
        struct i915_page_directory *unused;
        uint64_t temp;
        uint32_t pdpe;
@@ -730,40 +751,29 @@ static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt,
        gen8_for_each_pdpe(unused, pdp, start, length, temp, pdpe) {
                WARN_ON(unused);
-               pdp->page_directory[pdpe] = alloc_pd_single();
-               if (IS_ERR(pdp->page_directory[pdpe]))
-                       goto unwind_out;
-
-               gen8_initialize_pd(&ppgtt->base,
-                                  ppgtt->pdp.page_directory[pdpe]);
-       }
-
-       /* XXX: Still alloc all page directories in systems with less than
-        * 4GB of memory. This won't be needed after a subsequent patch.
-        */
-       while (pdpe < GEN8_LEGACY_PDPES) {
-               pdp->page_directory[pdpe] = alloc_pd_single();
+               pdp->page_directory[pdpe] = alloc_pd_single(dev);
                if (IS_ERR(pdp->page_directory[pdpe]))
                        goto unwind_out;
 
                gen8_initialize_pd(&ppgtt->base,
                                   ppgtt->pdp.page_directory[pdpe]);
-               pdpe++;
        }
 
        return 0;
 
 unwind_out:
        while (pdpe--)
-               unmap_and_free_pd(pdp->page_directory[pdpe]);
+               unmap_and_free_pd(pdp->page_directory[pdpe], dev);
 
        return -ENOMEM;
 }
 
-static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
+static int gen8_alloc_va_range(struct i915_address_space *vm,
                            uint64_t start,
                            uint64_t length)
 {
+       struct i915_hw_ppgtt *ppgtt =
+               container_of(vm, struct i915_hw_ppgtt, base);
        struct i915_page_directory *pd;
        uint64_t temp;
        uint32_t pdpe;
@@ -774,23 +784,9 @@ static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
                return ret;
 
        gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
-               ret = gen8_ppgtt_alloc_pagetabs(pd, start, length,
-                                               &ppgtt->base);
-               if (ret)
-                       goto err_out;
-       }
-
-       /* XXX: We allocated all page directories in systems with less than
-        * 4GB of memory. So initalize page tables of all PDPs.
-        * This won't be needed after the next patch.
-        */
-       while (pdpe < GEN8_LEGACY_PDPES) {
-               ret = gen8_ppgtt_alloc_pagetabs(ppgtt->pdp.page_directory[pdpe], start, length,
-                                               &ppgtt->base);
+               ret = gen8_ppgtt_alloc_pagetabs(ppgtt, pd, start, length);
                if (ret)
                        goto err_out;
-
-               pdpe++;
        }
 
        return 0;
@@ -800,136 +796,54 @@ static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
        return ret;
 }
 
-static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
-                                            const int pd)
-{
-       dma_addr_t pd_addr;
-       int ret;
-
-       pd_addr = pci_map_page(ppgtt->base.dev->pdev,
-                              ppgtt->pdp.page_directory[pd]->page, 0,
-                              PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-
-       ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
-       if (ret)
-               return ret;
-
-       ppgtt->pdp.page_directory[pd]->daddr = pd_addr;
-
-       return 0;
-}
-
-static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
-                                       const int pd,
-                                       const int pt)
-{
-       dma_addr_t pt_addr;
-       struct i915_page_directory *pdir = ppgtt->pdp.page_directory[pd];
-       struct i915_page_table *ptab = pdir->page_table[pt];
-       struct page *p = ptab->page;
-       int ret;
-
-       gen8_initialize_pt(&ppgtt->base, ptab);
-
-       pt_addr = pci_map_page(ppgtt->base.dev->pdev,
-                              p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-       ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
-       if (ret)
-               return ret;
-
-       ptab->daddr = pt_addr;
-
-       return 0;
-}
-
 /*
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
  * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
  * space.
  *
- * FIXME: split allocation into smaller pieces. For now we only ever do this
- * once, but with full PPGTT, the multiple contiguous allocations will be bad.
- * TODO: Do something with the size parameter
  */
 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 {
-       int i, j, ret;
-
-       if (size % (1<<30))
-               DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
+       struct i915_page_directory *pd;
+       uint64_t temp, start = 0;
+       const uint64_t orig_length = size;
+       uint32_t pdpe;
+       int ret;
 
        ppgtt->base.start = 0;
        ppgtt->base.total = size;
+       ppgtt->base.clear_range = gen8_ppgtt_clear_range;
+       ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
+       ppgtt->base.cleanup = gen8_ppgtt_cleanup;
+       ppgtt->switch_mm = gen8_mm_switch;
 
        ppgtt->scratch_pt = alloc_pt_single(ppgtt->base.dev);
        if (IS_ERR(ppgtt->scratch_pt))
                return PTR_ERR(ppgtt->scratch_pt);
 
-       ppgtt->scratch_pd = alloc_pd_single();
+       ppgtt->scratch_pd = alloc_pd_single(ppgtt->base.dev);
        if (IS_ERR(ppgtt->scratch_pd))
                return PTR_ERR(ppgtt->scratch_pd);
 
        gen8_initialize_pt(&ppgtt->base, ppgtt->scratch_pt);
        gen8_initialize_pd(&ppgtt->base, ppgtt->scratch_pd);
 
-       /* 1. Do all our allocations for page directories and page tables. */
-       ret = gen8_ppgtt_alloc(ppgtt, ppgtt->base.start, ppgtt->base.total);
+       ret = gen8_alloc_va_range(&ppgtt->base, start, size);
        if (ret) {
-               unmap_and_free_pd(ppgtt->scratch_pd);
+               unmap_and_free_pd(ppgtt->scratch_pd, ppgtt->base.dev);
                unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
                return ret;
        }
 
-       /*
-        * 2. Create DMA mappings for the page directories and page tables.
-        */
-       for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
-               ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
-               if (ret)
-                       goto bail;
-
-               for (j = 0; j < I915_PDES; j++) {
-                       ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
-                       if (ret)
-                               goto bail;
-               }
-       }
-
-       /*
-        * 3. Map all the page directory entries to point to the page tables
-        * we've allocated.
-        *
-        * For now, the PPGTT helper functions all require that the PDEs are
-        * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
-        * will never need to touch the PDEs again.
-        */
-       for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
-               struct i915_page_directory *pd = ppgtt->pdp.page_directory[i];
-               gen8_pde_t *pd_vaddr;
-               pd_vaddr = kmap_atomic(ppgtt->pdp.page_directory[i]->page);
-               for (j = 0; j < I915_PDES; j++) {
-                       struct i915_page_table *pt = pd->page_table[j];
-                       dma_addr_t addr = pt->daddr;
-                       pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
-                                                     I915_CACHE_LLC);
-               }
-               if (!HAS_LLC(ppgtt->base.dev))
-                       drm_clflush_virt_range(pd_vaddr, PAGE_SIZE);
-               kunmap_atomic(pd_vaddr);
-       }
-
-       ppgtt->switch_mm = gen8_mm_switch;
-       ppgtt->base.clear_range = gen8_ppgtt_clear_range;
-       ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
-       ppgtt->base.cleanup = gen8_ppgtt_cleanup;
+       start = 0;
+       size = orig_length;
+
+       gen8_for_each_pdpe(pd, &ppgtt->pdp, start, size, temp, pdpe)
+               gen8_map_pagetable_range(pd, start, size, ppgtt->base.dev);
 
        ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
        return 0;
-
-bail:
-       gen8_ppgtt_free(ppgtt);
-       return ret;
 }
 
 static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
@@ -1354,7 +1268,7 @@ static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
        }
 
        unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
-       unmap_and_free_pd(&ppgtt->pd);
+       unmap_and_free_pd(&ppgtt->pd, ppgtt->base.dev);
 }
 
 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
@@ -188,6 +188,15 @@
 #define GEN8_CTX_FORCE_RESTORE (1<<2)
 #define GEN8_CTX_L3LLC_COHERENT (1<<5)
 #define GEN8_CTX_PRIVILEGE (1<<8)
+
+#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) { \
+       const u64 _addr = ppgtt->pdp.page_directory[n] ? \
+               ppgtt->pdp.page_directory[n]->daddr : \
+               ppgtt->scratch_pd->daddr; \
+       reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \
+       reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
+}
+
 enum {
        ADVANCED_CONTEXT = 0,
        LEGACY_CONTEXT,
@@ -1755,14 +1764,14 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
        reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
        reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
        reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
-       reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[3]->daddr);
-       reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[3]->daddr);
-       reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[2]->daddr);
-       reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[2]->daddr);
-       reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[1]->daddr);
-       reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[1]->daddr);
-       reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[0]->daddr);
-       reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[0]->daddr);
+       /* XXX: Systems with less than 4GB of memory do not have
+        * all PDPs. Proper PDP tracking will be added in a
+        * subsequent patch.
+        */
+       ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
+       ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
+       ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
+       ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
        if (ring->id == RCS) {
                reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
                reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;