Commit 671f9a3e authored by Anup Patel's avatar Anup Patel Committed by Paul Walmsley

RISC-V: Setup initial page tables in two stages

Currently, the setup_vm() does initial page table setup in one-shot
very early before enabling MMU. Due to this, the setup_vm() has to map
all possible kernel virtual addresses since it does not know size and
location of RAM. This means we have kernel mappings for non-existent
RAM and any buggy driver (or kernel) code doing out-of-bound access
to RAM will not fault and cause underterministic behaviour.

Further, the setup_vm() creates PMD mappings (i.e. 2M mappings) for
RV64 systems. This means for PAGE_OFFSET=0xffffffe000000000 (i.e.
MAXPHYSMEM_128GB=y), the setup_vm() will require 129 pages (i.e.
516 KB) of memory for initial page tables which is never freed. The
memory required for initial page tables will further increase if
we chose a lower value of PAGE_OFFSET (e.g. 0xffffff0000000000)

This patch implements two-staged initial page table setup, as follows:
1. Early (i.e. setup_vm()): This stage maps kernel image and DTB in
a early page table (i.e. early_pg_dir). The early_pg_dir will be used
only by boot HART so it can be freed as-part of init memory free-up.
2. Final (i.e. setup_vm_final()): This stage maps all possible RAM
banks in the final page table (i.e. swapper_pg_dir). The boot HART
will start using swapper_pg_dir at the end of setup_vm_final(). All
non-boot HARTs directly use the swapper_pg_dir created by boot HART.

We have following advantages with this new approach:
1. Kernel mappings for non-existent RAM don't exists anymore.
2. Memory consumed by initial page tables is now indpendent of the
chosen PAGE_OFFSET.
3. Memory consumed by initial page tables on RV64 system is 2 pages
(i.e. 8 KB) which has significantly reduced and these pages will be
freed as-part of the init memory free-up.

The patch also provides a foundation for implementing strict kernel
mappings where we protect kernel text and rodata using PTE permissions.
Suggested-by: default avatarMike Rapoport <rppt@linux.ibm.com>
Signed-off-by: default avatarAnup Patel <anup.patel@wdc.com>
[paul.walmsley@sifive.com: updated to apply; fixed a checkpatch warning]
Signed-off-by: default avatarPaul Walmsley <paul.walmsley@sifive.com>
parent 2ebca1cb
......@@ -21,6 +21,11 @@
*/
enum fixed_addresses {
FIX_HOLE,
#define FIX_FDT_SIZE SZ_1M
FIX_FDT_END,
FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
FIX_PTE,
FIX_PMD,
FIX_EARLYCON_MEM_BASE,
__end_of_fixed_addresses
};
......
......@@ -70,6 +70,11 @@ static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
}
static inline unsigned long _pmd_pfn(pmd_t pmd)
{
return pmd_val(pmd) >> _PAGE_PFN_SHIFT;
}
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
......
......@@ -59,6 +59,8 @@
#define PAGE_KERNEL __pgprot(_PAGE_KERNEL)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC)
#define PAGE_TABLE __pgprot(_PAGE_TABLE)
extern pgd_t swapper_pg_dir[];
/* MAP_PRIVATE permissions: xwr (copy-on-write) */
......@@ -118,6 +120,11 @@ static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot)
return __pgd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
}
static inline unsigned long _pgd_pfn(pgd_t pgd)
{
return pgd_val(pgd) >> _PAGE_PFN_SHIFT;
}
#define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
/* Locate an entry in the page global directory */
......@@ -400,6 +407,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
#define kern_addr_valid(addr) (1) /* FIXME */
#endif
extern void *dtb_early_va;
extern void setup_bootmem(void);
extern void paging_init(void);
......
......@@ -55,7 +55,9 @@ clear_bss_done:
/* Initialize page tables and relocate to virtual addresses */
la sp, init_thread_union + THREAD_SIZE
mv a0, s1
call setup_vm
la a0, early_pg_dir
call relocate
/* Restore C environment */
......@@ -64,25 +66,23 @@ clear_bss_done:
la sp, init_thread_union + THREAD_SIZE
/* Start the kernel */
mv a0, s1
call parse_dtb
tail start_kernel
relocate:
/* Relocate return address */
li a1, PAGE_OFFSET
la a0, _start
sub a1, a1, a0
la a2, _start
sub a1, a1, a2
add ra, ra, a1
/* Point stvec to virtual address of intruction after satp write */
la a0, 1f
add a0, a0, a1
csrw CSR_STVEC, a0
la a2, 1f
add a2, a2, a1
csrw CSR_STVEC, a2
/* Compute satp for kernel page tables, but don't load it yet */
la a2, swapper_pg_dir
srl a2, a2, PAGE_SHIFT
srl a2, a0, PAGE_SHIFT
li a1, SATP_MODE
or a2, a2, a1
......@@ -148,6 +148,7 @@ relocate:
fence
/* Enable virtual memory and relocate to virtual address */
la a0, swapper_pg_dir
call relocate
tail smp_callin
......
......@@ -39,11 +39,9 @@ struct screen_info screen_info = {
atomic_t hart_lottery;
unsigned long boot_cpu_hartid;
void __init parse_dtb(phys_addr_t dtb_phys)
void __init parse_dtb(void)
{
void *dtb = __va(dtb_phys);
if (early_init_dt_scan(dtb))
if (early_init_dt_scan(dtb_early_va))
return;
pr_err("No DTB passed to the kernel\n");
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2012 Regents of the University of California
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
*/
#include <linux/init.h>
......@@ -41,13 +42,6 @@ void setup_zero_page(void)
memset((void *)empty_zero_page, 0, PAGE_SIZE);
}
void __init paging_init(void)
{
setup_zero_page();
local_flush_tlb_all();
zone_sizes_init();
}
void __init mem_init(void)
{
#ifdef CONFIG_FLATMEM
......@@ -143,17 +137,15 @@ EXPORT_SYMBOL(va_pa_offset);
unsigned long pfn_base;
EXPORT_SYMBOL(pfn_base);
void *dtb_early_va;
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
static bool mmu_enabled;
#ifndef __PAGETABLE_PMD_FOLDED
#define NUM_SWAPPER_PMDS ((uintptr_t)-PAGE_OFFSET >> PGDIR_SHIFT)
pmd_t swapper_pmd[PTRS_PER_PMD*((-PAGE_OFFSET)/PGDIR_SIZE)] __page_aligned_bss;
pmd_t trampoline_pmd[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
#endif
#define MAX_EARLY_MAPPING_SIZE SZ_128M
pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
......@@ -172,6 +164,156 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
}
}
static pte_t *__init get_pte_virt(phys_addr_t pa)
{
if (mmu_enabled) {
clear_fixmap(FIX_PTE);
return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
} else {
return (pte_t *)((uintptr_t)pa);
}
}
static phys_addr_t __init alloc_pte(uintptr_t va)
{
/*
* We only create PMD or PGD early mappings so we
* should never reach here with MMU disabled.
*/
BUG_ON(!mmu_enabled);
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}
static void __init create_pte_mapping(pte_t *ptep,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
{
uintptr_t pte_index = pte_index(va);
BUG_ON(sz != PAGE_SIZE);
if (pte_none(ptep[pte_index]))
ptep[pte_index] = pfn_pte(PFN_DOWN(pa), prot);
}
#ifndef __PAGETABLE_PMD_FOLDED
pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
#if MAX_EARLY_MAPPING_SIZE < PGDIR_SIZE
#define NUM_EARLY_PMDS 1UL
#else
#define NUM_EARLY_PMDS (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE)
#endif
pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);
static pmd_t *__init get_pmd_virt(phys_addr_t pa)
{
if (mmu_enabled) {
clear_fixmap(FIX_PMD);
return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
} else {
return (pmd_t *)((uintptr_t)pa);
}
}
static phys_addr_t __init alloc_pmd(uintptr_t va)
{
uintptr_t pmd_num;
if (mmu_enabled)
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
BUG_ON(pmd_num >= NUM_EARLY_PMDS);
return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
}
static void __init create_pmd_mapping(pmd_t *pmdp,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
{
pte_t *ptep;
phys_addr_t pte_phys;
uintptr_t pmd_index = pmd_index(va);
if (sz == PMD_SIZE) {
if (pmd_none(pmdp[pmd_index]))
pmdp[pmd_index] = pfn_pmd(PFN_DOWN(pa), prot);
return;
}
if (pmd_none(pmdp[pmd_index])) {
pte_phys = alloc_pte(va);
pmdp[pmd_index] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
ptep = get_pte_virt(pte_phys);
memset(ptep, 0, PAGE_SIZE);
} else {
pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_index]));
ptep = get_pte_virt(pte_phys);
}
create_pte_mapping(ptep, va, pa, sz, prot);
}
#define pgd_next_t pmd_t
#define alloc_pgd_next(__va) alloc_pmd(__va)
#define get_pgd_next_virt(__pa) get_pmd_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
#define PTE_PARENT_SIZE PMD_SIZE
#define fixmap_pgd_next fixmap_pmd
#else
#define pgd_next_t pte_t
#define alloc_pgd_next(__va) alloc_pte(__va)
#define get_pgd_next_virt(__pa) get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define PTE_PARENT_SIZE PGDIR_SIZE
#define fixmap_pgd_next fixmap_pte
#endif
static void __init create_pgd_mapping(pgd_t *pgdp,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
{
pgd_next_t *nextp;
phys_addr_t next_phys;
uintptr_t pgd_index = pgd_index(va);
if (sz == PGDIR_SIZE) {
if (pgd_val(pgdp[pgd_index]) == 0)
pgdp[pgd_index] = pfn_pgd(PFN_DOWN(pa), prot);
return;
}
if (pgd_val(pgdp[pgd_index]) == 0) {
next_phys = alloc_pgd_next(va);
pgdp[pgd_index] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE);
nextp = get_pgd_next_virt(next_phys);
memset(nextp, 0, PAGE_SIZE);
} else {
next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_index]));
nextp = get_pgd_next_virt(next_phys);
}
create_pgd_next_mapping(nextp, va, pa, sz, prot);
}
static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
{
uintptr_t map_size = PAGE_SIZE;
/* Upgrade to PMD/PGDIR mappings whenever possible */
if (!(base & (PTE_PARENT_SIZE - 1)) &&
!(size & (PTE_PARENT_SIZE - 1)))
map_size = PTE_PARENT_SIZE;
return map_size;
}
/*
* setup_vm() is called from head.S with MMU-off.
*
......@@ -191,54 +333,115 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
"not use absolute addressing."
#endif
asmlinkage void __init setup_vm(void)
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
uintptr_t i;
uintptr_t pa = (uintptr_t) &_start;
pgprot_t prot = __pgprot(pgprot_val(PAGE_KERNEL) | _PAGE_EXEC);
uintptr_t va, end_va;
uintptr_t load_pa = (uintptr_t)(&_start);
uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE);
va_pa_offset = PAGE_OFFSET - pa;
pfn_base = PFN_DOWN(pa);
va_pa_offset = PAGE_OFFSET - load_pa;
pfn_base = PFN_DOWN(load_pa);
/*
* Enforce boot alignment requirements of RV32 and
* RV64 by only allowing PMD or PGD mappings.
*/
BUG_ON(map_size == PAGE_SIZE);
/* Sanity check alignment and size */
BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
BUG_ON((pa % (PAGE_SIZE * PTRS_PER_PTE)) != 0);
BUG_ON((load_pa % map_size) != 0);
BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE);
/* Setup early PGD for fixmap */
create_pgd_mapping(early_pg_dir, FIXADDR_START,
(uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
#ifndef __PAGETABLE_PMD_FOLDED
trampoline_pg_dir[(PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD] =
pfn_pgd(PFN_DOWN((uintptr_t)trampoline_pmd),
__pgprot(_PAGE_TABLE));
trampoline_pmd[0] = pfn_pmd(PFN_DOWN(pa), prot);
/* Setup fixmap PMD */
create_pmd_mapping(fixmap_pmd, FIXADDR_START,
(uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
/* Setup trampoline PGD and PMD */
create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
(uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
create_pmd_mapping(trampoline_pmd, PAGE_OFFSET,
load_pa, PMD_SIZE, PAGE_KERNEL_EXEC);
#else
/* Setup trampoline PGD */
create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC);
#endif
for (i = 0; i < (-PAGE_OFFSET)/PGDIR_SIZE; ++i) {
size_t o = (PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD + i;
/*
* Setup early PGD covering entire kernel which will allows
* us to reach paging_init(). We map all memory banks later
* in setup_vm_final() below.
*/
end_va = PAGE_OFFSET + load_sz;
for (va = PAGE_OFFSET; va < end_va; va += map_size)
create_pgd_mapping(early_pg_dir, va,
load_pa + (va - PAGE_OFFSET),
map_size, PAGE_KERNEL_EXEC);
/* Create fixed mapping for early FDT parsing */
end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE;
for (va = __fix_to_virt(FIX_FDT); va < end_va; va += PAGE_SIZE)
create_pte_mapping(fixmap_pte, va,
dtb_pa + (va - __fix_to_virt(FIX_FDT)),
PAGE_SIZE, PAGE_KERNEL);
/* Save pointer to DTB for early FDT parsing */
dtb_early_va = (void *)fix_to_virt(FIX_FDT) + (dtb_pa & ~PAGE_MASK);
}
swapper_pg_dir[o] =
pfn_pgd(PFN_DOWN((uintptr_t)swapper_pmd) + i,
__pgprot(_PAGE_TABLE));
}
for (i = 0; i < ARRAY_SIZE(swapper_pmd); i++)
swapper_pmd[i] = pfn_pmd(PFN_DOWN(pa + i * PMD_SIZE), prot);
swapper_pg_dir[(FIXADDR_START >> PGDIR_SHIFT) % PTRS_PER_PGD] =
pfn_pgd(PFN_DOWN((uintptr_t)fixmap_pmd),
__pgprot(_PAGE_TABLE));
fixmap_pmd[(FIXADDR_START >> PMD_SHIFT) % PTRS_PER_PMD] =
pfn_pmd(PFN_DOWN((uintptr_t)fixmap_pte),
__pgprot(_PAGE_TABLE));
#else
trampoline_pg_dir[(PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD] =
pfn_pgd(PFN_DOWN(pa), prot);
static void __init setup_vm_final(void)
{
uintptr_t va, map_size;
phys_addr_t pa, start, end;
struct memblock_region *reg;
for (i = 0; i < (-PAGE_OFFSET)/PGDIR_SIZE; ++i) {
size_t o = (PAGE_OFFSET >> PGDIR_SHIFT) % PTRS_PER_PGD + i;
/* Set mmu_enabled flag */
mmu_enabled = true;
swapper_pg_dir[o] =
pfn_pgd(PFN_DOWN(pa + i * PGDIR_SIZE), prot);
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);
/* Map all memory banks */
for_each_memblock(memory, reg) {
start = reg->base;
end = start + reg->size;
if (start >= end)
break;
if (memblock_is_nomap(reg))
continue;
if (start <= __pa(PAGE_OFFSET) &&
__pa(PAGE_OFFSET) < end)
start = __pa(PAGE_OFFSET);
map_size = best_map_size(start, end - start);
for (pa = start; pa < end; pa += map_size) {
va = (uintptr_t)__va(pa);
create_pgd_mapping(swapper_pg_dir, va, pa,
map_size, PAGE_KERNEL_EXEC);
}
}
swapper_pg_dir[(FIXADDR_START >> PGDIR_SHIFT) % PTRS_PER_PGD] =
pfn_pgd(PFN_DOWN((uintptr_t)fixmap_pte),
__pgprot(_PAGE_TABLE));
#endif
/* Clear fixmap PTE and PMD mappings */
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
/* Move to swapper page table */
csr_write(sptbr, PFN_DOWN(__pa(swapper_pg_dir)) | SATP_MODE);
local_flush_tlb_all();
}
void __init paging_init(void)
{
setup_vm_final();
setup_zero_page();
zone_sizes_init();
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment