Commit 2e75ab31 authored by Palmer Dabbelt's avatar Palmer Dabbelt

Merge patch series "riscv: Use PUD/P4D/PGD pages for the linear mapping"

Alexandre Ghiti <alexghiti@rivosinc.com> says:

This patchset intends to improve tlb utilization by using hugepages for
the linear mapping.

As reported by Anup in v6, when STRICT_KERNEL_RWX is enabled, we must
take care of isolating the kernel text and rodata so that they are not
mapped with a PUD mapping which would then assign wrong permissions to
the whole region: it is achieved the same way as arm64 by using the
memblock nomap API which isolates those regions and re-merge them afterwards
thus avoiding any issue with the system resources tree creation.

arch/riscv/include/asm/page.h |  19 ++++++-
 arch/riscv/mm/init.c          | 102 ++++++++++++++++++++++++++--------
 arch/riscv/mm/physaddr.c      |  16 ++++++
 drivers/of/fdt.c              |  11 ++--
 4 files changed, 118 insertions(+), 30 deletions(-)

* b4-shazam-merge:
  riscv: Use PUD/P4D/PGD pages for the linear mapping
  riscv: Move the linear mapping creation in its own function
  riscv: Get rid of riscv_pfn_base variable

Link: https://lore.kernel.org/r/20230324155421.271544-1-alexghiti@rivosinc.comSigned-off-by: default avatarPalmer Dabbelt <palmer@rivosinc.com>
parents 5464912c 3335068f
......@@ -89,9 +89,16 @@ typedef struct page *pgtable_t;
#define PTE_FMT "%08lx"
#endif
#ifdef CONFIG_64BIT
/*
* We override this value as its generic definition uses __pa too early in
* the boot process (before kernel_map.va_pa_offset is set).
*/
#define MIN_MEMBLOCK_ADDR 0
#endif
#ifdef CONFIG_MMU
extern unsigned long riscv_pfn_base;
#define ARCH_PFN_OFFSET (riscv_pfn_base)
#define ARCH_PFN_OFFSET (PFN_DOWN((unsigned long)phys_ram_base))
#else
#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
#endif /* CONFIG_MMU */
......@@ -121,7 +128,11 @@ extern phys_addr_t phys_ram_base;
#define is_linear_mapping(x) \
((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE))
#ifndef CONFIG_DEBUG_VIRTUAL
#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset))
#else
void *linear_mapping_pa_to_va(unsigned long x);
#endif
#define kernel_mapping_pa_to_va(y) ({ \
unsigned long _y = (unsigned long)(y); \
(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \
......@@ -130,7 +141,11 @@ extern phys_addr_t phys_ram_base;
})
#define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x)
#ifndef CONFIG_DEBUG_VIRTUAL
#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - kernel_map.va_pa_offset)
#else
phys_addr_t linear_mapping_va_to_pa(unsigned long x);
#endif
#define kernel_mapping_va_to_pa(y) ({ \
unsigned long _y = (unsigned long)(y); \
(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \
......
......@@ -213,6 +213,14 @@ static void __init setup_bootmem(void)
phys_ram_end = memblock_end_of_DRAM();
if (!IS_ENABLED(CONFIG_XIP_KERNEL))
phys_ram_base = memblock_start_of_DRAM();
/*
* In 64-bit, any use of __va/__pa before this point is wrong as we
* did not know the start of DRAM before.
*/
if (IS_ENABLED(CONFIG_64BIT))
kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;
/*
* memblock allocator is not aware of the fact that last 4K bytes of
* the addressable memory can not be mapped because of IS_ERR_VALUE
......@@ -271,9 +279,6 @@ static void __init setup_bootmem(void)
#ifdef CONFIG_MMU
struct pt_alloc_ops pt_ops __initdata;
unsigned long riscv_pfn_base __ro_after_init;
EXPORT_SYMBOL(riscv_pfn_base);
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
......@@ -285,7 +290,6 @@ static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAG
#ifdef CONFIG_XIP_KERNEL
#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base))
#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
#define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte))
#define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir))
......@@ -671,9 +675,16 @@ void __init create_pgd_mapping(pgd_t *pgdp,
static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
{
/* Upgrade to PMD_SIZE mappings whenever possible */
base &= PMD_SIZE - 1;
if (!base && size >= PMD_SIZE)
if (!(base & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
return PGDIR_SIZE;
if (!(base & (P4D_SIZE - 1)) && size >= P4D_SIZE)
return P4D_SIZE;
if (!(base & (PUD_SIZE - 1)) && size >= PUD_SIZE)
return PUD_SIZE;
if (!(base & (PMD_SIZE - 1)) && size >= PMD_SIZE)
return PMD_SIZE;
return PAGE_SIZE;
......@@ -982,11 +993,22 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
set_satp_mode();
#endif
kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
/*
* In 64-bit, we defer the setup of va_pa_offset to setup_bootmem,
* where we have the system memory layout: this allows us to align
* the physical and virtual mappings and then make use of PUD/P4D/PGD
* for the linear mapping. This is only possible because the kernel
* mapping lies outside the linear mapping.
* In 32-bit however, as the kernel resides in the linear mapping,
* setup_vm_final can not change the mapping established here,
* otherwise the same kernel addresses would get mapped to different
* physical addresses (if the start of dram is different from the
* kernel physical address start).
*/
kernel_map.va_pa_offset = IS_ENABLED(CONFIG_64BIT) ?
0UL : PAGE_OFFSET - kernel_map.phys_addr;
kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);
/*
* The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
* kernel, whereas for 64-bit kernel, the end of the virtual address
......@@ -1090,16 +1112,36 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
pt_ops_set_fixmap();
}
static void __init setup_vm_final(void)
static void __init create_linear_mapping_range(phys_addr_t start,
phys_addr_t end)
{
phys_addr_t pa;
uintptr_t va, map_size;
phys_addr_t pa, start, end;
for (pa = start; pa < end; pa += map_size) {
va = (uintptr_t)__va(pa);
map_size = best_map_size(pa, end - pa);
create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
pgprot_from_va(va));
}
}
static void __init create_linear_mapping_page_table(void)
{
phys_addr_t start, end;
u64 i;
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);
#ifdef CONFIG_STRICT_KERNEL_RWX
phys_addr_t ktext_start = __pa_symbol(_start);
phys_addr_t ktext_size = __init_data_begin - _start;
phys_addr_t krodata_start = __pa_symbol(__start_rodata);
phys_addr_t krodata_size = _data - __start_rodata;
/* Isolate kernel text and rodata so they don't get mapped with a PUD */
memblock_mark_nomap(ktext_start, ktext_size);
memblock_mark_nomap(krodata_start, krodata_size);
#endif
/* Map all memory banks in the linear mapping */
for_each_mem_range(i, &start, &end) {
......@@ -1111,15 +1153,29 @@ static void __init setup_vm_final(void)
if (end >= __pa(PAGE_OFFSET) + memory_limit)
end = __pa(PAGE_OFFSET) + memory_limit;
for (pa = start; pa < end; pa += map_size) {
va = (uintptr_t)__va(pa);
map_size = best_map_size(pa, end - pa);
create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
pgprot_from_va(va));
}
create_linear_mapping_range(start, end);
}
#ifdef CONFIG_STRICT_KERNEL_RWX
create_linear_mapping_range(ktext_start, ktext_start + ktext_size);
create_linear_mapping_range(krodata_start,
krodata_start + krodata_size);
memblock_clear_nomap(ktext_start, ktext_size);
memblock_clear_nomap(krodata_start, krodata_size);
#endif
}
static void __init setup_vm_final(void)
{
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);
/* Map the linear mapping */
create_linear_mapping_page_table();
/* Map the kernel */
if (IS_ENABLED(CONFIG_64BIT))
create_kernel_page_table(swapper_pg_dir, false);
......
......@@ -33,3 +33,19 @@ phys_addr_t __phys_addr_symbol(unsigned long x)
return __va_to_pa_nodebug(x);
}
EXPORT_SYMBOL(__phys_addr_symbol);
phys_addr_t linear_mapping_va_to_pa(unsigned long x)
{
BUG_ON(!kernel_map.va_pa_offset);
return ((unsigned long)(x) - kernel_map.va_pa_offset);
}
EXPORT_SYMBOL(linear_mapping_va_to_pa);
void *linear_mapping_pa_to_va(unsigned long x)
{
BUG_ON(!kernel_map.va_pa_offset);
return ((void *)((unsigned long)(x) + kernel_map.va_pa_offset));
}
EXPORT_SYMBOL(linear_mapping_pa_to_va);
......@@ -887,12 +887,13 @@ const void * __init of_flat_dt_match_machine(const void *default_match,
static void __early_init_dt_declare_initrd(unsigned long start,
unsigned long end)
{
/* ARM64 would cause a BUG to occur here when CONFIG_DEBUG_VM is
* enabled since __va() is called too early. ARM64 does make use
* of phys_initrd_start/phys_initrd_size so we can skip this
* conversion.
/*
* __va() is not yet available this early on some platforms. In that
* case, the platform uses phys_initrd_start/phys_initrd_size instead
* and does the VA conversion itself.
*/
if (!IS_ENABLED(CONFIG_ARM64)) {
if (!IS_ENABLED(CONFIG_ARM64) &&
!(IS_ENABLED(CONFIG_RISCV) && IS_ENABLED(CONFIG_64BIT))) {
initrd_start = (unsigned long)__va(start);
initrd_end = (unsigned long)__va(end);
initrd_below_start_ok = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment