Commit 7867e402 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'riscv-for-linus-5.17-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux

Pull more RISC-V updates from Palmer Dabbelt:

 - Support for sv48 paging

 - Hart ID mappings are now sparse, which enables more CPUs to come up
   on systems with sparse hart IDs

 - A handful of cleanups and fixes

* tag 'riscv-for-linus-5.17-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (27 commits)
  RISC-V: nommu_virt: Drop unused SLAB_MERGE_DEFAULT
  RISC-V: Remove redundant err variable
  riscv: dts: sifive unmatched: Add gpio poweroff
  riscv: canaan: remove useless select of non-existing config SYSCON
  RISC-V: Do not use cpumask data structure for hartid bitmap
  RISC-V: Move spinwait booting method to its own config
  RISC-V: Move the entire hart selection via lottery to SMP
  RISC-V: Use __cpu_up_stack/task_pointer only for spinwait method
  RISC-V: Do not print the SBI version during HSM extension boot print
  RISC-V: Avoid using per cpu array for ordered booting
  riscv: default to CONFIG_RISCV_SBI_V01=n
  riscv: fix boolconv.cocci warnings
  riscv: Explicit comment about user virtual address space size
  riscv: Use pgtable_l4_enabled to output mmu_type in cpuinfo
  riscv: Implement sv48 support
  asm-generic: Prepare for riscv use of pud_alloc_one and pud_free
  riscv: Allow to dynamically define VA_BITS
  riscv: Introduce functions to switch pt_ops
  riscv: Split early kasan mapping to prepare sv48 introduction
  riscv: Move KASAN mapping next to the kernel mapping
  ...
parents b21bae9a c59cd507
......@@ -47,12 +47,12 @@ RISC-V Linux Kernel SV39
| Kernel-space virtual memory, shared between all processes:
____________________________________________________________|___________________________________________________________
| | | |
ffffffc000000000 | -256 GB | ffffffc7ffffffff | 32 GB | kasan
ffffffcefee00000 | -196 GB | ffffffcefeffffff | 2 MB | fixmap
ffffffceff000000 | -196 GB | ffffffceffffffff | 16 MB | PCI io
ffffffcf00000000 | -196 GB | ffffffcfffffffff | 4 GB | vmemmap
ffffffd000000000 | -192 GB | ffffffdfffffffff | 64 GB | vmalloc/ioremap space
ffffffe000000000 | -128 GB | ffffffff7fffffff | 124 GB | direct mapping of all physical memory
ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap
ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io
ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap
ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space
ffffffd800000000 | -160 GB | fffffff6ffffffff | 124 GB | direct mapping of all physical memory
fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan
__________________|____________|__________________|_________|____________________________________________________________
|
|
......
......@@ -147,27 +147,16 @@ config MMU
Select if you want MMU-based virtualised addressing space
support by paged memory management. If unsure, say 'Y'.
config VA_BITS
int
default 32 if 32BIT
default 39 if 64BIT
config PA_BITS
int
default 34 if 32BIT
default 56 if 64BIT
config PAGE_OFFSET
hex
default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB
default 0xC0000000 if 32BIT
default 0x80000000 if 64BIT && !MMU
default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
default 0xffffaf8000000000 if 64BIT
config KASAN_SHADOW_OFFSET
hex
depends on KASAN_GENERIC
default 0xdfffffc800000000 if 64BIT
default 0xdfffffff00000000 if 64BIT
default 0xffffffff if 32BIT
config ARCH_FLATMEM_ENABLE
......@@ -213,7 +202,7 @@ config FIX_EARLYCON_MEM
config PGTABLE_LEVELS
int
default 3 if 64BIT
default 4 if 64BIT
default 2
config LOCKDEP_SUPPORT
......@@ -271,24 +260,6 @@ config MODULE_SECTIONS
bool
select HAVE_MOD_ARCH_SPECIFIC
choice
prompt "Maximum Physical Memory"
default MAXPHYSMEM_1GB if 32BIT
default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW
default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY
config MAXPHYSMEM_1GB
depends on 32BIT
bool "1GiB"
config MAXPHYSMEM_2GB
depends on 64BIT
bool "2GiB"
config MAXPHYSMEM_128GB
depends on 64BIT && CMODEL_MEDANY
bool "128GiB"
endchoice
config SMP
bool "Symmetric Multi-Processing"
help
......@@ -392,12 +363,25 @@ source "kernel/Kconfig.hz"
config RISCV_SBI_V01
bool "SBI v0.1 support"
default y
depends on RISCV_SBI
help
This config allows kernel to use SBI v0.1 APIs. This will be
deprecated in future once legacy M-mode software are no longer in use.
config RISCV_BOOT_SPINWAIT
bool "Spinwait booting method"
depends on SMP
default y
help
This enables support for booting Linux via spinwait method. In the
spinwait method, all cores randomly jump to Linux. One of the cores
gets chosen via lottery and all other keep spinning on a percpu
variable. This method cannot support CPU hotplug and sparse hartid
scheme. It should be only enabled for M-mode Linux or platforms relying
on older firmware without SBI HSM extension. All other platforms should
rely on ordered booting via SBI HSM extension which gets chosen
dynamically at runtime if the firmware supports it.
config KEXEC
bool "Kexec system call"
select KEXEC_CORE
......
......@@ -39,6 +39,11 @@ rtcclk: rtcclk {
clock-frequency = <RTCCLK_FREQ>;
clock-output-names = "rtcclk";
};
gpio-poweroff {
compatible = "gpio-poweroff";
gpios = <&gpio 2 GPIO_ACTIVE_LOW>;
};
};
&uart0 {
......
......@@ -29,7 +29,6 @@ CONFIG_EMBEDDED=y
CONFIG_SLOB=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_CMDLINE="earlycon console=ttySIF0"
......
......@@ -21,7 +21,6 @@ CONFIG_EMBEDDED=y
CONFIG_SLOB=y
# CONFIG_MMU is not set
CONFIG_SOC_CANAAN=y
CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro"
......
......@@ -24,10 +24,8 @@ CONFIG_EXPERT=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
# CONFIG_SLAB_MERGE_DEFAULT is not set
# CONFIG_MMU is not set
CONFIG_SOC_VIRT=y
CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0"
CONFIG_CMDLINE_FORCE=y
......
......@@ -40,7 +40,5 @@ struct cpu_operations {
extern const struct cpu_operations *cpu_ops[NR_CPUS];
void __init cpu_set_ops(int cpu);
void cpu_update_secondary_bootdata(unsigned int cpuid,
struct task_struct *tidle);
#endif /* ifndef __ASM_CPU_OPS_H */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2021 by Rivos Inc.
*/
#ifndef __ASM_CPU_OPS_SBI_H
#define __ASM_CPU_OPS_SBI_H
#ifndef __ASSEMBLY__
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/threads.h>
/**
* struct sbi_hart_boot_data - Hart specific boot used during booting and
* cpu hotplug.
* @task_ptr: A pointer to the hart specific tp
* @stack_ptr: A pointer to the hart specific sp
*/
struct sbi_hart_boot_data {
void *task_ptr;
void *stack_ptr;
};
#endif
#endif /* ifndef __ASM_CPU_OPS_SBI_H */
......@@ -40,14 +40,13 @@
#ifndef CONFIG_64BIT
#define SATP_PPN _AC(0x003FFFFF, UL)
#define SATP_MODE_32 _AC(0x80000000, UL)
#define SATP_MODE SATP_MODE_32
#define SATP_ASID_BITS 9
#define SATP_ASID_SHIFT 22
#define SATP_ASID_MASK _AC(0x1FF, UL)
#else
#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
#define SATP_MODE SATP_MODE_39
#define SATP_MODE_48 _AC(0x9000000000000000, UL)
#define SATP_ASID_BITS 16
#define SATP_ASID_SHIFT 44
#define SATP_ASID_MASK _AC(0xFFFF, UL)
......
......@@ -24,6 +24,7 @@ enum fixed_addresses {
FIX_HOLE,
FIX_PTE,
FIX_PMD,
FIX_PUD,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
......
......@@ -27,13 +27,18 @@
*/
#define KASAN_SHADOW_SCALE_SHIFT 3
#define KASAN_SHADOW_SIZE (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
#define KASAN_SHADOW_START KERN_VIRT_START
#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
#define KASAN_SHADOW_SIZE (UL(1) << ((VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
/*
* Depending on the size of the virtual address space, the region may not be
* aligned on PGDIR_SIZE, so force its alignment to ease its population.
*/
#define KASAN_SHADOW_START ((KASAN_SHADOW_END - KASAN_SHADOW_SIZE) & PGDIR_MASK)
#define KASAN_SHADOW_END MODULES_LOWEST_VADDR
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
void kasan_init(void);
asmlinkage void kasan_early_init(void);
void kasan_swapper_init(void);
#endif
#endif
......
......@@ -31,9 +31,20 @@
* When not using MMU this corresponds to the first free page in
* physical memory (aligned on a page boundary).
*/
#ifdef CONFIG_64BIT
#ifdef CONFIG_MMU
#define PAGE_OFFSET kernel_map.page_offset
#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
#define KERN_VIRT_SIZE (-PAGE_OFFSET)
#endif
/*
* By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
* define the PAGE_OFFSET value for SV39.
*/
#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL)
#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
#endif /* CONFIG_64BIT */
#ifndef __ASSEMBLY__
......@@ -86,6 +97,7 @@ extern unsigned long riscv_pfn_base;
#endif /* CONFIG_MMU */
struct kernel_mapping {
unsigned long page_offset;
unsigned long virt_addr;
uintptr_t phys_addr;
uintptr_t size;
......
......@@ -11,6 +11,8 @@
#include <asm/tlb.h>
#ifdef CONFIG_MMU
#define __HAVE_ARCH_PUD_ALLOC_ONE
#define __HAVE_ARCH_PUD_FREE
#include <asm-generic/pgalloc.h>
static inline void pmd_populate_kernel(struct mm_struct *mm,
......@@ -36,6 +38,44 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
if (pgtable_l4_enabled) {
unsigned long pfn = virt_to_pfn(pud);
set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
}
static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
pud_t *pud)
{
if (pgtable_l4_enabled) {
unsigned long pfn = virt_to_pfn(pud);
set_p4d_safe(p4d,
__p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
}
#define pud_alloc_one pud_alloc_one
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
if (pgtable_l4_enabled)
return __pud_alloc_one(mm, addr);
return NULL;
}
#define pud_free pud_free
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
if (pgtable_l4_enabled)
__pud_free(mm, pud);
}
#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
#endif /* __PAGETABLE_PMD_FOLDED */
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
......
......@@ -8,16 +8,36 @@
#include <linux/const.h>
#define PGDIR_SHIFT 30
extern bool pgtable_l4_enabled;
#define PGDIR_SHIFT_L3 30
#define PGDIR_SHIFT_L4 39
#define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3)
#define PGDIR_SHIFT (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
/* Size of region mapped by a page global directory */
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
/* pud is folded into pgd in case of 3-level page table */
#define PUD_SHIFT 30
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE - 1))
#define PMD_SHIFT 21
/* Size of region mapped by a page middle directory */
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))
/* Page Upper Directory entry */
typedef struct {
unsigned long pud;
} pud_t;
#define pud_val(x) ((x).pud)
#define __pud(x) ((pud_t) { (x) })
#define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t))
/* Page Middle Directory entry */
typedef struct {
unsigned long pmd;
......@@ -59,6 +79,16 @@ static inline void pud_clear(pud_t *pudp)
set_pud(pudp, __pud(0));
}
static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot)
{
return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
}
static inline unsigned long _pud_pfn(pud_t pud)
{
return pud_val(pud) >> _PAGE_PFN_SHIFT;
}
static inline pmd_t *pud_pgtable(pud_t pud)
{
return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
......@@ -69,6 +99,17 @@ static inline struct page *pud_page(pud_t pud)
return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
}
#define mm_pud_folded mm_pud_folded
static inline bool mm_pud_folded(struct mm_struct *mm)
{
if (pgtable_l4_enabled)
return false;
return true;
}
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
{
return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
......@@ -84,4 +125,69 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
#define pud_ERROR(e) \
pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
if (pgtable_l4_enabled)
*p4dp = p4d;
else
set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
}
static inline int p4d_none(p4d_t p4d)
{
if (pgtable_l4_enabled)
return (p4d_val(p4d) == 0);
return 0;
}
static inline int p4d_present(p4d_t p4d)
{
if (pgtable_l4_enabled)
return (p4d_val(p4d) & _PAGE_PRESENT);
return 1;
}
static inline int p4d_bad(p4d_t p4d)
{
if (pgtable_l4_enabled)
return !p4d_present(p4d);
return 0;
}
static inline void p4d_clear(p4d_t *p4d)
{
if (pgtable_l4_enabled)
set_p4d(p4d, __p4d(0));
}
static inline pud_t *p4d_pgtable(p4d_t p4d)
{
if (pgtable_l4_enabled)
return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
}
static inline struct page *p4d_page(p4d_t p4d)
{
return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
}
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
#define pud_offset pud_offset
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
if (pgtable_l4_enabled)
return p4d_pgtable(*p4d) + pud_index(address);
return (pud_t *)p4d;
}
#endif /* _ASM_RISCV_PGTABLE_64_H */
......@@ -24,6 +24,17 @@
#define KERNEL_LINK_ADDR PAGE_OFFSET
#endif
/* Number of entries in the page global directory */
#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
/* Number of entries in the page table */
#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
/*
* Half of the kernel address space (half of the entries of the page global
* directory) is for the direct mapping.
*/
#define KERN_VIRT_SIZE ((PTRS_PER_PGD / 2 * PGDIR_SIZE) / 2)
#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
#define VMALLOC_END PAGE_OFFSET
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
......@@ -39,6 +50,8 @@
/* Modules always live before the kernel */
#ifdef CONFIG_64BIT
/* This is used to define the end of the KASAN shadow region */
#define MODULES_LOWEST_VADDR (KERNEL_LINK_ADDR - SZ_2G)
#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
#endif
......@@ -48,8 +61,14 @@
* struct pages to map half the virtual address space. Then
* position vmemmap directly below the VMALLOC region.
*/
#ifdef CONFIG_64BIT
#define VA_BITS (pgtable_l4_enabled ? 48 : 39)
#else
#define VA_BITS 32
#endif
#define VMEMMAP_SHIFT \
(CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
(VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
#define VMEMMAP_END VMALLOC_START
#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
......@@ -83,8 +102,7 @@
#ifndef __ASSEMBLY__
/* Page Upper Directory not used in RISC-V */
#include <asm-generic/pgtable-nopud.h>
#include <asm-generic/pgtable-nop4d.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>
......@@ -107,12 +125,20 @@
#define XIP_FIXUP(addr) (addr)
#endif /* CONFIG_XIP_KERNEL */
#ifdef CONFIG_MMU
/* Number of entries in the page global directory */
#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
/* Number of entries in the page table */
#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
struct pt_alloc_ops {
pte_t *(*get_pte_virt)(phys_addr_t pa);
phys_addr_t (*alloc_pte)(uintptr_t va);
#ifndef __PAGETABLE_PMD_FOLDED
pmd_t *(*get_pmd_virt)(phys_addr_t pa);
phys_addr_t (*alloc_pmd)(uintptr_t va);
pud_t *(*get_pud_virt)(phys_addr_t pa);
phys_addr_t (*alloc_pud)(uintptr_t va);
#endif
};
extern struct pt_alloc_ops pt_ops __initdata;
#ifdef CONFIG_MMU
/* Number of PGD entries that a user-mode program can use */
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
......@@ -659,7 +685,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
* and give the kernel the other (upper) half.
*/
#ifdef CONFIG_64BIT
#define KERN_VIRT_START (-(BIT(CONFIG_VA_BITS)) + TASK_SIZE)
#define KERN_VIRT_START (-(BIT(VA_BITS)) + TASK_SIZE)
#else
#define KERN_VIRT_START FIXADDR_START
#endif
......@@ -667,11 +693,22 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
/*
* Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
* Note that PGDIR_SIZE must evenly divide TASK_SIZE.
* Task size is:
* - 0x9fc00000 (~2.5GB) for RV32.
* - 0x4000000000 ( 256GB) for RV64 using SV39 mmu
* - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
*
* Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V
* Instruction Set Manual Volume II: Privileged Architecture" states that
* "load and store effective addresses, which are 64bits, must have bits
* 63–48 all equal to bit 47, or else a page-fault exception will occur."
*/
#ifdef CONFIG_64BIT
#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2)
#define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
#else
#define TASK_SIZE FIXADDR_START
#define TASK_SIZE_MIN TASK_SIZE
#endif
#else /* CONFIG_MMU */
......@@ -697,6 +734,8 @@ extern uintptr_t _dtb_early_pa;
#define dtb_early_va _dtb_early_va
#define dtb_early_pa _dtb_early_pa
#endif /* CONFIG_XIP_KERNEL */
extern u64 satp_mode;
extern bool pgtable_l4_enabled;
void paging_init(void);
void misc_mem_init(void);
......
......@@ -8,6 +8,7 @@
#define _ASM_RISCV_SBI_H
#include <linux/types.h>
#include <linux/cpumask.h>
#ifdef CONFIG_RISCV_SBI
enum sbi_ext_id {
......@@ -128,27 +129,27 @@ long sbi_get_mimpid(void);
void sbi_set_timer(uint64_t stime_value);
void sbi_shutdown(void);
void sbi_clear_ipi(void);
int sbi_send_ipi(const unsigned long *hart_mask);
int sbi_remote_fence_i(const unsigned long *hart_mask);
int sbi_remote_sfence_vma(const unsigned long *hart_mask,
int sbi_send_ipi(const struct cpumask *cpu_mask);
int sbi_remote_fence_i(const struct cpumask *cpu_mask);
int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid);
int sbi_remote_hfence_gvma(const unsigned long *hart_mask,
int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask,
int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long vmid);
int sbi_remote_hfence_vvma(const unsigned long *hart_mask,
int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size);
int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask,
int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid);
......@@ -183,7 +184,7 @@ static inline unsigned long sbi_mk_version(unsigned long major,
int sbi_err_map_linux_errno(int err);
#else /* CONFIG_RISCV_SBI */
static inline int sbi_remote_fence_i(const unsigned long *hart_mask) { return -1; }
static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; }
static inline void sbi_init(void) {}
#endif /* CONFIG_RISCV_SBI */
#endif /* _ASM_RISCV_SBI_H */
......@@ -92,8 +92,6 @@ static inline void riscv_clear_ipi(void)
#endif /* CONFIG_SMP */
void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
#if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP)
bool cpu_has_hotplug(unsigned int cpu);
#else
......
......@@ -4,7 +4,11 @@
#define _ASM_RISCV_SPARSEMEM_H
#ifdef CONFIG_SPARSEMEM
#define MAX_PHYSMEM_BITS CONFIG_PA_BITS
#ifdef CONFIG_64BIT
#define MAX_PHYSMEM_BITS 56
#else
#define MAX_PHYSMEM_BITS 34
#endif /* CONFIG_64BIT */
#define SECTION_SIZE_BITS 27
#endif /* CONFIG_SPARSEMEM */
......
......@@ -43,7 +43,8 @@ obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += cpu_ops.o
obj-$(CONFIG_SMP) += cpu_ops_spinwait.o
obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o
......
......@@ -12,6 +12,7 @@
#include <asm/kvm_host.h>
#include <asm/thread_info.h>
#include <asm/ptrace.h>
#include <asm/cpu_ops_sbi.h>
void asm_offsets(void);
......@@ -468,4 +469,6 @@ void asm_offsets(void)
DEFINE(PT_SIZE_ON_STACK, ALIGN(sizeof(struct pt_regs), STACK_ALIGN));
OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr);
OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr);
OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr);
}
......@@ -7,6 +7,7 @@
#include <linux/seq_file.h>
#include <linux/of.h>
#include <asm/smp.h>
#include <asm/pgtable.h>
/*
* Returns the hart ID of the given device tree node, or -ENODEV if the node
......@@ -71,18 +72,19 @@ static void print_isa(struct seq_file *f, const char *isa)
seq_puts(f, "\n");
}
static void print_mmu(struct seq_file *f, const char *mmu_type)
static void print_mmu(struct seq_file *f)
{
char sv_type[16];
#if defined(CONFIG_32BIT)
if (strcmp(mmu_type, "riscv,sv32") != 0)
return;
strncpy(sv_type, "sv32", 5);
#elif defined(CONFIG_64BIT)
if (strcmp(mmu_type, "riscv,sv39") != 0 &&
strcmp(mmu_type, "riscv,sv48") != 0)
return;
if (pgtable_l4_enabled)
strncpy(sv_type, "sv48", 5);
else
strncpy(sv_type, "sv39", 5);
#endif
seq_printf(f, "mmu\t\t: %s\n", mmu_type+6);
seq_printf(f, "mmu\t\t: %s\n", sv_type);
}
static void *c_start(struct seq_file *m, loff_t *pos)
......@@ -107,14 +109,13 @@ static int c_show(struct seq_file *m, void *v)
{
unsigned long cpu_id = (unsigned long)v - 1;
struct device_node *node = of_get_cpu_node(cpu_id, NULL);
const char *compat, *isa, *mmu;
const char *compat, *isa;
seq_printf(m, "processor\t: %lu\n", cpu_id);
seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id));
if (!of_property_read_string(node, "riscv,isa", &isa))
print_isa(m, isa);
if (!of_property_read_string(node, "mmu-type", &mmu))
print_mmu(m, mmu);
print_mmu(m);
if (!of_property_read_string(node, "compatible", &compat)
&& strcmp(compat, "riscv"))
seq_printf(m, "uarch\t\t: %s\n", compat);
......
......@@ -8,37 +8,29 @@
#include <linux/of.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
#include <asm/sbi.h>
#include <asm/smp.h>
const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
void *__cpu_up_stack_pointer[NR_CPUS] __section(".data");
void *__cpu_up_task_pointer[NR_CPUS] __section(".data");
extern const struct cpu_operations cpu_ops_sbi;
#ifdef CONFIG_RISCV_BOOT_SPINWAIT
extern const struct cpu_operations cpu_ops_spinwait;
void cpu_update_secondary_bootdata(unsigned int cpuid,
struct task_struct *tidle)
{
int hartid = cpuid_to_hartid_map(cpuid);
/* Make sure tidle is updated */
smp_mb();
WRITE_ONCE(__cpu_up_stack_pointer[hartid],
task_stack_page(tidle) + THREAD_SIZE);
WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle);
}
#else
const struct cpu_operations cpu_ops_spinwait = {
.name = "",
.cpu_prepare = NULL,
.cpu_start = NULL,
};
#endif
void __init cpu_set_ops(int cpuid)
{
#if IS_ENABLED(CONFIG_RISCV_SBI)
if (sbi_probe_extension(SBI_EXT_HSM) > 0) {
if (!cpuid)
pr_info("SBI v0.2 HSM extension detected\n");
pr_info("SBI HSM extension detected\n");
cpu_ops[cpuid] = &cpu_ops_sbi;
} else
#endif
......
......@@ -7,13 +7,22 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
#include <asm/cpu_ops_sbi.h>
#include <asm/sbi.h>
#include <asm/smp.h>
extern char secondary_start_sbi[];
const struct cpu_operations cpu_ops_sbi;
/*
* Ordered booting via HSM brings one cpu at a time. However, cpu hotplug can
* be invoked from multiple threads in parallel. Define a per cpu data
* to handle that.
*/
DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data);
static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr,
unsigned long priv)
{
......@@ -55,14 +64,19 @@ static int sbi_hsm_hart_get_status(unsigned long hartid)
static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
{
int rc;
unsigned long boot_addr = __pa_symbol(secondary_start_sbi);
int hartid = cpuid_to_hartid_map(cpuid);
cpu_update_secondary_bootdata(cpuid, tidle);
rc = sbi_hsm_hart_start(hartid, boot_addr, 0);
return rc;
unsigned long hsm_data;
struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid);
/* Make sure tidle is updated */
smp_mb();
bdata->task_ptr = tidle;
bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE;
/* Make sure boot data is updated */
smp_mb();
hsm_data = __pa(bdata);
return sbi_hsm_hart_start(hartid, boot_addr, hsm_data);
}
static int sbi_cpu_prepare(unsigned int cpuid)
......
......@@ -6,11 +6,36 @@
#include <linux/errno.h>
#include <linux/of.h>
#include <linux/string.h>
#include <linux/sched/task_stack.h>
#include <asm/cpu_ops.h>
#include <asm/sbi.h>
#include <asm/smp.h>
const struct cpu_operations cpu_ops_spinwait;
void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data");
void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data");
static void cpu_update_secondary_bootdata(unsigned int cpuid,
struct task_struct *tidle)
{
int hartid = cpuid_to_hartid_map(cpuid);
/*
* The hartid must be less than NR_CPUS to avoid out-of-bound access
* errors for __cpu_spinwait_stack/task_pointer. That is not always possible
* for platforms with discontiguous hartid numbering scheme. That's why
* spinwait booting is not the recommended approach for any platforms
* booting Linux in S-mode and can be disabled in the future.
*/
if (hartid == INVALID_HARTID || hartid >= NR_CPUS)
return;
/* Make sure tidle is updated */
smp_mb();
WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid],
task_stack_page(tidle) + THREAD_SIZE);
WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle);
}
static int spinwait_cpu_prepare(unsigned int cpuid)
{
......@@ -28,7 +53,7 @@ static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle)
* selects the first cpu to boot the kernel and causes the remainder
* of the cpus to spin in a loop waiting for their stack pointer to be
* setup by that main cpu. Writing to bootdata
* (i.e __cpu_up_stack_pointer) signals to the spinning cpus that they
* (i.e __cpu_spinwait_stack_pointer) signals to the spinning cpus that they
* can continue the boot process.
*/
cpu_update_secondary_bootdata(cpuid, tidle);
......
......@@ -11,6 +11,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/csr.h>
#include <asm/cpu_ops_sbi.h>
#include <asm/hwcap.h>
#include <asm/image.h>
#include "efi-header.S"
......@@ -105,7 +106,8 @@ relocate:
/* Compute satp for kernel page tables, but don't load it yet */
srl a2, a0, PAGE_SHIFT
li a1, SATP_MODE
la a1, satp_mode
REG_L a1, 0(a1)
or a2, a2, a1
/*
......@@ -167,15 +169,15 @@ secondary_start_sbi:
la a3, .Lsecondary_park
csrw CSR_TVEC, a3
slli a3, a0, LGREG
la a4, __cpu_up_stack_pointer
XIP_FIXUP_OFFSET a4
la a5, __cpu_up_task_pointer
XIP_FIXUP_OFFSET a5
add a4, a3, a4
add a5, a3, a5
REG_L sp, (a4)
REG_L tp, (a5)
/* a0 contains the hartid & a1 contains boot data */
li a2, SBI_HART_BOOT_TASK_PTR_OFFSET
XIP_FIXUP_OFFSET a2
add a2, a2, a1
REG_L tp, (a2)
li a3, SBI_HART_BOOT_STACK_PTR_OFFSET
XIP_FIXUP_OFFSET a3
add a3, a3, a1
REG_L sp, (a3)
.Lsecondary_start_common:
......@@ -257,13 +259,13 @@ pmp_done:
li t0, SR_FS
csrc CSR_STATUS, t0
#ifdef CONFIG_SMP
#ifdef CONFIG_RISCV_BOOT_SPINWAIT
li t0, CONFIG_NR_CPUS
blt a0, t0, .Lgood_cores
tail .Lsecondary_park
.Lgood_cores:
#endif
/* The lottery system is only required for spinwait booting method */
#ifndef CONFIG_XIP_KERNEL
/* Pick one hart to run the main boot sequence */
la a3, hart_lottery
......@@ -282,6 +284,10 @@ pmp_done:
/* first time here if hart_lottery in RAM is not set */
beq t0, t1, .Lsecondary_start
#endif /* CONFIG_XIP */
#endif /* CONFIG_RISCV_BOOT_SPINWAIT */
#ifdef CONFIG_XIP_KERNEL
la sp, _end + THREAD_SIZE
XIP_FIXUP_OFFSET sp
mv s0, a0
......@@ -338,16 +344,16 @@ clear_bss_done:
call soc_early_init
tail start_kernel
#if CONFIG_RISCV_BOOT_SPINWAIT
.Lsecondary_start:
#ifdef CONFIG_SMP
/* Set trap vector to spin forever to help debug */
la a3, .Lsecondary_park
csrw CSR_TVEC, a3
slli a3, a0, LGREG
la a1, __cpu_up_stack_pointer
la a1, __cpu_spinwait_stack_pointer
XIP_FIXUP_OFFSET a1
la a2, __cpu_up_task_pointer
la a2, __cpu_spinwait_task_pointer
XIP_FIXUP_OFFSET a2
add a1, a3, a1
add a2, a3, a2
......@@ -365,7 +371,7 @@ clear_bss_done:
fence
tail .Lsecondary_start_common
#endif
#endif /* CONFIG_RISCV_BOOT_SPINWAIT */
END(_start_kernel)
......
......@@ -16,7 +16,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa);
asmlinkage void __init __copy_data(void);
#endif
extern void *__cpu_up_stack_pointer[];
extern void *__cpu_up_task_pointer[];
#ifdef CONFIG_RISCV_BOOT_SPINWAIT
extern void *__cpu_spinwait_stack_pointer[];
extern void *__cpu_spinwait_task_pointer[];
#endif
#endif /* __ASM_HEAD_H */
......@@ -42,12 +42,10 @@ static int riscv_gpr_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
int ret;
struct pt_regs *regs;
regs = task_pt_regs(target);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1);
return ret;
return user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1);
}
#ifdef CONFIG_FPU
......
......@@ -16,8 +16,8 @@ unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
EXPORT_SYMBOL(sbi_spec_version);
static void (*__sbi_set_timer)(uint64_t stime) __ro_after_init;
static int (*__sbi_send_ipi)(const unsigned long *hart_mask) __ro_after_init;
static int (*__sbi_rfence)(int fid, const unsigned long *hart_mask,
static int (*__sbi_send_ipi)(const struct cpumask *cpu_mask) __ro_after_init;
static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5) __ro_after_init;
......@@ -67,6 +67,30 @@ int sbi_err_map_linux_errno(int err)
EXPORT_SYMBOL(sbi_err_map_linux_errno);
#ifdef CONFIG_RISCV_SBI_V01
static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask)
{
unsigned long cpuid, hartid;
unsigned long hmask = 0;
/*
* There is no maximum hartid concept in RISC-V and NR_CPUS must not be
* associated with hartid. As SBI v0.1 is only kept for backward compatibility
* and will be removed in the future, there is no point in supporting hartid
* greater than BITS_PER_LONG (32 for RV32 and 64 for RV64). Ideally, SBI v0.2
* should be used for platforms with hartid greater than BITS_PER_LONG.
*/
for_each_cpu(cpuid, cpu_mask) {
hartid = cpuid_to_hartid_map(cpuid);
if (hartid >= BITS_PER_LONG) {
pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n");
break;
}
hmask |= 1 << hartid;
}
return hmask;
}
/**
* sbi_console_putchar() - Writes given character to the console device.
* @ch: The data to be written to the console.
......@@ -132,33 +156,44 @@ static void __sbi_set_timer_v01(uint64_t stime_value)
#endif
}
static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask)
{
sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)hart_mask,
unsigned long hart_mask;
if (!cpu_mask)
cpu_mask = cpu_online_mask;
hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)(&hart_mask),
0, 0, 0, 0, 0);
return 0;
}
static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask,
static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
int result = 0;
unsigned long hart_mask;
if (!cpu_mask)
cpu_mask = cpu_online_mask;
hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
/* v0.2 function IDs are equivalent to v0.1 extension IDs */
switch (fid) {
case SBI_EXT_RFENCE_REMOTE_FENCE_I:
sbi_ecall(SBI_EXT_0_1_REMOTE_FENCE_I, 0,
(unsigned long)hart_mask, 0, 0, 0, 0, 0);
(unsigned long)&hart_mask, 0, 0, 0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA, 0,
(unsigned long)hart_mask, start, size,
(unsigned long)&hart_mask, start, size,
0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, 0,
(unsigned long)hart_mask, start, size,
(unsigned long)&hart_mask, start, size,
arg4, 0, 0);
break;
default:
......@@ -180,7 +215,7 @@ static void __sbi_set_timer_v01(uint64_t stime_value)
sbi_major_version(), sbi_minor_version());
}
static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask)
{
pr_warn("IPI extension is not available in SBI v%lu.%lu\n",
sbi_major_version(), sbi_minor_version());
......@@ -188,7 +223,7 @@ static int __sbi_send_ipi_v01(const unsigned long *hart_mask)
return 0;
}
static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask,
static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
......@@ -212,37 +247,33 @@ static void __sbi_set_timer_v02(uint64_t stime_value)
#endif
}
static int __sbi_send_ipi_v02(const unsigned long *hart_mask)
static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask)
{
unsigned long hartid, hmask_val, hbase;
struct cpumask tmask;
unsigned long hartid, cpuid, hmask = 0, hbase = 0;
struct sbiret ret = {0};
int result;
if (!hart_mask || !(*hart_mask)) {
riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask);
hart_mask = cpumask_bits(&tmask);
}
if (!cpu_mask)
cpu_mask = cpu_online_mask;
hmask_val = 0;
hbase = 0;
for_each_set_bit(hartid, hart_mask, NR_CPUS) {
if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) {
for_each_cpu(cpuid, cpu_mask) {
hartid = cpuid_to_hartid_map(cpuid);
if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
hmask_val, hbase, 0, 0, 0, 0);
hmask, hbase, 0, 0, 0, 0);
if (ret.error)
goto ecall_failed;
hmask_val = 0;
hmask = 0;
hbase = 0;
}
if (!hmask_val)
if (!hmask)
hbase = hartid;
hmask_val |= 1UL << (hartid - hbase);
hmask |= 1UL << (hartid - hbase);
}
if (hmask_val) {
if (hmask) {
ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
hmask_val, hbase, 0, 0, 0, 0);
hmask, hbase, 0, 0, 0, 0);
if (ret.error)
goto ecall_failed;
}
......@@ -252,11 +283,11 @@ static int __sbi_send_ipi_v02(const unsigned long *hart_mask)
ecall_failed:
result = sbi_err_map_linux_errno(ret.error);
pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n",
__func__, hbase, hmask_val, result);
__func__, hbase, hmask, result);
return result;
}
static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask,
unsigned long hbase, unsigned long start,
unsigned long size, unsigned long arg4,
unsigned long arg5)
......@@ -267,31 +298,31 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
switch (fid) {
case SBI_EXT_RFENCE_REMOTE_FENCE_I:
ret = sbi_ecall(ext, fid, hmask_val, hbase, 0, 0, 0, 0);
ret = sbi_ecall(ext, fid, hmask, hbase, 0, 0, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, 0, 0);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
ret = sbi_ecall(ext, fid, hmask_val, hbase, start,
ret = sbi_ecall(ext, fid, hmask, hbase, start,
size, arg4, 0);
break;
default:
......@@ -303,43 +334,39 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val,
if (ret.error) {
result = sbi_err_map_linux_errno(ret.error);
pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n",
__func__, hbase, hmask_val, result);
__func__, hbase, hmask, result);
}
return result;
}
static int __sbi_rfence_v02(int fid, const unsigned long *hart_mask,
static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
unsigned long hmask_val, hartid, hbase;
struct cpumask tmask;
unsigned long hartid, cpuid, hmask = 0, hbase = 0;
int result;
if (!hart_mask || !(*hart_mask)) {
riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask);
hart_mask = cpumask_bits(&tmask);
}
if (!cpu_mask)
cpu_mask = cpu_online_mask;
hmask_val = 0;
hbase = 0;
for_each_set_bit(hartid, hart_mask, NR_CPUS) {
if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) {
result = __sbi_rfence_v02_call(fid, hmask_val, hbase,
for_each_cpu(cpuid, cpu_mask) {
hartid = cpuid_to_hartid_map(cpuid);
if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
result = __sbi_rfence_v02_call(fid, hmask, hbase,
start, size, arg4, arg5);
if (result)
return result;
hmask_val = 0;
hmask = 0;
hbase = 0;
}
if (!hmask_val)
if (!hmask)
hbase = hartid;
hmask_val |= 1UL << (hartid - hbase);
hmask |= 1UL << (hartid - hbase);
}
if (hmask_val) {
result = __sbi_rfence_v02_call(fid, hmask_val, hbase,
if (hmask) {
result = __sbi_rfence_v02_call(fid, hmask, hbase,
start, size, arg4, arg5);
if (result)
return result;
......@@ -361,44 +388,44 @@ void sbi_set_timer(uint64_t stime_value)
/**
* sbi_send_ipi() - Send an IPI to any hart.
* @hart_mask: A cpu mask containing all the target harts.
* @cpu_mask: A cpu mask containing all the target harts.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
int sbi_send_ipi(const unsigned long *hart_mask)
int sbi_send_ipi(const struct cpumask *cpu_mask)
{
return __sbi_send_ipi(hart_mask);
return __sbi_send_ipi(cpu_mask);
}
EXPORT_SYMBOL(sbi_send_ipi);
/**
* sbi_remote_fence_i() - Execute FENCE.I instruction on given remote harts.
* @hart_mask: A cpu mask containing all the target harts.
* @cpu_mask: A cpu mask containing all the target harts.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
int sbi_remote_fence_i(const unsigned long *hart_mask)
int sbi_remote_fence_i(const struct cpumask *cpu_mask)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I,
hart_mask, 0, 0, 0, 0);
cpu_mask, 0, 0, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_fence_i);
/**
* sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote
* harts for the specified virtual address range.
* @hart_mask: A cpu mask containing all the target harts.
* @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the virtual address
* @size: Total size of the virtual address range.
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
int sbi_remote_sfence_vma(const unsigned long *hart_mask,
int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
hart_mask, start, size, 0, 0);
cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_sfence_vma);
......@@ -406,38 +433,38 @@ EXPORT_SYMBOL(sbi_remote_sfence_vma);
* sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given
* remote harts for a virtual address range belonging to a specific ASID.
*
* @hart_mask: A cpu mask containing all the target harts.
* @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the virtual address
* @size: Total size of the virtual address range.
* @asid: The value of address space identifier (ASID).
*
* Return: 0 on success, appropriate linux error code otherwise.
*/
int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
hart_mask, start, size, asid, 0);
cpu_mask, start, size, asid, 0);
}
EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);
/**
* sbi_remote_hfence_gvma() - Execute HFENCE.GVMA instructions on given remote
* harts for the specified guest physical address range.
* @hart_mask: A cpu mask containing all the target harts.
* @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the guest physical address
* @size: Total size of the guest physical address range.
*
* Return: None
*/
int sbi_remote_hfence_gvma(const unsigned long *hart_mask,
int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA,
hart_mask, start, size, 0, 0);
cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma);
......@@ -445,38 +472,38 @@ EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma);
* sbi_remote_hfence_gvma_vmid() - Execute HFENCE.GVMA instructions on given
* remote harts for a guest physical address range belonging to a specific VMID.
*
* @hart_mask: A cpu mask containing all the target harts.
* @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the guest physical address
* @size: Total size of the guest physical address range.
* @vmid: The value of guest ID (VMID).
*
* Return: 0 if success, Error otherwise.
*/
int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask,
int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long vmid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID,
hart_mask, start, size, vmid, 0);
cpu_mask, start, size, vmid, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_gvma_vmid);
/**
* sbi_remote_hfence_vvma() - Execute HFENCE.VVMA instructions on given remote
* harts for the current guest virtual address range.
* @hart_mask: A cpu mask containing all the target harts.
* @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the current guest virtual address
* @size: Total size of the current guest virtual address range.
*
* Return: None
*/
int sbi_remote_hfence_vvma(const unsigned long *hart_mask,
int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA,
hart_mask, start, size, 0, 0);
cpu_mask, start, size, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_vvma);
......@@ -485,20 +512,20 @@ EXPORT_SYMBOL(sbi_remote_hfence_vvma);
* remote harts for current guest virtual address range belonging to a specific
* ASID.
*
* @hart_mask: A cpu mask containing all the target harts.
* @cpu_mask: A cpu mask containing all the target harts.
* @start: Start of the current guest virtual address
* @size: Total size of the current guest virtual address range.
* @asid: The value of address space identifier (ASID).
*
* Return: None
*/
int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask,
int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask,
unsigned long start,
unsigned long size,
unsigned long asid)
{
return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID,
hart_mask, start, size, asid, 0);
cpu_mask, start, size, asid, 0);
}
EXPORT_SYMBOL(sbi_remote_hfence_vvma_asid);
......@@ -591,11 +618,7 @@ long sbi_get_mimpid(void)
static void sbi_send_cpumask_ipi(const struct cpumask *target)
{
struct cpumask hartid_mask;
riscv_cpuid_to_hartid_mask(target, &hartid_mask);
sbi_send_ipi(cpumask_bits(&hartid_mask));
sbi_send_ipi(target);
}
static const struct riscv_ipi_ops sbi_ipi_ops = {
......
......@@ -59,16 +59,6 @@ atomic_t hart_lottery __section(".sdata")
unsigned long boot_cpu_hartid;
static DEFINE_PER_CPU(struct cpu, cpu_devices);
void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
{
int cpu;
cpumask_clear(out);
for_each_cpu(cpu, in)
cpumask_set_cpu(cpuid_to_hartid_map(cpu), out);
}
EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask);
/*
* Place kernel memory regions on the resource tree so that
* kexec-tools can retrieve them from /proc/iomem. While there
......
......@@ -96,7 +96,7 @@ void __init setup_smp(void)
if (cpuid >= NR_CPUS) {
pr_warn("Invalid cpuid [%d] for hartid [%d]\n",
cpuid, hart);
break;
continue;
}
cpuid_to_hartid_map(cpuid) = hart;
......
......@@ -114,7 +114,6 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
{
struct cpumask hmask;
unsigned long size = PAGE_SIZE;
struct kvm_vmid *vmid = &kvm->arch.vmid;
......@@ -127,8 +126,7 @@ static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
* where the Guest/VM is running.
*/
preempt_disable();
riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
sbi_remote_hfence_gvma_vmid(cpumask_bits(&hmask), addr, size,
sbi_remote_hfence_gvma_vmid(cpu_online_mask, addr, size,
READ_ONCE(vmid->vmid));
preempt_enable();
}
......
......@@ -82,7 +82,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
{
int ret = 0;
unsigned long i;
struct cpumask cm, hm;
struct cpumask cm;
struct kvm_vcpu *tmp;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
unsigned long hmask = cp->a0;
......@@ -90,7 +90,6 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
unsigned long funcid = cp->a6;
cpumask_clear(&cm);
cpumask_clear(&hm);
kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
if (hbase != -1UL) {
if (tmp->vcpu_id < hbase)
......@@ -103,17 +102,15 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
cpumask_set_cpu(tmp->cpu, &cm);
}
riscv_cpuid_to_hartid_mask(&cm, &hm);
switch (funcid) {
case SBI_EXT_RFENCE_REMOTE_FENCE_I:
ret = sbi_remote_fence_i(cpumask_bits(&hm));
ret = sbi_remote_fence_i(&cm);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
ret = sbi_remote_hfence_vvma(cpumask_bits(&hm), cp->a2, cp->a3);
ret = sbi_remote_hfence_vvma(&cm, cp->a2, cp->a3);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
ret = sbi_remote_hfence_vvma_asid(cpumask_bits(&hm), cp->a2,
ret = sbi_remote_hfence_vvma_asid(&cm, cp->a2,
cp->a3, cp->a4);
break;
case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
......
......@@ -38,7 +38,7 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
int i, ret = 0;
u64 next_cycle;
struct kvm_vcpu *rvcpu;
struct cpumask cm, hm;
struct cpumask cm;
struct kvm *kvm = vcpu->kvm;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
......@@ -101,15 +101,12 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
continue;
cpumask_set_cpu(rvcpu->cpu, &cm);
}
riscv_cpuid_to_hartid_mask(&cm, &hm);
if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
ret = sbi_remote_fence_i(cpumask_bits(&hm));
ret = sbi_remote_fence_i(&cm);
else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA)
ret = sbi_remote_hfence_vvma(cpumask_bits(&hm),
cp->a1, cp->a2);
ret = sbi_remote_hfence_vvma(&cm, cp->a1, cp->a2);
else
ret = sbi_remote_hfence_vvma_asid(cpumask_bits(&hm),
cp->a1, cp->a2, cp->a3);
ret = sbi_remote_hfence_vvma_asid(&cm, cp->a1, cp->a2, cp->a3);
break;
default:
ret = -EINVAL;
......
......@@ -67,7 +67,6 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
{
unsigned long i;
struct kvm_vcpu *v;
struct cpumask hmask;
struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;
if (!kvm_riscv_stage2_vmid_ver_changed(vmid))
......@@ -102,8 +101,7 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
* running, we force VM exits on all host CPUs using IPI and
* flush all Guest TLBs.
*/
riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
sbi_remote_hfence_gvma(cpumask_bits(&hmask), 0, 0);
sbi_remote_hfence_gvma(cpu_online_mask, 0, 0);
}
vmid->vmid = vmid_next;
......
......@@ -67,10 +67,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
*/
smp_mb();
} else if (IS_ENABLED(CONFIG_RISCV_SBI)) {
cpumask_t hartid_mask;
riscv_cpuid_to_hartid_mask(&others, &hartid_mask);
sbi_remote_fence_i(cpumask_bits(&hartid_mask));
sbi_remote_fence_i(&others);
} else {
on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1);
}
......
......@@ -192,7 +192,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
switch_mm_fast:
csr_write(CSR_SATP, virt_to_pfn(mm->pgd) |
((cntx & asid_mask) << SATP_ASID_SHIFT) |
SATP_MODE);
satp_mode);
if (need_flush_tlb)
local_flush_tlb_all();
......@@ -201,7 +201,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
static void set_mm_noasid(struct mm_struct *mm)
{
/* Switch the page table and blindly nuke entire local TLB */
csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | SATP_MODE);
csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode);
local_flush_tlb_all();
}
......
......@@ -37,13 +37,19 @@ EXPORT_SYMBOL(kernel_map);
#define kernel_map (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map))
#endif
#ifdef CONFIG_64BIT
u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39;
#else
u64 satp_mode = SATP_MODE_32;
#endif
EXPORT_SYMBOL(satp_mode);
bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
EXPORT_SYMBOL(pgtable_l4_enabled);
phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);
#ifdef CONFIG_XIP_KERNEL
extern char _xiprom[], _exiprom[], __data_loc;
#endif
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
......@@ -53,15 +59,6 @@ extern char _start[];
void *_dtb_early_va __initdata;
uintptr_t _dtb_early_pa __initdata;
struct pt_alloc_ops {
pte_t *(*get_pte_virt)(phys_addr_t pa);
phys_addr_t (*alloc_pte)(uintptr_t va);
#ifndef __PAGETABLE_PMD_FOLDED
pmd_t *(*get_pmd_virt)(phys_addr_t pa);
phys_addr_t (*alloc_pmd)(uintptr_t va);
#endif
};
static phys_addr_t dma32_phys_limit __initdata;
static void __init zone_sizes_init(void)
......@@ -102,10 +99,14 @@ static void __init print_vm_layout(void)
(unsigned long)VMALLOC_END);
print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
(unsigned long)high_memory);
#ifdef CONFIG_64BIT
if (IS_ENABLED(CONFIG_64BIT)) {
#ifdef CONFIG_KASAN
print_mlm("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
#endif
print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
(unsigned long)ADDRESS_SPACE_END);
#endif
}
}
#else
static void print_vm_layout(void) { }
......@@ -130,18 +131,8 @@ void __init mem_init(void)
print_vm_layout();
}
/*
* The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernel,
* whereas for 64-bit kernel, the end of the virtual address space is occupied
* by the modules/BPF/kernel mappings which reduces the available size of the
* linear mapping.
* Limit the memory size via mem.
*/
#ifdef CONFIG_64BIT
static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G;
#else
static phys_addr_t memory_limit = -PAGE_OFFSET;
#endif
/* Limit the memory size via mem. */
static phys_addr_t memory_limit;
static int __init early_mem(char *p)
{
......@@ -162,35 +153,31 @@ early_param("mem", early_mem);
static void __init setup_bootmem(void)
{
phys_addr_t vmlinux_end = __pa_symbol(&_end);
phys_addr_t vmlinux_start = __pa_symbol(&_start);
phys_addr_t __maybe_unused max_mapped_addr;
phys_addr_t phys_ram_end;
phys_addr_t max_mapped_addr;
phys_addr_t phys_ram_end, vmlinux_start;
#ifdef CONFIG_XIP_KERNEL
if (IS_ENABLED(CONFIG_XIP_KERNEL))
vmlinux_start = __pa_symbol(&_sdata);
#endif
else
vmlinux_start = __pa_symbol(&_start);
memblock_enforce_memory_limit(memory_limit);
/*
* Reserve from the start of the kernel to the end of the kernel
*/
#if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
/*
* Make sure we align the reservation on PMD_SIZE since we will
* map the kernel in the linear mapping as read-only: we do not want
* any allocation to happen between _end and the next pmd aligned page.
*/
if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
#endif
/*
* Reserve from the start of the kernel to the end of the kernel
*/
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
phys_ram_end = memblock_end_of_DRAM();
#ifndef CONFIG_XIP_KERNEL
if (!IS_ENABLED(CONFIG_XIP_KERNEL))
phys_ram_base = memblock_start_of_DRAM();
#endif
#ifndef CONFIG_64BIT
/*
* memblock allocator is not aware of the fact that last 4K bytes of
* the addressable memory can not be mapped because of IS_ERR_VALUE
......@@ -200,10 +187,11 @@ static void __init setup_bootmem(void)
* address space is occupied by the kernel mapping then this check must
* be done as soon as the kernel mapping base address is determined.
*/
if (!IS_ENABLED(CONFIG_64BIT)) {
max_mapped_addr = __pa(~(ulong)0);
if (max_mapped_addr == (phys_ram_end - 1))
memblock_set_current_limit(max_mapped_addr - 4096);
#endif
}
min_low_pfn = PFN_UP(phys_ram_base);
max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
......@@ -229,13 +217,7 @@ static void __init setup_bootmem(void)
}
#ifdef CONFIG_MMU
static struct pt_alloc_ops _pt_ops __initdata;
#ifdef CONFIG_XIP_KERNEL
#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&_pt_ops))
#else
#define pt_ops _pt_ops
#endif
struct pt_alloc_ops pt_ops __initdata;
unsigned long riscv_pfn_base __ro_after_init;
EXPORT_SYMBOL(riscv_pfn_base);
......@@ -245,9 +227,11 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#ifdef CONFIG_XIP_KERNEL
#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
#define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte))
#define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir))
......@@ -333,6 +317,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd))
#endif /* CONFIG_XIP_KERNEL */
static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
#ifdef CONFIG_XIP_KERNEL
#define trampoline_pud ((pud_t *)XIP_FIXUP(trampoline_pud))
#define fixmap_pud ((pud_t *)XIP_FIXUP(fixmap_pud))
#define early_pud ((pud_t *)XIP_FIXUP(early_pud))
#endif /* CONFIG_XIP_KERNEL */
static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
{
/* Before MMU is enabled */
......@@ -352,7 +346,7 @@ static pmd_t *__init get_pmd_virt_late(phys_addr_t pa)
static phys_addr_t __init alloc_pmd_early(uintptr_t va)
{
BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);
return (uintptr_t)early_pmd;
}
......@@ -399,21 +393,97 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
create_pte_mapping(ptep, va, pa, sz, prot);
}
#define pgd_next_t pmd_t
#define alloc_pgd_next(__va) pt_ops.alloc_pmd(__va)
#define get_pgd_next_virt(__pa) pt_ops.get_pmd_virt(__pa)
static pud_t *__init get_pud_virt_early(phys_addr_t pa)
{
return (pud_t *)((uintptr_t)pa);
}
static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa)
{
clear_fixmap(FIX_PUD);
return (pud_t *)set_fixmap_offset(FIX_PUD, pa);
}
static pud_t *__init get_pud_virt_late(phys_addr_t pa)
{
return (pud_t *)__va(pa);
}
static phys_addr_t __init alloc_pud_early(uintptr_t va)
{
/* Only one PUD is available for early mapping */
BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
return (uintptr_t)early_pud;
}
static phys_addr_t __init alloc_pud_fixmap(uintptr_t va)
{
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}
static phys_addr_t alloc_pud_late(uintptr_t va)
{
unsigned long vaddr;
vaddr = __get_free_page(GFP_KERNEL);
BUG_ON(!vaddr);
return __pa(vaddr);
}
static void __init create_pud_mapping(pud_t *pudp,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
{
pmd_t *nextp;
phys_addr_t next_phys;
uintptr_t pud_index = pud_index(va);
if (sz == PUD_SIZE) {
if (pud_val(pudp[pud_index]) == 0)
pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot);
return;
}
if (pud_val(pudp[pud_index]) == 0) {
next_phys = pt_ops.alloc_pmd(va);
pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE);
nextp = pt_ops.get_pmd_virt(next_phys);
memset(nextp, 0, PAGE_SIZE);
} else {
next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index]));
nextp = pt_ops.get_pmd_virt(next_phys);
}
create_pmd_mapping(nextp, va, pa, sz, prot);
}
#define pgd_next_t pud_t
#define alloc_pgd_next(__va) (pgtable_l4_enabled ? \
pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))
#define get_pgd_next_virt(__pa) (pgtable_l4_enabled ? \
pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa))
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next fixmap_pmd
(pgtable_l4_enabled ? \
create_pud_mapping(__nextp, __va, __pa, __sz, __prot) : \
create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))
#define fixmap_pgd_next (pgtable_l4_enabled ? \
(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)
#define trampoline_pgd_next (pgtable_l4_enabled ? \
(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)
#define early_dtb_pgd_next (pgtable_l4_enabled ? \
(uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)
#else
#define pgd_next_t pte_t
#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
#define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next fixmap_pte
#define fixmap_pgd_next ((uintptr_t)fixmap_pte)
#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd)
#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot)
#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
#endif
#endif /* __PAGETABLE_PMD_FOLDED */
void __init create_pgd_mapping(pgd_t *pgdp,
uintptr_t va, phys_addr_t pa,
......@@ -452,6 +522,8 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
}
#ifdef CONFIG_XIP_KERNEL
extern char _xiprom[], _exiprom[], __data_loc;
/* called from head.S with MMU off */
asmlinkage void __init __copy_data(void)
{
......@@ -500,6 +572,57 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
}
#endif /* CONFIG_STRICT_KERNEL_RWX */
#ifdef CONFIG_64BIT
static void __init disable_pgtable_l4(void)
{
pgtable_l4_enabled = false;
kernel_map.page_offset = PAGE_OFFSET_L3;
satp_mode = SATP_MODE_39;
}
/*
* There is a simple way to determine if 4-level is supported by the
* underlying hardware: establish 1:1 mapping in 4-level page table mode
* then read SATP to see if the configuration was taken into account
* meaning sv48 is supported.
*/
static __init void set_satp_mode(void)
{
u64 identity_satp, hw_satp;
uintptr_t set_satp_mode_pmd;
set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
create_pgd_mapping(early_pg_dir,
set_satp_mode_pmd, (uintptr_t)early_pud,
PGDIR_SIZE, PAGE_TABLE);
create_pud_mapping(early_pud,
set_satp_mode_pmd, (uintptr_t)early_pmd,
PUD_SIZE, PAGE_TABLE);
/* Handle the case where set_satp_mode straddles 2 PMDs */
create_pmd_mapping(early_pmd,
set_satp_mode_pmd, set_satp_mode_pmd,
PMD_SIZE, PAGE_KERNEL_EXEC);
create_pmd_mapping(early_pmd,
set_satp_mode_pmd + PMD_SIZE,
set_satp_mode_pmd + PMD_SIZE,
PMD_SIZE, PAGE_KERNEL_EXEC);
identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
local_flush_tlb_all();
csr_write(CSR_SATP, identity_satp);
hw_satp = csr_swap(CSR_SATP, 0ULL);
local_flush_tlb_all();
if (hw_satp != identity_satp)
disable_pgtable_l4();
memset(early_pg_dir, 0, PAGE_SIZE);
memset(early_pud, 0, PAGE_SIZE);
memset(early_pmd, 0, PAGE_SIZE);
}
#endif
/*
* setup_vm() is called from head.S with MMU-off.
*
......@@ -564,10 +687,15 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
IS_ENABLED(CONFIG_64BIT) ? (uintptr_t)early_dtb_pmd : pa,
IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa,
PGDIR_SIZE,
IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
if (pgtable_l4_enabled) {
create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
(uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
}
if (IS_ENABLED(CONFIG_64BIT)) {
create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
pa, PMD_SIZE, PAGE_KERNEL);
......@@ -589,11 +717,64 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
dtb_early_pa = dtb_pa;
}
/*
* MMU is not enabled, the page tables are allocated directly using
* early_pmd/pud/p4d and the address returned is the physical one.
*/
void __init pt_ops_set_early(void)
{
pt_ops.alloc_pte = alloc_pte_early;
pt_ops.get_pte_virt = get_pte_virt_early;
#ifndef __PAGETABLE_PMD_FOLDED
pt_ops.alloc_pmd = alloc_pmd_early;
pt_ops.get_pmd_virt = get_pmd_virt_early;
pt_ops.alloc_pud = alloc_pud_early;
pt_ops.get_pud_virt = get_pud_virt_early;
#endif
}
/*
* MMU is enabled but page table setup is not complete yet.
* fixmap page table alloc functions must be used as a means to temporarily
* map the allocated physical pages since the linear mapping does not exist yet.
*
* Note that this is called with MMU disabled, hence kernel_mapping_pa_to_va,
* but it will be used as described above.
*/
void __init pt_ops_set_fixmap(void)
{
pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap);
pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap);
#ifndef __PAGETABLE_PMD_FOLDED
pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap);
pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap);
pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap);
pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap);
#endif
}
/*
* MMU is enabled and page table setup is complete, so from now, we can use
* generic page allocation functions to setup page table.
*/
void __init pt_ops_set_late(void)
{
pt_ops.alloc_pte = alloc_pte_late;
pt_ops.get_pte_virt = get_pte_virt_late;
#ifndef __PAGETABLE_PMD_FOLDED
pt_ops.alloc_pmd = alloc_pmd_late;
pt_ops.get_pmd_virt = get_pmd_virt_late;
pt_ops.alloc_pud = alloc_pud_late;
pt_ops.get_pud_virt = get_pud_virt_late;
#endif
}
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd;
kernel_map.virt_addr = KERNEL_LINK_ADDR;
kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
#ifdef CONFIG_XIP_KERNEL
kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
......@@ -608,11 +789,24 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
kernel_map.phys_addr = (uintptr_t)(&_start);
kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
#endif
#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
set_satp_mode();
#endif
kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);
/*
* The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
* kernel, whereas for 64-bit kernel, the end of the virtual address
* space is occupied by the modules/BPF/kernel mappings which reduces
* the available size of the linear mapping.
*/
memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? SZ_4G : 0);
/* Sanity check alignment and size */
BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0);
......@@ -625,23 +819,25 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
#endif
pt_ops.alloc_pte = alloc_pte_early;
pt_ops.get_pte_virt = get_pte_virt_early;
#ifndef __PAGETABLE_PMD_FOLDED
pt_ops.alloc_pmd = alloc_pmd_early;
pt_ops.get_pmd_virt = get_pmd_virt_early;
#endif
pt_ops_set_early();
/* Setup early PGD for fixmap */
create_pgd_mapping(early_pg_dir, FIXADDR_START,
(uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
#ifndef __PAGETABLE_PMD_FOLDED
/* Setup fixmap PMD */
/* Setup fixmap PUD and PMD */
if (pgtable_l4_enabled)
create_pud_mapping(fixmap_pud, FIXADDR_START,
(uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE);
create_pmd_mapping(fixmap_pmd, FIXADDR_START,
(uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
/* Setup trampoline PGD and PMD */
create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
(uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
if (pgtable_l4_enabled)
create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
(uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
#ifdef CONFIG_XIP_KERNEL
create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr,
kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC);
......@@ -669,7 +865,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
* Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap
* range can not span multiple pmds.
*/
BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
#ifndef __PAGETABLE_PMD_FOLDED
......@@ -694,6 +890,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
}
#endif
pt_ops_set_fixmap();
}
static void __init setup_vm_final(void)
......@@ -702,16 +900,6 @@ static void __init setup_vm_final(void)
phys_addr_t pa, start, end;
u64 i;
/**
* MMU is enabled at this point. But page table setup is not complete yet.
* fixmap page table alloc functions should be used at this point
*/
pt_ops.alloc_pte = alloc_pte_fixmap;
pt_ops.get_pte_virt = get_pte_virt_fixmap;
#ifndef __PAGETABLE_PMD_FOLDED
pt_ops.alloc_pmd = alloc_pmd_fixmap;
pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
#endif
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next),
......@@ -736,26 +924,24 @@ static void __init setup_vm_final(void)
}
}
#ifdef CONFIG_64BIT
/* Map the kernel */
if (IS_ENABLED(CONFIG_64BIT))
create_kernel_page_table(swapper_pg_dir, false);
#ifdef CONFIG_KASAN
kasan_swapper_init();
#endif
/* Clear fixmap PTE and PMD mappings */
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
clear_fixmap(FIX_PUD);
/* Move to swapper page table */
csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
local_flush_tlb_all();
/* generic page allocation functions must be used to setup page table */
pt_ops.alloc_pte = alloc_pte_late;
pt_ops.get_pte_virt = get_pte_virt_late;
#ifndef __PAGETABLE_PMD_FOLDED
pt_ops.alloc_pmd = alloc_pmd_late;
pt_ops.get_pmd_virt = get_pmd_virt_late;
#endif
pt_ops_set_late();
}
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
......@@ -791,12 +977,10 @@ static void __init reserve_crashkernel(void)
* since it doesn't make much sense and we have limited memory
* resources.
*/
#ifdef CONFIG_CRASH_DUMP
if (is_kdump_kernel()) {
pr_info("crashkernel: ignoring reservation request\n");
return;
}
#endif
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
&crash_size, &crash_base);
......
......@@ -11,45 +11,27 @@
#include <asm/fixmap.h>
#include <asm/pgalloc.h>
extern pgd_t early_pg_dir[PTRS_PER_PGD];
asmlinkage void __init kasan_early_init(void)
{
uintptr_t i;
pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START);
BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
for (i = 0; i < PTRS_PER_PTE; ++i)
set_pte(kasan_early_shadow_pte + i,
mk_pte(virt_to_page(kasan_early_shadow_page),
PAGE_KERNEL));
for (i = 0; i < PTRS_PER_PMD; ++i)
set_pmd(kasan_early_shadow_pmd + i,
pfn_pmd(PFN_DOWN
(__pa((uintptr_t) kasan_early_shadow_pte)),
__pgprot(_PAGE_TABLE)));
for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
i += PGDIR_SIZE, ++pgd)
set_pgd(pgd,
pfn_pgd(PFN_DOWN
(__pa(((uintptr_t) kasan_early_shadow_pmd))),
__pgprot(_PAGE_TABLE)));
/* init for swapper_pg_dir */
pgd = pgd_offset_k(KASAN_SHADOW_START);
for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
i += PGDIR_SIZE, ++pgd)
set_pgd(pgd,
pfn_pgd(PFN_DOWN
(__pa(((uintptr_t) kasan_early_shadow_pmd))),
__pgprot(_PAGE_TABLE)));
/*
* Kasan shadow region must lie at a fixed address across sv39, sv48 and sv57
* which is right before the kernel.
*
* For sv39, the region is aligned on PGDIR_SIZE so we only need to populate
* the page global directory with kasan_early_shadow_pmd.
*
* For sv48 and sv57, the region is not aligned on PGDIR_SIZE so the mapping
* must be divided as follows:
* - the first PGD entry, although incomplete, is populated with
* kasan_early_shadow_pud/p4d
* - the PGD entries in the middle are populated with kasan_early_shadow_pud/p4d
* - the last PGD entry is shared with the kernel mapping so populated at the
* lower levels pud/p4d
*
* In addition, when shallow populating a kasan region (for example vmalloc),
* this region may also not be aligned on PGDIR size, so we must go down to the
* pud level too.
*/
local_flush_tlb_all();
}
extern pgd_t early_pg_dir[PTRS_PER_PGD];
static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end)
{
......@@ -73,15 +55,19 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned
set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE));
}
static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end)
static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned long end)
{
phys_addr_t phys_addr;
pmd_t *pmdp, *base_pmd;
unsigned long next;
base_pmd = (pmd_t *)pgd_page_vaddr(*pgd);
if (pud_none(*pud)) {
base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
} else {
base_pmd = (pmd_t *)pud_pgtable(*pud);
if (base_pmd == lm_alias(kasan_early_shadow_pmd))
base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
}
pmdp = base_pmd + pmd_index(vaddr);
......@@ -105,59 +91,207 @@ static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned
* it entirely, memblock could allocate a page at a physical address
* where KASAN is not populated yet and then we'd get a page fault.
*/
set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
set_pud(pud, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
}
static void __init kasan_populate_pgd(unsigned long vaddr, unsigned long end)
static void __init kasan_populate_pud(pgd_t *pgd,
unsigned long vaddr, unsigned long end,
bool early)
{
phys_addr_t phys_addr;
pud_t *pudp, *base_pud;
unsigned long next;
if (early) {
/*
* We can't use pgd_page_vaddr here as it would return a linear
* mapping address but it is not mapped yet, but when populating
* early_pg_dir, we need the physical address and when populating
* swapper_pg_dir, we need the kernel virtual address so use
* pt_ops facility.
*/
base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd)));
} else {
base_pud = (pud_t *)pgd_page_vaddr(*pgd);
if (base_pud == lm_alias(kasan_early_shadow_pud))
base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
}
pudp = base_pud + pud_index(vaddr);
do {
next = pud_addr_end(vaddr, end);
if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) {
if (early) {
phys_addr = __pa(((uintptr_t)kasan_early_shadow_pmd));
set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE));
continue;
} else {
phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE);
if (phys_addr) {
set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL));
continue;
}
}
}
kasan_populate_pmd(pudp, vaddr, next);
} while (pudp++, vaddr = next, vaddr != end);
/*
* Wait for the whole PGD to be populated before setting the PGD in
* the page table, otherwise, if we did set the PGD before populating
* it entirely, memblock could allocate a page at a physical address
* where KASAN is not populated yet and then we'd get a page fault.
*/
if (!early)
set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
}
#define kasan_early_shadow_pgd_next (pgtable_l4_enabled ? \
(uintptr_t)kasan_early_shadow_pud : \
(uintptr_t)kasan_early_shadow_pmd)
#define kasan_populate_pgd_next(pgdp, vaddr, next, early) \
(pgtable_l4_enabled ? \
kasan_populate_pud(pgdp, vaddr, next, early) : \
kasan_populate_pmd((pud_t *)pgdp, vaddr, next))
static void __init kasan_populate_pgd(pgd_t *pgdp,
unsigned long vaddr, unsigned long end,
bool early)
{
phys_addr_t phys_addr;
pgd_t *pgdp = pgd_offset_k(vaddr);
unsigned long next;
do {
next = pgd_addr_end(vaddr, end);
if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
if (early) {
phys_addr = __pa((uintptr_t)kasan_early_shadow_pgd_next);
set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE));
continue;
} else if (pgd_page_vaddr(*pgdp) ==
(unsigned long)lm_alias(kasan_early_shadow_pgd_next)) {
/*
* pgdp can't be none since kasan_early_init initialized all KASAN
* shadow region with kasan_early_shadow_pmd: if this is stillthe case,
* that means we can try to allocate a hugepage as a replacement.
* pgdp can't be none since kasan_early_init
* initialized all KASAN shadow region with
* kasan_early_shadow_pud: if this is still the
* case, that means we can try to allocate a
* hugepage as a replacement.
*/
if (pgd_page_vaddr(*pgdp) == (unsigned long)lm_alias(kasan_early_shadow_pmd) &&
IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
if (phys_addr) {
set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
continue;
}
}
}
kasan_populate_pmd(pgdp, vaddr, next);
kasan_populate_pgd_next(pgdp, vaddr, next, early);
} while (pgdp++, vaddr = next, vaddr != end);
}
asmlinkage void __init kasan_early_init(void)
{
uintptr_t i;
BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
for (i = 0; i < PTRS_PER_PTE; ++i)
set_pte(kasan_early_shadow_pte + i,
mk_pte(virt_to_page(kasan_early_shadow_page),
PAGE_KERNEL));
for (i = 0; i < PTRS_PER_PMD; ++i)
set_pmd(kasan_early_shadow_pmd + i,
pfn_pmd(PFN_DOWN
(__pa((uintptr_t)kasan_early_shadow_pte)),
PAGE_TABLE));
if (pgtable_l4_enabled) {
for (i = 0; i < PTRS_PER_PUD; ++i)
set_pud(kasan_early_shadow_pud + i,
pfn_pud(PFN_DOWN
(__pa(((uintptr_t)kasan_early_shadow_pmd))),
PAGE_TABLE));
}
kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START),
KASAN_SHADOW_START, KASAN_SHADOW_END, true);
local_flush_tlb_all();
}
void __init kasan_swapper_init(void)
{
kasan_populate_pgd(pgd_offset_k(KASAN_SHADOW_START),
KASAN_SHADOW_START, KASAN_SHADOW_END, true);
local_flush_tlb_all();
}
static void __init kasan_populate(void *start, void *end)
{
unsigned long vaddr = (unsigned long)start & PAGE_MASK;
unsigned long vend = PAGE_ALIGN((unsigned long)end);
kasan_populate_pgd(vaddr, vend);
kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend, false);
local_flush_tlb_all();
memset(start, KASAN_SHADOW_INIT, end - start);
}
static void __init kasan_shallow_populate_pud(pgd_t *pgdp,
unsigned long vaddr, unsigned long end,
bool kasan_populate)
{
unsigned long next;
pud_t *pudp, *base_pud;
pmd_t *base_pmd;
bool is_kasan_pmd;
base_pud = (pud_t *)pgd_page_vaddr(*pgdp);
pudp = base_pud + pud_index(vaddr);
if (kasan_populate)
memcpy(base_pud, (void *)kasan_early_shadow_pgd_next,
sizeof(pud_t) * PTRS_PER_PUD);
do {
next = pud_addr_end(vaddr, end);
is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd));
if (is_kasan_pmd) {
base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
}
} while (pudp++, vaddr = next, vaddr != end);
}
static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end)
{
unsigned long next;
void *p;
pgd_t *pgd_k = pgd_offset_k(vaddr);
bool is_kasan_pgd_next;
do {
next = pgd_addr_end(vaddr, end);
if (pgd_page_vaddr(*pgd_k) == (unsigned long)lm_alias(kasan_early_shadow_pmd)) {
is_kasan_pgd_next = (pgd_page_vaddr(*pgd_k) ==
(unsigned long)lm_alias(kasan_early_shadow_pgd_next));
if (is_kasan_pgd_next) {
p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE)
continue;
kasan_shallow_populate_pud(pgd_k, vaddr, next, is_kasan_pgd_next);
} while (pgd_k++, vaddr = next, vaddr != end);
}
......
......@@ -32,7 +32,6 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
unsigned long size, unsigned long stride)
{
struct cpumask *cmask = mm_cpumask(mm);
struct cpumask hmask;
unsigned int cpuid;
bool broadcast;
......@@ -46,9 +45,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
unsigned long asid = atomic_long_read(&mm->context.id);
if (broadcast) {
riscv_cpuid_to_hartid_mask(cmask, &hmask);
sbi_remote_sfence_vma_asid(cpumask_bits(&hmask),
start, size, asid);
sbi_remote_sfence_vma_asid(cmask, start, size, asid);
} else if (size <= stride) {
local_flush_tlb_page_asid(start, asid);
} else {
......@@ -56,9 +53,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
}
} else {
if (broadcast) {
riscv_cpuid_to_hartid_mask(cmask, &hmask);
sbi_remote_sfence_vma(cpumask_bits(&hmask),
start, size);
sbi_remote_sfence_vma(cmask, start, size);
} else if (size <= stride) {
local_flush_tlb_page(start);
} else {
......
......@@ -497,7 +497,7 @@ static int add_exception_handler(const struct bpf_insn *insn,
offset = pc - (long)&ex->insn;
if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
return -ERANGE;
ex->insn = pc;
ex->insn = offset;
/*
* Since the extable follows the program, the fixup offset is always
......
......@@ -40,6 +40,8 @@
#ifdef CONFIG_ARM64
# define EFI_RT_VIRTUAL_LIMIT DEFAULT_MAP_WINDOW_64
#elif defined(CONFIG_RISCV)
# define EFI_RT_VIRTUAL_LIMIT TASK_SIZE_MIN
#else
# define EFI_RT_VIRTUAL_LIMIT TASK_SIZE
#endif
......
......@@ -5,7 +5,6 @@ config SOC_K210_SYSCTL
depends on RISCV && SOC_CANAAN && OF
default SOC_CANAAN
select PM
select SYSCON
select MFD_SYSCON
help
Canaan Kendryte K210 SoC system controller driver.
......@@ -147,6 +147,15 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
#if CONFIG_PGTABLE_LEVELS > 3
static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
gfp_t gfp = GFP_PGTABLE_USER;
if (mm == &init_mm)
gfp = GFP_PGTABLE_KERNEL;
return (pud_t *)get_zeroed_page(gfp);
}
#ifndef __HAVE_ARCH_PUD_ALLOC_ONE
/**
* pud_alloc_one - allocate a page for PUD-level page table
......@@ -159,20 +168,23 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
*/
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
gfp_t gfp = GFP_PGTABLE_USER;
if (mm == &init_mm)
gfp = GFP_PGTABLE_KERNEL;
return (pud_t *)get_zeroed_page(gfp);
return __pud_alloc_one(mm, addr);
}
#endif
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
{
BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
free_page((unsigned long)pud);
}
#ifndef __HAVE_ARCH_PUD_FREE
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
__pud_free(mm, pud);
}
#endif
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
#ifndef __HAVE_ARCH_PGD_FREE
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment