Commit 2dc10ad8 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Catalin Marinas:

 - "genirq: Introduce generic irq migration for cpu hotunplugged" patch
   merged from tip/irq/for-arm to allow the arm64-specific part to be
   upstreamed via the arm64 tree

 - CPU feature detection reworked to cope with heterogeneous systems
   where CPUs may not have exactly the same features.  The features
   reported by the kernel via internal data structures or ELF_HWCAP are
   delayed until all the CPUs are up (and before user space starts)

 - Support for 16KB pages, with the additional bonus of a 36-bit VA
   space, though the latter only depending on EXPERT

 - Implement native {relaxed, acquire, release} atomics for arm64

 - New ASID allocation algorithm which avoids IPI on roll-over, together
   with TLB invalidation optimisations (using local vs global where
   feasible)

 - KASan support for arm64

 - EFI_STUB clean-up and isolation for the kernel proper (required by
   KASan)

 - copy_{to,from,in}_user optimisations (sharing the memcpy template)

 - perf: moving arm64 to the arm32/64 shared PMU framework

 - L1_CACHE_BYTES increased to 128 to accommodate Cavium hardware

 - Support for the contiguous PTE hint on kernel mapping (16 consecutive
   entries may be able to use a single TLB entry)

 - Generic CONFIG_HZ now used on arm64

 - defconfig updates

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (91 commits)
  arm64/efi: fix libstub build under CONFIG_MODVERSIONS
  ARM64: Enable multi-core scheduler support by default
  arm64/efi: move arm64 specific stub C code to libstub
  arm64: page-align sections for DEBUG_RODATA
  arm64: Fix build with CONFIG_ZONE_DMA=n
  arm64: Fix compat register mappings
  arm64: Increase the max granular size
  arm64: remove bogus TASK_SIZE_64 check
  arm64: make Timer Interrupt Frequency selectable
  arm64/mm: use PAGE_ALIGNED instead of IS_ALIGNED
  arm64: cachetype: fix definitions of ICACHEF_* flags
  arm64: cpufeature: declare enable_cpu_capabilities as static
  genirq: Make the cpuhotplug migration code less noisy
  arm64: Constify hwcap name string arrays
  arm64/kvm: Make use of the system wide safe values
  arm64/debug: Make use of the system wide safe value
  arm64: Move FP/ASIMD hwcap handling to common code
  arm64/HWCAP: Use system wide safe values
  arm64/capabilities: Make use of system wide safe value
  arm64: Delay cpu feature capability checks
  ...
parents e627078a f8f8bdc4
......@@ -58,5 +58,3 @@ linux,uefi-mmap-desc-size | 32-bit | Size in bytes of each entry in the UEFI
--------------------------------------------------------------------------------
linux,uefi-mmap-desc-ver | 32-bit | Version of the mmap descriptor format.
--------------------------------------------------------------------------------
linux,uefi-stub-kern-ver | string | Copy of linux_banner from build.
--------------------------------------------------------------------------------
......@@ -104,7 +104,12 @@ Header notes:
- The flags field (introduced in v3.17) is a little-endian 64-bit field
composed as follows:
Bit 0: Kernel endianness. 1 if BE, 0 if LE.
Bits 1-63: Reserved.
Bit 1-2: Kernel Page size.
0 - Unspecified.
1 - 4K
2 - 16K
3 - 64K
Bits 3-63: Reserved.
- When image_size is zero, a bootloader should attempt to keep as much
memory as possible free for use by the kernel immediately after the
......
......@@ -8,6 +8,8 @@ Required properties:
- compatible : should be one of
"arm,armv8-pmuv3"
"arm.cortex-a57-pmu"
"arm.cortex-a53-pmu"
"arm,cortex-a17-pmu"
"arm,cortex-a15-pmu"
"arm,cortex-a12-pmu"
......
......@@ -9,7 +9,7 @@
| alpha: | TODO |
| arc: | TODO |
| arm: | TODO |
| arm64: | TODO |
| arm64: | ok |
| avr32: | TODO |
| blackfin: | TODO |
| c6x: | TODO |
......
......@@ -823,12 +823,13 @@ F: arch/arm/include/asm/floppy.h
ARM PMU PROFILING AND DEBUGGING
M: Will Deacon <will.deacon@arm.com>
R: Mark Rutland <mark.rutland@arm.com>
S: Maintained
F: arch/arm/kernel/perf_*
F: arch/arm*/kernel/perf_*
F: arch/arm/oprofile/common.c
F: arch/arm/kernel/hw_breakpoint.c
F: arch/arm/include/asm/hw_breakpoint.h
F: arch/arm/include/asm/perf_event.h
F: arch/arm*/kernel/hw_breakpoint.c
F: arch/arm*/include/asm/hw_breakpoint.h
F: arch/arm*/include/asm/perf_event.h
F: drivers/perf/arm_pmu.c
F: include/linux/perf/arm_pmu.h
......
......@@ -48,6 +48,7 @@ config ARM64
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP
select HAVE_ARCH_KGDB
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
......@@ -169,10 +170,12 @@ config FIX_EARLYCON_MEM
config PGTABLE_LEVELS
int
default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36
default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48
default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47
default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48
source "init/Kconfig"
......@@ -389,25 +392,37 @@ config ARM64_4K_PAGES
help
This feature enables 4KB pages support.
config ARM64_16K_PAGES
bool "16KB"
help
The system will use 16KB pages support. AArch32 emulation
requires applications compiled with 16K (or a multiple of 16K)
aligned segments.
config ARM64_64K_PAGES
bool "64KB"
help
This feature enables 64KB pages support (4KB by default)
allowing only two levels of page tables and faster TLB
look-up. AArch32 emulation is not available when this feature
is enabled.
look-up. AArch32 emulation requires applications compiled
with 64K aligned segments.
endchoice
choice
prompt "Virtual address space size"
default ARM64_VA_BITS_39 if ARM64_4K_PAGES
default ARM64_VA_BITS_47 if ARM64_16K_PAGES
default ARM64_VA_BITS_42 if ARM64_64K_PAGES
help
Allows choosing one of multiple possible virtual address
space sizes. The level of translation table is determined by
a combination of page size and virtual address space size.
config ARM64_VA_BITS_36
bool "36-bit" if EXPERT
depends on ARM64_16K_PAGES
config ARM64_VA_BITS_39
bool "39-bit"
depends on ARM64_4K_PAGES
......@@ -416,6 +431,10 @@ config ARM64_VA_BITS_42
bool "42-bit"
depends on ARM64_64K_PAGES
config ARM64_VA_BITS_47
bool "47-bit"
depends on ARM64_16K_PAGES
config ARM64_VA_BITS_48
bool "48-bit"
......@@ -423,8 +442,10 @@ endchoice
config ARM64_VA_BITS
int
default 36 if ARM64_VA_BITS_36
default 39 if ARM64_VA_BITS_39
default 42 if ARM64_VA_BITS_42
default 47 if ARM64_VA_BITS_47
default 48 if ARM64_VA_BITS_48
config CPU_BIG_ENDIAN
......@@ -454,15 +475,13 @@ config NR_CPUS
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
select GENERIC_IRQ_MIGRATION
help
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu.
source kernel/Kconfig.preempt
config HZ
int
default 100
source kernel/Kconfig.hz
config ARCH_HAS_HOLES_MEMORYMODEL
def_bool y if SPARSEMEM
......@@ -481,12 +500,8 @@ config HAVE_ARCH_PFN_VALID
def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
config HW_PERF_EVENTS
bool "Enable hardware performance counter support for perf events"
depends on PERF_EVENTS
default y
help
Enable hardware performance counter support for perf events. If
disabled, perf events will use software events only.
def_bool y
depends on ARM_PMU
config SYS_SUPPORTS_HUGETLBFS
def_bool y
......@@ -495,7 +510,7 @@ config ARCH_WANT_GENERAL_HUGETLB
def_bool y
config ARCH_WANT_HUGE_PMD_SHARE
def_bool y if !ARM64_64K_PAGES
def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
config HAVE_ARCH_TRANSPARENT_HUGEPAGE
def_bool y
......@@ -532,7 +547,25 @@ config XEN
config FORCE_MAX_ZONEORDER
int
default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE)
default "11"
help
The kernel memory allocator divides physically contiguous memory
blocks into "zones", where each zone is a power of two number of
pages. This option selects the largest power of two that the kernel
keeps in the memory allocator. If you need to allocate very large
blocks of physically contiguous memory, then you may need to
increase this value.
This config option is actually maximum order plus one. For example,
a value of 11 means that the largest free memory block is 2^10 pages.
We make sure that we can allocate upto a HugePage size for each configuration.
Hence we have :
MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2
However for 4K, we choose a higher default value, 11 as opposed to 10, giving us
4M allocations matching the default size used by generic code.
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
......@@ -707,7 +740,7 @@ source "fs/Kconfig.binfmt"
config COMPAT
bool "Kernel support for 32-bit EL0"
depends on !ARM64_64K_PAGES || EXPERT
depends on ARM64_4K_PAGES || EXPERT
select COMPAT_BINFMT_ELF
select HAVE_UID16
select OLD_SIGSUSPEND3
......@@ -718,9 +751,9 @@ config COMPAT
the user helper functions, VFP support and the ptrace interface are
handled appropriately by the kernel.
If you also enabled CONFIG_ARM64_64K_PAGES, please be aware that you
will only be able to execute AArch32 binaries that were compiled with
64k aligned segments.
If you use a page size other than 4KB (i.e, 16KB or 64KB), please be aware
that you will only be able to execute AArch32 binaries that were compiled
with page size aligned segments.
If you want to execute 32-bit userspace applications, say Y.
......
......@@ -77,7 +77,7 @@ config DEBUG_RODATA
If in doubt, say Y
config DEBUG_ALIGN_RODATA
depends on DEBUG_RODATA && !ARM64_64K_PAGES
depends on DEBUG_RODATA && ARM64_4K_PAGES
bool "Align linker sections up to SECTION_SIZE"
help
If this option is enabled, sections that may potentially be marked as
......
......@@ -55,6 +55,13 @@ else
TEXT_OFFSET := 0x00080000
endif
# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61)
# in 32-bit arithmetic
KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \
(0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \
+ (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - 3)) \
- (1 << (64 - 32 - 3)) )) )
export TEXT_OFFSET GZFLAGS
core-y += arch/arm64/kernel/ arch/arm64/mm/
......
......@@ -91,17 +91,21 @@ A53_L2: l2-cache1 {
};
};
pmu {
compatible = "arm,armv8-pmuv3";
pmu_a57 {
compatible = "arm,cortex-a57-pmu";
interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
interrupt-affinity = <&A57_0>,
<&A57_1>;
};
pmu_a53 {
compatible = "arm,cortex-a53-pmu";
interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
interrupt-affinity = <&A57_0>,
<&A57_1>,
<&A53_0>,
interrupt-affinity = <&A53_0>,
<&A53_1>,
<&A53_2>,
<&A53_3>;
......
......@@ -91,17 +91,21 @@ A53_L2: l2-cache1 {
};
};
pmu {
compatible = "arm,armv8-pmuv3";
pmu_a57 {
compatible = "arm,cortex-a57-pmu";
interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
interrupt-affinity = <&A57_0>,
<&A57_1>;
};
pmu_a53 {
compatible = "arm,cortex-a53-pmu";
interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
interrupt-affinity = <&A57_0>,
<&A57_1>,
<&A53_0>,
interrupt-affinity = <&A53_0>,
<&A53_1>,
<&A53_2>,
<&A53_3>;
......
......@@ -51,6 +51,7 @@ CONFIG_PCI=y
CONFIG_PCI_MSI=y
CONFIG_PCI_XGENE=y
CONFIG_SMP=y
CONFIG_SCHED_MC=y
CONFIG_PREEMPT=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
......@@ -109,6 +110,10 @@ CONFIG_SERIAL_8250_DW=y
CONFIG_SERIAL_8250_MT6577=y
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
CONFIG_SERIAL_SAMSUNG=y
CONFIG_SERIAL_SAMSUNG_UARTS_4=y
CONFIG_SERIAL_SAMSUNG_UARTS=4
CONFIG_SERIAL_SAMSUNG_CONSOLE=y
CONFIG_SERIAL_MSM=y
CONFIG_SERIAL_MSM_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
......@@ -145,6 +150,10 @@ CONFIG_MMC_ARMMMCI=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_SPI=y
CONFIG_MMC_DW=y
CONFIG_MMC_DW_IDMAC=y
CONFIG_MMC_DW_PLTFM=y
CONFIG_MMC_DW_EXYNOS=y
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
CONFIG_LEDS_SYSCON=y
......
......@@ -193,4 +193,15 @@ lr .req x30 // link register
str \src, [\tmp, :lo12:\sym]
.endm
/*
* Annotate a function as position independent, i.e., safe to be called before
* the kernel virtual mapping is activated.
*/
#define ENDPIPROC(x) \
.globl __pi_##x; \
.type __pi_##x, %function; \
.set __pi_##x, x; \
.size __pi_##x, . - x; \
ENDPROC(x)
#endif /* __ASM_ASSEMBLER_H */
......@@ -55,13 +55,42 @@
#define atomic_read(v) READ_ONCE((v)->counter)
#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
#define atomic_add_return_relaxed atomic_add_return_relaxed
#define atomic_add_return_acquire atomic_add_return_acquire
#define atomic_add_return_release atomic_add_return_release
#define atomic_add_return atomic_add_return
#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v))
#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v))
#define atomic_inc_return_release(v) atomic_add_return_release(1, (v))
#define atomic_inc_return(v) atomic_add_return(1, (v))
#define atomic_sub_return_relaxed atomic_sub_return_relaxed
#define atomic_sub_return_acquire atomic_sub_return_acquire
#define atomic_sub_return_release atomic_sub_return_release
#define atomic_sub_return atomic_sub_return
#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v))
#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v))
#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v))
#define atomic_dec_return(v) atomic_sub_return(1, (v))
#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
#define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new))
#define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new))
#define atomic_xchg(v, new) xchg(&((v)->counter), (new))
#define atomic_cmpxchg_relaxed(v, old, new) \
cmpxchg_relaxed(&((v)->counter), (old), (new))
#define atomic_cmpxchg_acquire(v, old, new) \
cmpxchg_acquire(&((v)->counter), (old), (new))
#define atomic_cmpxchg_release(v, old, new) \
cmpxchg_release(&((v)->counter), (old), (new))
#define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new))
#define atomic_inc(v) atomic_add(1, (v))
#define atomic_dec(v) atomic_sub(1, (v))
#define atomic_inc_return(v) atomic_add_return(1, (v))
#define atomic_dec_return(v) atomic_sub_return(1, (v))
#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0)
#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
......@@ -75,13 +104,39 @@
#define ATOMIC64_INIT ATOMIC_INIT
#define atomic64_read atomic_read
#define atomic64_set atomic_set
#define atomic64_add_return_relaxed atomic64_add_return_relaxed
#define atomic64_add_return_acquire atomic64_add_return_acquire
#define atomic64_add_return_release atomic64_add_return_release
#define atomic64_add_return atomic64_add_return
#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v))
#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v))
#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v))
#define atomic64_inc_return(v) atomic64_add_return(1, (v))
#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
#define atomic64_sub_return_acquire atomic64_sub_return_acquire
#define atomic64_sub_return_release atomic64_sub_return_release
#define atomic64_sub_return atomic64_sub_return
#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v))
#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v))
#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v))
#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
#define atomic64_xchg_relaxed atomic_xchg_relaxed
#define atomic64_xchg_acquire atomic_xchg_acquire
#define atomic64_xchg_release atomic_xchg_release
#define atomic64_xchg atomic_xchg
#define atomic64_cmpxchg_relaxed atomic_cmpxchg_relaxed
#define atomic64_cmpxchg_acquire atomic_cmpxchg_acquire
#define atomic64_cmpxchg_release atomic_cmpxchg_release
#define atomic64_cmpxchg atomic_cmpxchg
#define atomic64_inc(v) atomic64_add(1, (v))
#define atomic64_dec(v) atomic64_sub(1, (v))
#define atomic64_inc_return(v) atomic64_add_return(1, (v))
#define atomic64_dec_return(v) atomic64_sub_return(1, (v))
#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
#define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0)
#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0)
......
......@@ -55,40 +55,47 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \
} \
__LL_SC_EXPORT(atomic_##op);
#define ATOMIC_OP_RETURN(op, asm_op) \
#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \
__LL_SC_INLINE int \
__LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v)) \
__LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \
{ \
unsigned long tmp; \
int result; \
\
asm volatile("// atomic_" #op "_return\n" \
asm volatile("// atomic_" #op "_return" #name "\n" \
" prfm pstl1strm, %2\n" \
"1: ldxr %w0, %2\n" \
"1: ld" #acq "xr %w0, %2\n" \
" " #asm_op " %w0, %w0, %w3\n" \
" stlxr %w1, %w0, %2\n" \
" cbnz %w1, 1b" \
" st" #rel "xr %w1, %w0, %2\n" \
" cbnz %w1, 1b\n" \
" " #mb \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: "Ir" (i) \
: "memory"); \
: cl); \
\
smp_mb(); \
return result; \
} \
__LL_SC_EXPORT(atomic_##op##_return);
__LL_SC_EXPORT(atomic_##op##_return##name);
#define ATOMIC_OPS(...) \
ATOMIC_OP(__VA_ARGS__) \
ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)
#define ATOMIC_OPS(op, asm_op) \
ATOMIC_OP(op, asm_op) \
ATOMIC_OP_RETURN(op, asm_op)
#define ATOMIC_OPS_RLX(...) \
ATOMIC_OPS(__VA_ARGS__) \
ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\
ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\
ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)
ATOMIC_OPS(add, add)
ATOMIC_OPS(sub, sub)
ATOMIC_OPS_RLX(add, add)
ATOMIC_OPS_RLX(sub, sub)
ATOMIC_OP(and, and)
ATOMIC_OP(andnot, bic)
ATOMIC_OP(or, orr)
ATOMIC_OP(xor, eor)
#undef ATOMIC_OPS_RLX
#undef ATOMIC_OPS
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
......@@ -111,40 +118,47 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \
} \
__LL_SC_EXPORT(atomic64_##op);
#define ATOMIC64_OP_RETURN(op, asm_op) \
#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \
__LL_SC_INLINE long \
__LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v)) \
__LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \
{ \
long result; \
unsigned long tmp; \
\
asm volatile("// atomic64_" #op "_return\n" \
asm volatile("// atomic64_" #op "_return" #name "\n" \
" prfm pstl1strm, %2\n" \
"1: ldxr %0, %2\n" \
"1: ld" #acq "xr %0, %2\n" \
" " #asm_op " %0, %0, %3\n" \
" stlxr %w1, %0, %2\n" \
" cbnz %w1, 1b" \
" st" #rel "xr %w1, %0, %2\n" \
" cbnz %w1, 1b\n" \
" " #mb \
: "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
: "Ir" (i) \
: "memory"); \
: cl); \
\
smp_mb(); \
return result; \
} \
__LL_SC_EXPORT(atomic64_##op##_return);
__LL_SC_EXPORT(atomic64_##op##_return##name);
#define ATOMIC64_OPS(...) \
ATOMIC64_OP(__VA_ARGS__) \
ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__)
#define ATOMIC64_OPS(op, asm_op) \
ATOMIC64_OP(op, asm_op) \
ATOMIC64_OP_RETURN(op, asm_op)
#define ATOMIC64_OPS_RLX(...) \
ATOMIC64_OPS(__VA_ARGS__) \
ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \
ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \
ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__)
ATOMIC64_OPS(add, add)
ATOMIC64_OPS(sub, sub)
ATOMIC64_OPS_RLX(add, add)
ATOMIC64_OPS_RLX(sub, sub)
ATOMIC64_OP(and, and)
ATOMIC64_OP(andnot, bic)
ATOMIC64_OP(or, orr)
ATOMIC64_OP(xor, eor)
#undef ATOMIC64_OPS_RLX
#undef ATOMIC64_OPS
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
......@@ -172,7 +186,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
}
__LL_SC_EXPORT(atomic64_dec_if_positive);
#define __CMPXCHG_CASE(w, sz, name, mb, rel, cl) \
#define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl) \
__LL_SC_INLINE unsigned long \
__LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
unsigned long old, \
......@@ -182,7 +196,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
\
asm volatile( \
" prfm pstl1strm, %[v]\n" \
"1: ldxr" #sz "\t%" #w "[oldval], %[v]\n" \
"1: ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n" \
" eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
" cbnz %" #w "[tmp], 2f\n" \
" st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \
......@@ -199,14 +213,22 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
} \
__LL_SC_EXPORT(__cmpxchg_case_##name);
__CMPXCHG_CASE(w, b, 1, , , )
__CMPXCHG_CASE(w, h, 2, , , )
__CMPXCHG_CASE(w, , 4, , , )
__CMPXCHG_CASE( , , 8, , , )
__CMPXCHG_CASE(w, b, mb_1, dmb ish, l, "memory")
__CMPXCHG_CASE(w, h, mb_2, dmb ish, l, "memory")
__CMPXCHG_CASE(w, , mb_4, dmb ish, l, "memory")
__CMPXCHG_CASE( , , mb_8, dmb ish, l, "memory")
__CMPXCHG_CASE(w, b, 1, , , , )
__CMPXCHG_CASE(w, h, 2, , , , )
__CMPXCHG_CASE(w, , 4, , , , )
__CMPXCHG_CASE( , , 8, , , , )
__CMPXCHG_CASE(w, b, acq_1, , a, , "memory")
__CMPXCHG_CASE(w, h, acq_2, , a, , "memory")
__CMPXCHG_CASE(w, , acq_4, , a, , "memory")
__CMPXCHG_CASE( , , acq_8, , a, , "memory")
__CMPXCHG_CASE(w, b, rel_1, , , l, "memory")
__CMPXCHG_CASE(w, h, rel_2, , , l, "memory")
__CMPXCHG_CASE(w, , rel_4, , , l, "memory")
__CMPXCHG_CASE( , , rel_8, , , l, "memory")
__CMPXCHG_CASE(w, b, mb_1, dmb ish, , l, "memory")
__CMPXCHG_CASE(w, h, mb_2, dmb ish, , l, "memory")
__CMPXCHG_CASE(w, , mb_4, dmb ish, , l, "memory")
__CMPXCHG_CASE( , , mb_8, dmb ish, , l, "memory")
#undef __CMPXCHG_CASE
......
......@@ -75,24 +75,32 @@ static inline void atomic_add(int i, atomic_t *v)
: "x30");
}
static inline int atomic_add_return(int i, atomic_t *v)
{
register int w0 asm ("w0") = i;
register atomic_t *x1 asm ("x1") = v;
#define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \
static inline int atomic_add_return##name(int i, atomic_t *v) \
{ \
register int w0 asm ("w0") = i; \
register atomic_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
" nop\n" \
__LL_SC_ATOMIC(add_return##name), \
/* LSE atomics */ \
" ldadd" #mb " %w[i], w30, %[v]\n" \
" add %w[i], %w[i], w30") \
: [i] "+r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
: "x30" , ##cl); \
\
return w0; \
}
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" nop\n"
__LL_SC_ATOMIC(add_return),
/* LSE atomics */
" ldaddal %w[i], w30, %[v]\n"
" add %w[i], %w[i], w30")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
: "x30", "memory");
ATOMIC_OP_ADD_RETURN(_relaxed, )
ATOMIC_OP_ADD_RETURN(_acquire, a, "memory")
ATOMIC_OP_ADD_RETURN(_release, l, "memory")
ATOMIC_OP_ADD_RETURN( , al, "memory")
return w0;
}
#undef ATOMIC_OP_ADD_RETURN
static inline void atomic_and(int i, atomic_t *v)
{
......@@ -128,27 +136,34 @@ static inline void atomic_sub(int i, atomic_t *v)
: "x30");
}
static inline int atomic_sub_return(int i, atomic_t *v)
{
register int w0 asm ("w0") = i;
register atomic_t *x1 asm ("x1") = v;
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" nop\n"
__LL_SC_ATOMIC(sub_return)
" nop",
/* LSE atomics */
" neg %w[i], %w[i]\n"
" ldaddal %w[i], w30, %[v]\n"
" add %w[i], %w[i], w30")
: [i] "+r" (w0), [v] "+Q" (v->counter)
: "r" (x1)
: "x30", "memory");
return w0;
#define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \
static inline int atomic_sub_return##name(int i, atomic_t *v) \
{ \
register int w0 asm ("w0") = i; \
register atomic_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
" nop\n" \
__LL_SC_ATOMIC(sub_return##name) \
" nop", \
/* LSE atomics */ \
" neg %w[i], %w[i]\n" \
" ldadd" #mb " %w[i], w30, %[v]\n" \
" add %w[i], %w[i], w30") \
: [i] "+r" (w0), [v] "+Q" (v->counter) \
: "r" (x1) \
: "x30" , ##cl); \
\
return w0; \
}
ATOMIC_OP_SUB_RETURN(_relaxed, )
ATOMIC_OP_SUB_RETURN(_acquire, a, "memory")
ATOMIC_OP_SUB_RETURN(_release, l, "memory")
ATOMIC_OP_SUB_RETURN( , al, "memory")
#undef ATOMIC_OP_SUB_RETURN
#undef __LL_SC_ATOMIC
#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op)
......@@ -201,24 +216,32 @@ static inline void atomic64_add(long i, atomic64_t *v)
: "x30");
}
static inline long atomic64_add_return(long i, atomic64_t *v)
{
register long x0 asm ("x0") = i;
register atomic64_t *x1 asm ("x1") = v;
#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \
static inline long atomic64_add_return##name(long i, atomic64_t *v) \
{ \
register long x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
" nop\n" \
__LL_SC_ATOMIC64(add_return##name), \
/* LSE atomics */ \
" ldadd" #mb " %[i], x30, %[v]\n" \
" add %[i], %[i], x30") \
: [i] "+r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
: "x30" , ##cl); \
\
return x0; \
}
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" nop\n"
__LL_SC_ATOMIC64(add_return),
/* LSE atomics */
" ldaddal %[i], x30, %[v]\n"
" add %[i], %[i], x30")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
: "x30", "memory");
ATOMIC64_OP_ADD_RETURN(_relaxed, )
ATOMIC64_OP_ADD_RETURN(_acquire, a, "memory")
ATOMIC64_OP_ADD_RETURN(_release, l, "memory")
ATOMIC64_OP_ADD_RETURN( , al, "memory")
return x0;
}
#undef ATOMIC64_OP_ADD_RETURN
static inline void atomic64_and(long i, atomic64_t *v)
{
......@@ -254,26 +277,34 @@ static inline void atomic64_sub(long i, atomic64_t *v)
: "x30");
}
static inline long atomic64_sub_return(long i, atomic64_t *v)
{
register long x0 asm ("x0") = i;
register atomic64_t *x1 asm ("x1") = v;
#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \
static inline long atomic64_sub_return##name(long i, atomic64_t *v) \
{ \
register long x0 asm ("x0") = i; \
register atomic64_t *x1 asm ("x1") = v; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
" nop\n" \
__LL_SC_ATOMIC64(sub_return##name) \
" nop", \
/* LSE atomics */ \
" neg %[i], %[i]\n" \
" ldadd" #mb " %[i], x30, %[v]\n" \
" add %[i], %[i], x30") \
: [i] "+r" (x0), [v] "+Q" (v->counter) \
: "r" (x1) \
: "x30" , ##cl); \
\
return x0; \
}
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" nop\n"
__LL_SC_ATOMIC64(sub_return)
" nop",
/* LSE atomics */
" neg %[i], %[i]\n"
" ldaddal %[i], x30, %[v]\n"
" add %[i], %[i], x30")
: [i] "+r" (x0), [v] "+Q" (v->counter)
: "r" (x1)
: "x30", "memory");
ATOMIC64_OP_SUB_RETURN(_relaxed, )
ATOMIC64_OP_SUB_RETURN(_acquire, a, "memory")
ATOMIC64_OP_SUB_RETURN(_release, l, "memory")
ATOMIC64_OP_SUB_RETURN( , al, "memory")
return x0;
}
#undef ATOMIC64_OP_SUB_RETURN
static inline long atomic64_dec_if_positive(atomic64_t *v)
{
......@@ -333,14 +364,22 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \
return x0; \
}
__CMPXCHG_CASE(w, b, 1, )
__CMPXCHG_CASE(w, h, 2, )
__CMPXCHG_CASE(w, , 4, )
__CMPXCHG_CASE(x, , 8, )
__CMPXCHG_CASE(w, b, mb_1, al, "memory")
__CMPXCHG_CASE(w, h, mb_2, al, "memory")
__CMPXCHG_CASE(w, , mb_4, al, "memory")
__CMPXCHG_CASE(x, , mb_8, al, "memory")
__CMPXCHG_CASE(w, b, 1, )
__CMPXCHG_CASE(w, h, 2, )
__CMPXCHG_CASE(w, , 4, )
__CMPXCHG_CASE(x, , 8, )
__CMPXCHG_CASE(w, b, acq_1, a, "memory")
__CMPXCHG_CASE(w, h, acq_2, a, "memory")
__CMPXCHG_CASE(w, , acq_4, a, "memory")
__CMPXCHG_CASE(x, , acq_8, a, "memory")
__CMPXCHG_CASE(w, b, rel_1, l, "memory")
__CMPXCHG_CASE(w, h, rel_2, l, "memory")
__CMPXCHG_CASE(w, , rel_4, l, "memory")
__CMPXCHG_CASE(x, , rel_8, l, "memory")
__CMPXCHG_CASE(w, b, mb_1, al, "memory")
__CMPXCHG_CASE(w, h, mb_2, al, "memory")
__CMPXCHG_CASE(w, , mb_4, al, "memory")
__CMPXCHG_CASE(x, , mb_8, al, "memory")
#undef __LL_SC_CMPXCHG
#undef __CMPXCHG_CASE
......
......@@ -18,7 +18,7 @@
#include <asm/cachetype.h>
#define L1_CACHE_SHIFT 6
#define L1_CACHE_SHIFT 7
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
/*
......
......@@ -115,6 +115,13 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *);
static inline void __local_flush_icache_all(void)
{
asm("ic iallu");
dsb(nsh);
isb();
}
static inline void __flush_icache_all(void)
{
asm("ic ialluis");
......
......@@ -34,8 +34,8 @@
#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
#define ICACHEF_ALIASING BIT(0)
#define ICACHEF_AIVIVT BIT(1)
#define ICACHEF_ALIASING 0
#define ICACHEF_AIVIVT 1
extern unsigned long __icache_flags;
......
......@@ -25,154 +25,151 @@
#include <asm/barrier.h>
#include <asm/lse.h>
static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
{
unsigned long ret, tmp;
switch (size) {
case 1:
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" prfm pstl1strm, %2\n"
"1: ldxrb %w0, %2\n"
" stlxrb %w1, %w3, %2\n"
" cbnz %w1, 1b\n"
" dmb ish",
/* LSE atomics */
" nop\n"
" nop\n"
" swpalb %w3, %w0, %2\n"
" nop\n"
" nop")
: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)
: "r" (x)
: "memory");
break;
case 2:
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" prfm pstl1strm, %2\n"
"1: ldxrh %w0, %2\n"
" stlxrh %w1, %w3, %2\n"
" cbnz %w1, 1b\n"
" dmb ish",
/* LSE atomics */
" nop\n"
" nop\n"
" swpalh %w3, %w0, %2\n"
" nop\n"
" nop")
: "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr)
: "r" (x)
: "memory");
break;
case 4:
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" prfm pstl1strm, %2\n"
"1: ldxr %w0, %2\n"
" stlxr %w1, %w3, %2\n"
" cbnz %w1, 1b\n"
" dmb ish",
/* LSE atomics */
" nop\n"
" nop\n"
" swpal %w3, %w0, %2\n"
" nop\n"
" nop")
: "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr)
: "r" (x)
: "memory");
break;
case 8:
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" stlxr %w1, %3, %2\n"
" cbnz %w1, 1b\n"
" dmb ish",
/* LSE atomics */
" nop\n"
" nop\n"
" swpal %3, %0, %2\n"
" nop\n"
" nop")
: "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr)
: "r" (x)
: "memory");
break;
default:
BUILD_BUG();
}
return ret;
/*
* We need separate acquire parameters for ll/sc and lse, since the full
* barrier case is generated as release+dmb for the former and
* acquire+release for the latter.
*/
#define __XCHG_CASE(w, sz, name, mb, nop_lse, acq, acq_lse, rel, cl) \
static inline unsigned long __xchg_case_##name(unsigned long x, \
volatile void *ptr) \
{ \
unsigned long ret, tmp; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
" prfm pstl1strm, %2\n" \
"1: ld" #acq "xr" #sz "\t%" #w "0, %2\n" \
" st" #rel "xr" #sz "\t%w1, %" #w "3, %2\n" \
" cbnz %w1, 1b\n" \
" " #mb, \
/* LSE atomics */ \
" nop\n" \
" nop\n" \
" swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \
" nop\n" \
" " #nop_lse) \
: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) \
: "r" (x) \
: cl); \
\
return ret; \
}
#define xchg(ptr,x) \
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \
__ret; \
__XCHG_CASE(w, b, 1, , , , , , )
__XCHG_CASE(w, h, 2, , , , , , )
__XCHG_CASE(w, , 4, , , , , , )
__XCHG_CASE( , , 8, , , , , , )
__XCHG_CASE(w, b, acq_1, , , a, a, , "memory")
__XCHG_CASE(w, h, acq_2, , , a, a, , "memory")
__XCHG_CASE(w, , acq_4, , , a, a, , "memory")
__XCHG_CASE( , , acq_8, , , a, a, , "memory")
__XCHG_CASE(w, b, rel_1, , , , , l, "memory")
__XCHG_CASE(w, h, rel_2, , , , , l, "memory")
__XCHG_CASE(w, , rel_4, , , , , l, "memory")
__XCHG_CASE( , , rel_8, , , , , l, "memory")
__XCHG_CASE(w, b, mb_1, dmb ish, nop, , a, l, "memory")
__XCHG_CASE(w, h, mb_2, dmb ish, nop, , a, l, "memory")
__XCHG_CASE(w, , mb_4, dmb ish, nop, , a, l, "memory")
__XCHG_CASE( , , mb_8, dmb ish, nop, , a, l, "memory")
#undef __XCHG_CASE
#define __XCHG_GEN(sfx) \
static inline unsigned long __xchg##sfx(unsigned long x, \
volatile void *ptr, \
int size) \
{ \
switch (size) { \
case 1: \
return __xchg_case##sfx##_1(x, ptr); \
case 2: \
return __xchg_case##sfx##_2(x, ptr); \
case 4: \
return __xchg_case##sfx##_4(x, ptr); \
case 8: \
return __xchg_case##sfx##_8(x, ptr); \
default: \
BUILD_BUG(); \
} \
\
unreachable(); \
}
__XCHG_GEN()
__XCHG_GEN(_acq)
__XCHG_GEN(_rel)
__XCHG_GEN(_mb)
#undef __XCHG_GEN
#define __xchg_wrapper(sfx, ptr, x) \
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \
__ret; \
})
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long new, int size)
{
switch (size) {
case 1:
return __cmpxchg_case_1(ptr, (u8)old, new);
case 2:
return __cmpxchg_case_2(ptr, (u16)old, new);
case 4:
return __cmpxchg_case_4(ptr, old, new);
case 8:
return __cmpxchg_case_8(ptr, old, new);
default:
BUILD_BUG();
}
unreachable();
/* xchg */
#define xchg_relaxed(...) __xchg_wrapper( , __VA_ARGS__)
#define xchg_acquire(...) __xchg_wrapper(_acq, __VA_ARGS__)
#define xchg_release(...) __xchg_wrapper(_rel, __VA_ARGS__)
#define xchg(...) __xchg_wrapper( _mb, __VA_ARGS__)
#define __CMPXCHG_GEN(sfx) \
static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
unsigned long old, \
unsigned long new, \
int size) \
{ \
switch (size) { \
case 1: \
return __cmpxchg_case##sfx##_1(ptr, (u8)old, new); \
case 2: \
return __cmpxchg_case##sfx##_2(ptr, (u16)old, new); \
case 4: \
return __cmpxchg_case##sfx##_4(ptr, old, new); \
case 8: \
return __cmpxchg_case##sfx##_8(ptr, old, new); \
default: \
BUILD_BUG(); \
} \
\
unreachable(); \
}
static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
unsigned long new, int size)
{
switch (size) {
case 1:
return __cmpxchg_case_mb_1(ptr, (u8)old, new);
case 2:
return __cmpxchg_case_mb_2(ptr, (u16)old, new);
case 4:
return __cmpxchg_case_mb_4(ptr, old, new);
case 8:
return __cmpxchg_case_mb_8(ptr, old, new);
default:
BUILD_BUG();
}
unreachable();
}
__CMPXCHG_GEN()
__CMPXCHG_GEN(_acq)
__CMPXCHG_GEN(_rel)
__CMPXCHG_GEN(_mb)
#define cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \
sizeof(*(ptr))); \
__ret; \
})
#undef __CMPXCHG_GEN
#define cmpxchg_local(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__cmpxchg((ptr), (unsigned long)(o), \
(unsigned long)(n), sizeof(*(ptr))); \
__ret; \
#define __cmpxchg_wrapper(sfx, ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
__ret = (__typeof__(*(ptr))) \
__cmpxchg##sfx((ptr), (unsigned long)(o), \
(unsigned long)(n), sizeof(*(ptr))); \
__ret; \
})
/* cmpxchg */
#define cmpxchg_relaxed(...) __cmpxchg_wrapper( , __VA_ARGS__)
#define cmpxchg_acquire(...) __cmpxchg_wrapper(_acq, __VA_ARGS__)
#define cmpxchg_release(...) __cmpxchg_wrapper(_rel, __VA_ARGS__)
#define cmpxchg(...) __cmpxchg_wrapper( _mb, __VA_ARGS__)
#define cmpxchg_local cmpxchg_relaxed
/* cmpxchg64 */
#define cmpxchg64_relaxed cmpxchg_relaxed
#define cmpxchg64_acquire cmpxchg_acquire
#define cmpxchg64_release cmpxchg_release
#define cmpxchg64 cmpxchg
#define cmpxchg64_local cmpxchg_local
/* cmpxchg_double */
#define system_has_cmpxchg_double() 1
#define __cmpxchg_double_check(ptr1, ptr2) \
......@@ -202,6 +199,7 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
__ret; \
})
/* this_cpu_cmpxchg */
#define _protect_cmpxchg_local(pcp, o, n) \
({ \
typeof(*raw_cpu_ptr(&(pcp))) __ret; \
......@@ -227,9 +225,4 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
__ret; \
})
#define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n))
#define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n))
#define cmpxchg64_relaxed(ptr,o,n) cmpxchg_local((ptr),(o),(n))
#endif /* __ASM_CMPXCHG_H */
......@@ -63,4 +63,8 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
void cpuinfo_store_cpu(void);
void __init cpuinfo_store_boot_cpu(void);
void __init init_cpu_features(struct cpuinfo_arm64 *info);
void update_cpu_features(int cpu, struct cpuinfo_arm64 *info,
struct cpuinfo_arm64 *boot);
#endif /* __ASM_CPU_H */
......@@ -10,6 +10,7 @@
#define __ASM_CPUFEATURE_H
#include <asm/hwcap.h>
#include <asm/sysreg.h>
/*
* In the arm64 world (as in the ARM world), elf_hwcap is used both internally
......@@ -35,11 +36,42 @@
#include <linux/kernel.h>
/* CPU feature register tracking */
enum ftr_type {
FTR_EXACT, /* Use a predefined safe value */
FTR_LOWER_SAFE, /* Smaller value is safe */
FTR_HIGHER_SAFE,/* Bigger value is safe */
};
#define FTR_STRICT true /* SANITY check strict matching required */
#define FTR_NONSTRICT false /* SANITY check ignored */
struct arm64_ftr_bits {
bool strict; /* CPU Sanity check: strict matching required ? */
enum ftr_type type;
u8 shift;
u8 width;
s64 safe_val; /* safe value for discrete features */
};
/*
* @arm64_ftr_reg - Feature register
* @strict_mask Bits which should match across all CPUs for sanity.
* @sys_val Safe value across the CPUs (system view)
*/
struct arm64_ftr_reg {
u32 sys_id;
const char *name;
u64 strict_mask;
u64 sys_val;
struct arm64_ftr_bits *ftr_bits;
};
struct arm64_cpu_capabilities {
const char *desc;
u16 capability;
bool (*matches)(const struct arm64_cpu_capabilities *);
void (*enable)(void);
void (*enable)(void *); /* Called on all active CPUs */
union {
struct { /* To be used for erratum handling only */
u32 midr_model;
......@@ -47,8 +79,11 @@ struct arm64_cpu_capabilities {
};
struct { /* Feature register checking */
u32 sys_reg;
int field_pos;
int min_field_value;
int hwcap_type;
unsigned long hwcap;
};
};
};
......@@ -76,19 +111,59 @@ static inline void cpus_set_cap(unsigned int num)
__set_bit(num, cpu_hwcaps);
}
static inline int __attribute_const__ cpuid_feature_extract_field(u64 features,
int field)
static inline int __attribute_const__
cpuid_feature_extract_field_width(u64 features, int field, int width)
{
return (s64)(features << (64 - width - field)) >> (64 - width);
}
static inline int __attribute_const__
cpuid_feature_extract_field(u64 features, int field)
{
return cpuid_feature_extract_field_width(features, field, 4);
}
static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp)
{
return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
}
static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val)
{
return cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width);
}
static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
{
return (s64)(features << (64 - 4 - field)) >> (64 - 4);
return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
}
void __init setup_cpu_features(void);
void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
const char *info);
void check_local_cpu_errata(void);
void check_local_cpu_features(void);
bool cpu_supports_mixed_endian_el0(void);
bool system_supports_mixed_endian_el0(void);
#ifdef CONFIG_HOTPLUG_CPU
void verify_local_cpu_capabilities(void);
#else
static inline void verify_local_cpu_capabilities(void)
{
}
#endif
u64 read_system_reg(u32 id);
static inline bool cpu_supports_mixed_endian_el0(void)
{
return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
}
static inline bool system_supports_mixed_endian_el0(void)
{
return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
}
#endif /* __ASSEMBLY__ */
......
......@@ -75,15 +75,6 @@
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16
#define ID_AA64MMFR0_BIGENDEL0_MASK (0xf << ID_AA64MMFR0_BIGENDEL0_SHIFT)
#define ID_AA64MMFR0_BIGENDEL0(mmfr0) \
(((mmfr0) & ID_AA64MMFR0_BIGENDEL0_MASK) >> ID_AA64MMFR0_BIGENDEL0_SHIFT)
#define ID_AA64MMFR0_BIGEND_SHIFT 8
#define ID_AA64MMFR0_BIGEND_MASK (0xf << ID_AA64MMFR0_BIGEND_SHIFT)
#define ID_AA64MMFR0_BIGEND(mmfr0) \
(((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT)
#ifndef __ASSEMBLY__
/*
......@@ -115,12 +106,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void)
{
return read_cpuid(CTR_EL0);
}
static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
{
return (ID_AA64MMFR0_BIGEND(mmfr0) == 0x1) ||
(ID_AA64MMFR0_BIGENDEL0(mmfr0) == 0x1);
}
#endif /* __ASSEMBLY__ */
#endif
......@@ -17,6 +17,7 @@
#ifndef __ASSEMBLY__
#include <linux/kernel.h>
#include <linux/sizes.h>
#include <asm/boot.h>
#include <asm/page.h>
......@@ -55,11 +56,7 @@ enum fixed_addresses {
* Temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional.
*/
#ifdef CONFIG_ARM64_64K_PAGES
#define NR_FIX_BTMAPS 4
#else
#define NR_FIX_BTMAPS 64
#endif
#define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE)
#define FIX_BTMAPS_SLOTS 7
#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
......
......@@ -17,6 +17,7 @@
#define __ASM_HW_BREAKPOINT_H
#include <asm/cputype.h>
#include <asm/cpufeature.h>
#ifdef __KERNEL__
......@@ -137,13 +138,17 @@ extern struct pmu perf_ops_bp;
/* Determine number of BRP registers available. */
static inline int get_num_brps(void)
{
return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
return 1 +
cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
ID_AA64DFR0_BRPS_SHIFT);
}
/* Determine number of WRP registers available. */
static inline int get_num_wrps(void)
{
return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
return 1 +
cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
ID_AA64DFR0_WRPS_SHIFT);
}
#endif /* __KERNEL__ */
......
......@@ -52,6 +52,14 @@
extern unsigned int compat_elf_hwcap, compat_elf_hwcap2;
#endif
enum {
CAP_HWCAP = 1,
#ifdef CONFIG_COMPAT
CAP_COMPAT_HWCAP,
CAP_COMPAT_HWCAP2,
#endif
};
extern unsigned long elf_hwcap;
#endif
#endif
......@@ -7,7 +7,6 @@
struct pt_regs;
extern void migrate_irqs(void);
extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
static inline void acpi_irq_init(void)
......
#ifndef __ASM_KASAN_H
#define __ASM_KASAN_H
#ifndef __ASSEMBLY__
#ifdef CONFIG_KASAN
#include <linux/linkage.h>
#include <asm/memory.h>
/*
* KASAN_SHADOW_START: beginning of the kernel virtual addresses.
* KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.
*/
#define KASAN_SHADOW_START (VA_START)
#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1UL << (VA_BITS - 3)))
/*
* This value is used to map an address to the corresponding shadow
* address by the following formula:
* shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET;
*
* (1 << 61) shadow addresses - [KASAN_SHADOW_OFFSET,KASAN_SHADOW_END]
* cover all 64-bits of virtual addresses. So KASAN_SHADOW_OFFSET
* should satisfy the following equation:
* KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61)
*/
#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3)))
void kasan_init(void);
asmlinkage void kasan_early_init(void);
#else
static inline void kasan_init(void) { }
#endif
#endif
#endif
/*
* Kernel page table mapping
*
* Copyright (C) 2015 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_KERNEL_PGTABLE_H
#define __ASM_KERNEL_PGTABLE_H
/*
* The linear mapping and the start of memory are both 2M aligned (per
* the arm64 booting.txt requirements). Hence we can use section mapping
* with 4K (section size = 2M) but not with 16K (section size = 32M) or
* 64K (section size = 512M).
*/
#ifdef CONFIG_ARM64_4K_PAGES
#define ARM64_SWAPPER_USES_SECTION_MAPS 1
#else
#define ARM64_SWAPPER_USES_SECTION_MAPS 0
#endif
/*
* The idmap and swapper page tables need some space reserved in the kernel
* image. Both require pgd, pud (4 levels only) and pmd tables to (section)
* map the kernel. With the 64K page configuration, swapper and idmap need to
* map to pte level. The swapper also maps the FDT (see __create_page_tables
* for more information). Note that the number of ID map translation levels
* could be increased on the fly if system RAM is out of reach for the default
* VA range, so pages required to map highest possible PA are reserved in all
* cases.
*/
#if ARM64_SWAPPER_USES_SECTION_MAPS
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1)
#else
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
#endif
#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
/* Initial memory map size */
#if ARM64_SWAPPER_USES_SECTION_MAPS
#define SWAPPER_BLOCK_SHIFT SECTION_SHIFT
#define SWAPPER_BLOCK_SIZE SECTION_SIZE
#define SWAPPER_TABLE_SHIFT PUD_SHIFT
#else
#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT
#define SWAPPER_BLOCK_SIZE PAGE_SIZE
#define SWAPPER_TABLE_SHIFT PMD_SHIFT
#endif
/* The size of the initial kernel direct mapping */
#define SWAPPER_INIT_MAP_SIZE (_AC(1, UL) << SWAPPER_TABLE_SHIFT)
/*
* Initial memory map attributes.
*/
#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
#if ARM64_SWAPPER_USES_SECTION_MAPS
#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
#else
#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
#endif
#endif /* __ASM_KERNEL_PGTABLE_H */
......@@ -42,12 +42,14 @@
* PAGE_OFFSET - the virtual address of the start of the kernel image (top
* (VA_BITS - 1))
* VA_BITS - the maximum number of bits for virtual addresses.
* VA_START - the first kernel virtual address.
* TASK_SIZE - the maximum size of a user space task.
* TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
* The module space lives between the addresses given by TASK_SIZE
* and PAGE_OFFSET - it must be within 128MB of the kernel text.
*/
#define VA_BITS (CONFIG_ARM64_VA_BITS)
#define VA_START (UL(0xffffffffffffffff) << VA_BITS)
#define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1))
#define MODULES_END (PAGE_OFFSET)
#define MODULES_VADDR (MODULES_END - SZ_64M)
......@@ -68,10 +70,6 @@
#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4))
#if TASK_SIZE_64 > MODULES_VADDR
#error Top of 64-bit user space clashes with start of module space
#endif
/*
* Physical vs virtual RAM address space conversion. These are
* private definitions which should NOT be used outside memory.h
......
......@@ -17,15 +17,16 @@
#define __ASM_MMU_H
typedef struct {
unsigned int id;
raw_spinlock_t id_lock;
void *vdso;
atomic64_t id;
void *vdso;
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
.context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock),
#define ASID(mm) ((mm)->context.id & 0xffff)
/*
* This macro is only used by the TLBI code, which cannot race with an
* ASID change and therefore doesn't need to reload the counter using
* atomic64_read.
*/
#define ASID(mm) ((mm)->context.id.counter & 0xffff)
extern void paging_init(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
......
......@@ -28,13 +28,6 @@
#include <asm/cputype.h>
#include <asm/pgtable.h>
#define MAX_ASID_BITS 16
extern unsigned int cpu_last_asid;
void __init_new_context(struct task_struct *tsk, struct mm_struct *mm);
void __new_context(struct mm_struct *mm);
#ifdef CONFIG_PID_IN_CONTEXTIDR
static inline void contextidr_thread_switch(struct task_struct *next)
{
......@@ -77,96 +70,38 @@ static inline bool __cpu_uses_extended_idmap(void)
unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
}
static inline void __cpu_set_tcr_t0sz(u64 t0sz)
{
unsigned long tcr;
if (__cpu_uses_extended_idmap())
asm volatile (
" mrs %0, tcr_el1 ;"
" bfi %0, %1, %2, %3 ;"
" msr tcr_el1, %0 ;"
" isb"
: "=&r" (tcr)
: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
}
/*
* Set TCR.T0SZ to the value appropriate for activating the identity map.
*/
static inline void cpu_set_idmap_tcr_t0sz(void)
{
__cpu_set_tcr_t0sz(idmap_t0sz);
}
/*
* Set TCR.T0SZ to its default value (based on VA_BITS)
*/
static inline void cpu_set_default_tcr_t0sz(void)
{
__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
}
static inline void switch_new_context(struct mm_struct *mm)
{
unsigned long flags;
__new_context(mm);
unsigned long tcr;
local_irq_save(flags);
cpu_switch_mm(mm->pgd, mm);
local_irq_restore(flags);
}
if (!__cpu_uses_extended_idmap())
return;
static inline void check_and_switch_context(struct mm_struct *mm,
struct task_struct *tsk)
{
/*
* Required during context switch to avoid speculative page table
* walking with the wrong TTBR.
*/
cpu_set_reserved_ttbr0();
if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS))
/*
* The ASID is from the current generation, just switch to the
* new pgd. This condition is only true for calls from
* context_switch() and interrupts are already disabled.
*/
cpu_switch_mm(mm->pgd, mm);
else if (irqs_disabled())
/*
* Defer the new ASID allocation until after the context
* switch critical region since __new_context() cannot be
* called with interrupts disabled.
*/
set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM);
else
/*
* That is a direct call to switch_mm() or activate_mm() with
* interrupts enabled and a new context.
*/
switch_new_context(mm);
asm volatile (
" mrs %0, tcr_el1 ;"
" bfi %0, %1, %2, %3 ;"
" msr tcr_el1, %0 ;"
" isb"
: "=&r" (tcr)
: "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
}
#define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0)
/*
* It would be nice to return ASIDs back to the allocator, but unfortunately
* that introduces a race with a generation rollover where we could erroneously
* free an ASID allocated in a future generation. We could workaround this by
* freeing the ASID from the context of the dying mm (e.g. in arch_exit_mmap),
* but we'd then need to make sure that we didn't dirty any TLBs afterwards.
* Setting a reserved TTBR0 or EPD0 would work, but it all gets ugly when you
* take CPU migration into account.
*/
#define destroy_context(mm) do { } while(0)
void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
#define finish_arch_post_lock_switch \
finish_arch_post_lock_switch
static inline void finish_arch_post_lock_switch(void)
{
if (test_and_clear_thread_flag(TIF_SWITCH_MM)) {
struct mm_struct *mm = current->mm;
unsigned long flags;
__new_context(mm);
local_irq_save(flags);
cpu_switch_mm(mm->pgd, mm);
local_irq_restore(flags);
}
}
#define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; })
/*
* This is called when "tsk" is about to enter lazy TLB mode.
......@@ -194,6 +129,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
{
unsigned int cpu = smp_processor_id();
if (prev == next)
return;
/*
* init_mm.pgd does not contain any user mappings and it is always
* active for kernel addresses in TTBR1. Just set the reserved TTBR0.
......@@ -203,8 +141,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
return;
}
if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
check_and_switch_context(next, tsk);
check_and_switch_context(next, cpu);
}
#define deactivate_mm(tsk,mm) do { } while (0)
......
......@@ -20,31 +20,22 @@
#define __ASM_PAGE_H
/* PAGE_SHIFT determines the page size */
/* CONT_SHIFT determines the number of pages which can be tracked together */
#ifdef CONFIG_ARM64_64K_PAGES
#define PAGE_SHIFT 16
#define CONT_SHIFT 5
#elif defined(CONFIG_ARM64_16K_PAGES)
#define PAGE_SHIFT 14
#define CONT_SHIFT 7
#else
#define PAGE_SHIFT 12
#define CONT_SHIFT 4
#endif
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
/*
* The idmap and swapper page tables need some space reserved in the kernel
* image. Both require pgd, pud (4 levels only) and pmd tables to (section)
* map the kernel. With the 64K page configuration, swapper and idmap need to
* map to pte level. The swapper also maps the FDT (see __create_page_tables
* for more information). Note that the number of ID map translation levels
* could be increased on the fly if system RAM is out of reach for the default
* VA range, so 3 pages are reserved in all cases.
*/
#ifdef CONFIG_ARM64_64K_PAGES
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
#else
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
#endif
#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
#define IDMAP_DIR_SIZE (3 * PAGE_SIZE)
#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
#define CONT_MASK (~(CONT_SIZE-1))
#ifndef __ASSEMBLY__
......
......@@ -27,6 +27,7 @@
#define check_pgt_cache() do { } while (0)
#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
#if CONFIG_PGTABLE_LEVELS > 2
......
......@@ -16,13 +16,46 @@
#ifndef __ASM_PGTABLE_HWDEF_H
#define __ASM_PGTABLE_HWDEF_H
/*
* Number of page-table levels required to address 'va_bits' wide
* address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
* bits with (PAGE_SHIFT - 3) bits at each page table level. Hence:
*
* levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3))
*
* where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d))
*
* We cannot include linux/kernel.h which defines DIV_ROUND_UP here
* due to build issues. So we open code DIV_ROUND_UP here:
*
* ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3))
*
* which gets simplified as :
*/
#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
/*
* Size mapped by an entry at level n ( 0 <= n <= 3)
* We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
* in the final page. The maximum number of translation levels supported by
* the architecture is 4. Hence, starting at at level n, we have further
* ((4 - n) - 1) levels of translation excluding the offset within the page.
* So, the total number of bits mapped by an entry at level n is :
*
* ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT
*
* Rearranging it a bit we get :
* (4 - n) * (PAGE_SHIFT - 3) + 3
*/
#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)
#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3))
/*
* PMD_SHIFT determines the size a level 2 page table entry can map.
*/
#if CONFIG_PGTABLE_LEVELS > 2
#define PMD_SHIFT ((PAGE_SHIFT - 3) * 2 + 3)
#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
#define PTRS_PER_PMD PTRS_PER_PTE
......@@ -32,7 +65,7 @@
* PUD_SHIFT determines the size a level 1 page table entry can map.
*/
#if CONFIG_PGTABLE_LEVELS > 3
#define PUD_SHIFT ((PAGE_SHIFT - 3) * 3 + 3)
#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1))
#define PTRS_PER_PUD PTRS_PER_PTE
......@@ -42,7 +75,7 @@
* PGDIR_SHIFT determines the size a top-level page table entry can map
* (depending on the configuration, this level can be 0, 1 or 2).
*/
#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3)
#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
......@@ -54,6 +87,13 @@
#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT)
#define SECTION_MASK (~(SECTION_SIZE-1))
/*
* Contiguous page definitions.
*/
#define CONT_PTES (_AC(1, UL) << CONT_SHIFT)
/* the the numerical offset of the PTE within a range of CONT_PTES */
#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1))
/*
* Hardware page table definitions.
*
......@@ -83,6 +123,7 @@
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
#define PMD_SECT_NG (_AT(pmdval_t, 1) << 11)
#define PMD_SECT_CONT (_AT(pmdval_t, 1) << 52)
#define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53)
#define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54)
......@@ -105,6 +146,7 @@
#define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */
#define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */
#define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */
#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */
#define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
......
......@@ -41,7 +41,14 @@
* fixed mappings and modules
*/
#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
#define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS)
#ifndef CONFIG_KASAN
#define VMALLOC_START (VA_START)
#else
#include <asm/kasan.h>
#define VMALLOC_START (KASAN_SHADOW_END + SZ_64K)
#endif
#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K))
......@@ -74,6 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP)
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
......@@ -142,6 +150,7 @@ extern struct page *empty_zero_page;
#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL))
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
#ifdef CONFIG_ARM64_HW_AFDBM
#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
......@@ -204,6 +213,16 @@ static inline pte_t pte_mkspecial(pte_t pte)
return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
}
static inline pte_t pte_mkcont(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_CONT));
}
static inline pte_t pte_mknoncont(pte_t pte)
{
return clear_pte_bit(pte, __pgprot(PTE_CONT));
}
static inline void set_pte(pte_t *ptep, pte_t pte)
{
*ptep = pte;
......@@ -648,14 +667,17 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
/*
* set_pte() does not have a DSB for user mappings, so make sure that
* the page table write is visible.
* We don't do anything here, so there's a very small chance of
* us retaking a user fault which we just fixed up. The alternative
* is doing a dsb(ishst), but that penalises the fastpath.
*/
dsb(ishst);
}
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
#define kc_vaddr_to_offset(v) ((v) & ~VA_START)
#define kc_offset_to_vaddr(o) ((o) | VA_START)
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */
/*
* Based on arch/arm/include/asm/pmu.h
*
* Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles
* Copyright (C) 2012 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __ASM_PMU_H
#define __ASM_PMU_H
#ifdef CONFIG_HW_PERF_EVENTS
/* The events for a given PMU register set. */
struct pmu_hw_events {
/*
* The events that are active on the PMU for the given index.
*/
struct perf_event **events;
/*
* A 1 bit for an index indicates that the counter is being used for
* an event. A 0 means that the counter can be used.
*/
unsigned long *used_mask;
/*
* Hardware lock to serialize accesses to PMU registers. Needed for the
* read/modify/write sequences.
*/
raw_spinlock_t pmu_lock;
};
struct arm_pmu {
struct pmu pmu;
cpumask_t active_irqs;
int *irq_affinity;
const char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
void (*enable)(struct hw_perf_event *evt, int idx);
void (*disable)(struct hw_perf_event *evt, int idx);
int (*get_event_idx)(struct pmu_hw_events *hw_events,
struct hw_perf_event *hwc);
int (*set_event_filter)(struct hw_perf_event *evt,
struct perf_event_attr *attr);
u32 (*read_counter)(int idx);
void (*write_counter)(int idx, u32 val);
void (*start)(void);
void (*stop)(void);
void (*reset)(void *);
int (*map_event)(struct perf_event *event);
int num_events;
atomic_t active_events;
struct mutex reserve_mutex;
u64 max_period;
struct platform_device *plat_device;
struct pmu_hw_events *(*get_hw_events)(void);
};
#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type);
u64 armpmu_event_update(struct perf_event *event,
struct hw_perf_event *hwc,
int idx);
int armpmu_event_set_period(struct perf_event *event,
struct hw_perf_event *hwc,
int idx);
#endif /* CONFIG_HW_PERF_EVENTS */
#endif /* __ASM_PMU_H */
......@@ -186,6 +186,6 @@ static inline void spin_lock_prefetch(const void *x)
#endif
void cpu_enable_pan(void);
void cpu_enable_pan(void *__unused);
#endif /* __ASM_PROCESSOR_H */
......@@ -83,14 +83,14 @@
#define compat_sp regs[13]
#define compat_lr regs[14]
#define compat_sp_hyp regs[15]
#define compat_sp_irq regs[16]
#define compat_lr_irq regs[17]
#define compat_sp_svc regs[18]
#define compat_lr_svc regs[19]
#define compat_sp_abt regs[20]
#define compat_lr_abt regs[21]
#define compat_sp_und regs[22]
#define compat_lr_und regs[23]
#define compat_lr_irq regs[16]
#define compat_sp_irq regs[17]
#define compat_lr_svc regs[18]
#define compat_sp_svc regs[19]
#define compat_lr_abt regs[20]
#define compat_sp_abt regs[21]
#define compat_lr_und regs[22]
#define compat_sp_und regs[23]
#define compat_r8_fiq regs[24]
#define compat_r9_fiq regs[25]
#define compat_r10_fiq regs[26]
......
......@@ -36,17 +36,33 @@ extern __kernel_size_t strnlen(const char *, __kernel_size_t);
#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *, const void *, __kernel_size_t);
extern void *__memcpy(void *, const void *, __kernel_size_t);
#define __HAVE_ARCH_MEMMOVE
extern void *memmove(void *, const void *, __kernel_size_t);
extern void *__memmove(void *, const void *, __kernel_size_t);
#define __HAVE_ARCH_MEMCHR
extern void *memchr(const void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMSET
extern void *memset(void *, int, __kernel_size_t);
extern void *__memset(void *, int, __kernel_size_t);
#define __HAVE_ARCH_MEMCMP
extern int memcmp(const void *, const void *, size_t);
#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
/*
* For files that are not instrumented (e.g. mm/slub.c) we
* should use not instrumented version of mem* functions.
*/
#define memcpy(dst, src, len) __memcpy(dst, src, len)
#define memmove(dst, src, len) __memmove(dst, src, len)
#define memset(s, c, n) __memset(s, c, n)
#endif
#endif
......@@ -22,9 +22,6 @@
#include <asm/opcodes.h>
#define SCTLR_EL1_CP15BEN (0x1 << 5)
#define SCTLR_EL1_SED (0x1 << 8)
/*
* ARMv8 ARM reserves the following encoding for system registers:
* (Ref: ARMv8 ARM, Section: "System instruction class encoding overview",
......@@ -38,12 +35,162 @@
#define sys_reg(op0, op1, crn, crm, op2) \
((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4)
#define SCTLR_EL1_SPAN (1 << 23)
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0)
#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1)
#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2)
#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4)
#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5)
#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6)
#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7)
#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0)
#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1)
#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2)
#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3)
#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4)
#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5)
#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6)
#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0)
#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1)
#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2)
#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0)
#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1)
#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0)
#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1)
#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0)
#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1)
#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0)
#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1)
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1)
#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7)
#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4)
#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
(!!x)<<8 | 0x1f)
/* SCTLR_EL1 */
#define SCTLR_EL1_CP15BEN (0x1 << 5)
#define SCTLR_EL1_SED (0x1 << 8)
#define SCTLR_EL1_SPAN (0x1 << 23)
/* id_aa64isar0 */
#define ID_AA64ISAR0_RDM_SHIFT 28
#define ID_AA64ISAR0_ATOMICS_SHIFT 20
#define ID_AA64ISAR0_CRC32_SHIFT 16
#define ID_AA64ISAR0_SHA2_SHIFT 12
#define ID_AA64ISAR0_SHA1_SHIFT 8
#define ID_AA64ISAR0_AES_SHIFT 4
/* id_aa64pfr0 */
#define ID_AA64PFR0_GIC_SHIFT 24
#define ID_AA64PFR0_ASIMD_SHIFT 20
#define ID_AA64PFR0_FP_SHIFT 16
#define ID_AA64PFR0_EL3_SHIFT 12
#define ID_AA64PFR0_EL2_SHIFT 8
#define ID_AA64PFR0_EL1_SHIFT 4
#define ID_AA64PFR0_EL0_SHIFT 0
#define ID_AA64PFR0_FP_NI 0xf
#define ID_AA64PFR0_FP_SUPPORTED 0x0
#define ID_AA64PFR0_ASIMD_NI 0xf
#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0
#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1
#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_TGRAN4_SHIFT 28
#define ID_AA64MMFR0_TGRAN64_SHIFT 24
#define ID_AA64MMFR0_TGRAN16_SHIFT 20
#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16
#define ID_AA64MMFR0_SNSMEM_SHIFT 12
#define ID_AA64MMFR0_BIGENDEL_SHIFT 8
#define ID_AA64MMFR0_ASID_SHIFT 4
#define ID_AA64MMFR0_PARANGE_SHIFT 0
#define ID_AA64MMFR0_TGRAN4_NI 0xf
#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN64_NI 0xf
#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN16_NI 0x0
#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
/* id_aa64mmfr1 */
#define ID_AA64MMFR1_PAN_SHIFT 20
#define ID_AA64MMFR1_LOR_SHIFT 16
#define ID_AA64MMFR1_HPD_SHIFT 12
#define ID_AA64MMFR1_VHE_SHIFT 8
#define ID_AA64MMFR1_VMIDBITS_SHIFT 4
#define ID_AA64MMFR1_HADBS_SHIFT 0
/* id_aa64dfr0 */
#define ID_AA64DFR0_CTX_CMPS_SHIFT 28
#define ID_AA64DFR0_WRPS_SHIFT 20
#define ID_AA64DFR0_BRPS_SHIFT 12
#define ID_AA64DFR0_PMUVER_SHIFT 8
#define ID_AA64DFR0_TRACEVER_SHIFT 4
#define ID_AA64DFR0_DEBUGVER_SHIFT 0
#define ID_ISAR5_RDM_SHIFT 24
#define ID_ISAR5_CRC32_SHIFT 16
#define ID_ISAR5_SHA2_SHIFT 12
#define ID_ISAR5_SHA1_SHIFT 8
#define ID_ISAR5_AES_SHIFT 4
#define ID_ISAR5_SEVL_SHIFT 0
#define MVFR0_FPROUND_SHIFT 28
#define MVFR0_FPSHVEC_SHIFT 24
#define MVFR0_FPSQRT_SHIFT 20
#define MVFR0_FPDIVIDE_SHIFT 16
#define MVFR0_FPTRAP_SHIFT 12
#define MVFR0_FPDP_SHIFT 8
#define MVFR0_FPSP_SHIFT 4
#define MVFR0_SIMD_SHIFT 0
#define MVFR1_SIMDFMAC_SHIFT 28
#define MVFR1_FPHP_SHIFT 24
#define MVFR1_SIMDHP_SHIFT 20
#define MVFR1_SIMDSP_SHIFT 16
#define MVFR1_SIMDINT_SHIFT 12
#define MVFR1_SIMDLS_SHIFT 8
#define MVFR1_FPDNAN_SHIFT 4
#define MVFR1_FPFTZ_SHIFT 0
#define ID_AA64MMFR0_TGRAN4_SHIFT 28
#define ID_AA64MMFR0_TGRAN64_SHIFT 24
#define ID_AA64MMFR0_TGRAN16_SHIFT 20
#define ID_AA64MMFR0_TGRAN4_NI 0xf
#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN64_NI 0xf
#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN16_NI 0x0
#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
#if defined(CONFIG_ARM64_4K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT
#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED
#elif defined(CONFIG_ARM64_16K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT
#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN16_SUPPORTED
#elif defined(CONFIG_ARM64_64K_PAGES)
#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT
#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED
#endif
#ifdef __ASSEMBLY__
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
......
......@@ -23,8 +23,10 @@
#include <linux/compiler.h>
#ifndef CONFIG_ARM64_64K_PAGES
#ifdef CONFIG_ARM64_4K_PAGES
#define THREAD_SIZE_ORDER 2
#elif defined(CONFIG_ARM64_16K_PAGES)
#define THREAD_SIZE_ORDER 0
#endif
#define THREAD_SIZE 16384
......@@ -111,7 +113,6 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_RESTORE_SIGMASK 20
#define TIF_SINGLESTEP 21
#define TIF_32BIT 22 /* 32bit process */
#define TIF_SWITCH_MM 23 /* deferred switch_mm */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
......
......@@ -37,17 +37,21 @@ static inline void __tlb_remove_table(void *_table)
static inline void tlb_flush(struct mmu_gather *tlb)
{
if (tlb->fullmm) {
flush_tlb_mm(tlb->mm);
} else {
struct vm_area_struct vma = { .vm_mm = tlb->mm, };
/*
* The intermediate page table levels are already handled by
* the __(pte|pmd|pud)_free_tlb() functions, so last level
* TLBI is sufficient here.
*/
__flush_tlb_range(&vma, tlb->start, tlb->end, true);
}
struct vm_area_struct vma = { .vm_mm = tlb->mm, };
/*
* The ASID allocator will either invalidate the ASID or mark
* it as used.
*/
if (tlb->fullmm)
return;
/*
* The intermediate page table levels are already handled by
* the __(pte|pmd|pud)_free_tlb() functions, so last level
* TLBI is sufficient here.
*/
__flush_tlb_range(&vma, tlb->start, tlb->end, true);
}
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
......
......@@ -63,6 +63,14 @@
* only require the D-TLB to be invalidated.
* - kaddr - Kernel virtual memory address
*/
static inline void local_flush_tlb_all(void)
{
dsb(nshst);
asm("tlbi vmalle1");
dsb(nsh);
isb();
}
static inline void flush_tlb_all(void)
{
dsb(ishst);
......@@ -73,7 +81,7 @@ static inline void flush_tlb_all(void)
static inline void flush_tlb_mm(struct mm_struct *mm)
{
unsigned long asid = (unsigned long)ASID(mm) << 48;
unsigned long asid = ASID(mm) << 48;
dsb(ishst);
asm("tlbi aside1is, %0" : : "r" (asid));
......@@ -83,8 +91,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long uaddr)
{
unsigned long addr = uaddr >> 12 |
((unsigned long)ASID(vma->vm_mm) << 48);
unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48);
dsb(ishst);
asm("tlbi vale1is, %0" : : "r" (addr));
......@@ -101,7 +108,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
bool last_level)
{
unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
unsigned long asid = ASID(vma->vm_mm) << 48;
unsigned long addr;
if ((end - start) > MAX_TLB_RANGE) {
......@@ -154,9 +161,8 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
static inline void __flush_tlb_pgtable(struct mm_struct *mm,
unsigned long uaddr)
{
unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48);
unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
dsb(ishst);
asm("tlbi vae1is, %0" : : "r" (addr));
dsb(ish);
}
......
......@@ -4,7 +4,6 @@
CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
CFLAGS_armv8_deprecated.o := -I$(src)
CFLAGS_REMOVE_ftrace.o = -pg
......@@ -20,6 +19,12 @@ arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o
extra-$(CONFIG_EFI) := efi-entry.o
OBJCOPYFLAGS := --prefix-symbols=__efistub_
$(obj)/%.stub.o: $(obj)/%.o FORCE
$(call if_changed,objcopy)
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
sys_compat.o entry32.o \
../../arm/kernel/opcodes.o
......@@ -32,7 +37,7 @@ arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o
arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o
arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
arm64-obj-$(CONFIG_KGDB) += kgdb.o
arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o
arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o
arm64-obj-$(CONFIG_PCI) += pci.o
arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o
arm64-obj-$(CONFIG_ACPI) += acpi.o
......@@ -40,7 +45,7 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
head-y := head.o
extra-y := $(head-y) vmlinux.lds
extra-y += $(head-y) vmlinux.lds
# vDSO - this must be built first to generate the symbol offsets
$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h
......
......@@ -51,6 +51,9 @@ EXPORT_SYMBOL(strnlen);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(__memset);
EXPORT_SYMBOL(__memcpy);
EXPORT_SYMBOL(__memmove);
EXPORT_SYMBOL(memchr);
EXPORT_SYMBOL(memcmp);
......
......@@ -60,7 +60,7 @@ int main(void)
DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
BLANK();
DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id));
DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
BLANK();
DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm));
DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags));
......
......@@ -97,5 +97,5 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
void check_local_cpu_errata(void)
{
check_cpu_capabilities(arm64_errata, "enabling workaround for");
update_cpu_capabilities(arm64_errata, "enabling workaround for");
}
......@@ -16,12 +16,567 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define pr_fmt(fmt) "alternatives: " fmt
#define pr_fmt(fmt) "CPU features: " fmt
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/types.h>
#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/processor.h>
#include <asm/sysreg.h>
unsigned long elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
#ifdef CONFIG_COMPAT
#define COMPAT_ELF_HWCAP_DEFAULT \
(COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
COMPAT_HWCAP_LPAE)
unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
unsigned int compat_elf_hwcap2 __read_mostly;
#endif
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
{ \
.strict = STRICT, \
.type = TYPE, \
.shift = SHIFT, \
.width = WIDTH, \
.safe_val = SAFE_VAL, \
}
#define ARM64_FTR_END \
{ \
.width = 0, \
}
static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
/* Linux doesn't care about the EL3 */
ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
/* Linux shouldn't care about secure memory */
ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
/*
* Differing PARange is fine as long as all peripherals and memory are mapped
* within the minimum PARange of all CPUs
*/
ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_ctr[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */
/*
* Linux can handle differing I-cache policies. Userspace JITs will
* make use of *minLine
*/
ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_id_mmfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), /* InnerShr */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */
ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* OuterShr */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_mvfr2[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_dczid[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0), /* RAZ */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_id_isar5[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0), /* RAZ */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0),
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_id_mmfr4[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_id_pfr0[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0), /* RAZ */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* State1 */
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* State0 */
ARM64_FTR_END,
};
/*
* Common ftr bits for a 32bit register with all hidden, strict
* attributes, with 4bit feature fields and a default safe value of
* 0. Covers the following 32bit registers:
* id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1]
*/
static struct arm64_ftr_bits ftr_generic_32bits[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_generic[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_generic32[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0),
ARM64_FTR_END,
};
static struct arm64_ftr_bits ftr_aa64raz[] = {
ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
ARM64_FTR_END,
};
#define ARM64_FTR_REG(id, table) \
{ \
.sys_id = id, \
.name = #id, \
.ftr_bits = &((table)[0]), \
}
static struct arm64_ftr_reg arm64_ftr_regs[] = {
/* Op1 = 0, CRn = 0, CRm = 1 */
ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0),
ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0),
ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits),
/* Op1 = 0, CRn = 0, CRm = 2 */
ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5),
ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4),
/* Op1 = 0, CRn = 0, CRm = 3 */
ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits),
ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2),
/* Op1 = 0, CRn = 0, CRm = 4 */
ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_aa64raz),
/* Op1 = 0, CRn = 0, CRm = 5 */
ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_generic),
/* Op1 = 0, CRn = 0, CRm = 6 */
ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_aa64raz),
/* Op1 = 0, CRn = 0, CRm = 7 */
ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
/* Op1 = 3, CRn = 0, CRm = 0 */
ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr),
ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
/* Op1 = 3, CRn = 14, CRm = 0 */
ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_generic32),
};
static int search_cmp_ftr_reg(const void *id, const void *regp)
{
return (int)(unsigned long)id - (int)((const struct arm64_ftr_reg *)regp)->sys_id;
}
/*
* get_arm64_ftr_reg - Lookup a feature register entry using its
* sys_reg() encoding. With the array arm64_ftr_regs sorted in the
* ascending order of sys_id , we use binary search to find a matching
* entry.
*
* returns - Upon success, matching ftr_reg entry for id.
* - NULL on failure. It is upto the caller to decide
* the impact of a failure.
*/
static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
{
return bsearch((const void *)(unsigned long)sys_id,
arm64_ftr_regs,
ARRAY_SIZE(arm64_ftr_regs),
sizeof(arm64_ftr_regs[0]),
search_cmp_ftr_reg);
}
static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val)
{
u64 mask = arm64_ftr_mask(ftrp);
reg &= ~mask;
reg |= (ftr_val << ftrp->shift) & mask;
return reg;
}
static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur)
{
s64 ret = 0;
switch (ftrp->type) {
case FTR_EXACT:
ret = ftrp->safe_val;
break;
case FTR_LOWER_SAFE:
ret = new < cur ? new : cur;
break;
case FTR_HIGHER_SAFE:
ret = new > cur ? new : cur;
break;
default:
BUG();
}
return ret;
}
static int __init sort_cmp_ftr_regs(const void *a, const void *b)
{
return ((const struct arm64_ftr_reg *)a)->sys_id -
((const struct arm64_ftr_reg *)b)->sys_id;
}
static void __init swap_ftr_regs(void *a, void *b, int size)
{
struct arm64_ftr_reg tmp = *(struct arm64_ftr_reg *)a;
*(struct arm64_ftr_reg *)a = *(struct arm64_ftr_reg *)b;
*(struct arm64_ftr_reg *)b = tmp;
}
static void __init sort_ftr_regs(void)
{
/* Keep the array sorted so that we can do the binary search */
sort(arm64_ftr_regs,
ARRAY_SIZE(arm64_ftr_regs),
sizeof(arm64_ftr_regs[0]),
sort_cmp_ftr_regs,
swap_ftr_regs);
}
/*
* Initialise the CPU feature register from Boot CPU values.
* Also initiliases the strict_mask for the register.
*/
static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
{
u64 val = 0;
u64 strict_mask = ~0x0ULL;
struct arm64_ftr_bits *ftrp;
struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
BUG_ON(!reg);
for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
s64 ftr_new = arm64_ftr_value(ftrp, new);
val = arm64_ftr_set_value(ftrp, val, ftr_new);
if (!ftrp->strict)
strict_mask &= ~arm64_ftr_mask(ftrp);
}
reg->sys_val = val;
reg->strict_mask = strict_mask;
}
void __init init_cpu_features(struct cpuinfo_arm64 *info)
{
/* Before we start using the tables, make sure it is sorted */
sort_ftr_regs();
init_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr);
init_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid);
init_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq);
init_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0);
init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1);
init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0);
init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
}
static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
{
struct arm64_ftr_bits *ftrp;
for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val);
s64 ftr_new = arm64_ftr_value(ftrp, new);
if (ftr_cur == ftr_new)
continue;
/* Find a safe value */
ftr_new = arm64_ftr_safe_value(ftrp, ftr_new, ftr_cur);
reg->sys_val = arm64_ftr_set_value(ftrp, reg->sys_val, ftr_new);
}
}
static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot)
{
struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id);
BUG_ON(!regp);
update_cpu_ftr_reg(regp, val);
if ((boot & regp->strict_mask) == (val & regp->strict_mask))
return 0;
pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016llx, CPU%d: %#016llx\n",
regp->name, boot, cpu, val);
return 1;
}
/*
* Update system wide CPU feature registers with the values from a
* non-boot CPU. Also performs SANITY checks to make sure that there
* aren't any insane variations from that of the boot CPU.
*/
void update_cpu_features(int cpu,
struct cpuinfo_arm64 *info,
struct cpuinfo_arm64 *boot)
{
int taint = 0;
/*
* The kernel can handle differing I-cache policies, but otherwise
* caches should look identical. Userspace JITs will make use of
* *minLine.
*/
taint |= check_update_ftr_reg(SYS_CTR_EL0, cpu,
info->reg_ctr, boot->reg_ctr);
/*
* Userspace may perform DC ZVA instructions. Mismatched block sizes
* could result in too much or too little memory being zeroed if a
* process is preempted and migrated between CPUs.
*/
taint |= check_update_ftr_reg(SYS_DCZID_EL0, cpu,
info->reg_dczid, boot->reg_dczid);
/* If different, timekeeping will be broken (especially with KVM) */
taint |= check_update_ftr_reg(SYS_CNTFRQ_EL0, cpu,
info->reg_cntfrq, boot->reg_cntfrq);
/*
* The kernel uses self-hosted debug features and expects CPUs to
* support identical debug features. We presently need CTX_CMPs, WRPs,
* and BRPs to be identical.
* ID_AA64DFR1 is currently RES0.
*/
taint |= check_update_ftr_reg(SYS_ID_AA64DFR0_EL1, cpu,
info->reg_id_aa64dfr0, boot->reg_id_aa64dfr0);
taint |= check_update_ftr_reg(SYS_ID_AA64DFR1_EL1, cpu,
info->reg_id_aa64dfr1, boot->reg_id_aa64dfr1);
/*
* Even in big.LITTLE, processors should be identical instruction-set
* wise.
*/
taint |= check_update_ftr_reg(SYS_ID_AA64ISAR0_EL1, cpu,
info->reg_id_aa64isar0, boot->reg_id_aa64isar0);
taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu,
info->reg_id_aa64isar1, boot->reg_id_aa64isar1);
/*
* Differing PARange support is fine as long as all peripherals and
* memory are mapped within the minimum PARange of all CPUs.
* Linux should not care about secure memory.
*/
taint |= check_update_ftr_reg(SYS_ID_AA64MMFR0_EL1, cpu,
info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0);
taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu,
info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1);
/*
* EL3 is not our concern.
* ID_AA64PFR1 is currently RES0.
*/
taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu,
info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu,
info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1);
/*
* If we have AArch32, we care about 32-bit features for compat. These
* registers should be RES0 otherwise.
*/
taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
info->reg_id_dfr0, boot->reg_id_dfr0);
taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
info->reg_id_isar0, boot->reg_id_isar0);
taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
info->reg_id_isar1, boot->reg_id_isar1);
taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
info->reg_id_isar2, boot->reg_id_isar2);
taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
info->reg_id_isar3, boot->reg_id_isar3);
taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
info->reg_id_isar4, boot->reg_id_isar4);
taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
info->reg_id_isar5, boot->reg_id_isar5);
/*
* Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
* ACTLR formats could differ across CPUs and therefore would have to
* be trapped for virtualization anyway.
*/
taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
info->reg_id_mmfr0, boot->reg_id_mmfr0);
taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
info->reg_id_mmfr1, boot->reg_id_mmfr1);
taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
info->reg_id_mmfr2, boot->reg_id_mmfr2);
taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
info->reg_id_mmfr3, boot->reg_id_mmfr3);
taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
info->reg_id_pfr0, boot->reg_id_pfr0);
taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
info->reg_id_pfr1, boot->reg_id_pfr1);
taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
info->reg_mvfr0, boot->reg_mvfr0);
taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
info->reg_mvfr1, boot->reg_mvfr1);
taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
info->reg_mvfr2, boot->reg_mvfr2);
/*
* Mismatched CPU features are a recipe for disaster. Don't even
* pretend to support them.
*/
WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC,
"Unsupported CPU feature variation.\n");
}
u64 read_system_reg(u32 id)
{
struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id);
/* We shouldn't get a request for an unsupported register */
BUG_ON(!regp);
return regp->sys_val;
}
#include <linux/irqchip/arm-gic-v3.h>
......@@ -33,25 +588,20 @@ feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
return val >= entry->min_field_value;
}
#define __ID_FEAT_CHK(reg) \
static bool __maybe_unused \
has_##reg##_feature(const struct arm64_cpu_capabilities *entry) \
{ \
u64 val; \
\
val = read_cpuid(reg##_el1); \
return feature_matches(val, entry); \
}
static bool
has_cpuid_feature(const struct arm64_cpu_capabilities *entry)
{
u64 val;
__ID_FEAT_CHK(id_aa64pfr0);
__ID_FEAT_CHK(id_aa64mmfr1);
__ID_FEAT_CHK(id_aa64isar0);
val = read_system_reg(entry->sys_reg);
return feature_matches(val, entry);
}
static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
{
bool has_sre;
if (!has_id_aa64pfr0_feature(entry))
if (!has_cpuid_feature(entry))
return false;
has_sre = gic_enable_sre();
......@@ -67,15 +617,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.desc = "GIC system register CPU interface",
.capability = ARM64_HAS_SYSREG_GIC_CPUIF,
.matches = has_useable_gicv3_cpuif,
.field_pos = 24,
.sys_reg = SYS_ID_AA64PFR0_EL1,
.field_pos = ID_AA64PFR0_GIC_SHIFT,
.min_field_value = 1,
},
#ifdef CONFIG_ARM64_PAN
{
.desc = "Privileged Access Never",
.capability = ARM64_HAS_PAN,
.matches = has_id_aa64mmfr1_feature,
.field_pos = 20,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64MMFR1_EL1,
.field_pos = ID_AA64MMFR1_PAN_SHIFT,
.min_field_value = 1,
.enable = cpu_enable_pan,
},
......@@ -84,15 +636,101 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "LSE atomic instructions",
.capability = ARM64_HAS_LSE_ATOMICS,
.matches = has_id_aa64isar0_feature,
.field_pos = 20,
.matches = has_cpuid_feature,
.sys_reg = SYS_ID_AA64ISAR0_EL1,
.field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
.min_field_value = 2,
},
#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
{},
};
void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
#define HWCAP_CAP(reg, field, min_value, type, cap) \
{ \
.desc = #cap, \
.matches = has_cpuid_feature, \
.sys_reg = reg, \
.field_pos = field, \
.min_field_value = min_value, \
.hwcap_type = type, \
.hwcap = cap, \
}
static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 2, CAP_HWCAP, HWCAP_PMULL),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 1, CAP_HWCAP, HWCAP_AES),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 1, CAP_HWCAP, HWCAP_SHA1),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 2, CAP_HWCAP, HWCAP_ATOMICS),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 0, CAP_HWCAP, HWCAP_FP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 0, CAP_HWCAP, HWCAP_ASIMD),
#ifdef CONFIG_COMPAT
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
#endif
{},
};
static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap)
{
switch (cap->hwcap_type) {
case CAP_HWCAP:
elf_hwcap |= cap->hwcap;
break;
#ifdef CONFIG_COMPAT
case CAP_COMPAT_HWCAP:
compat_elf_hwcap |= (u32)cap->hwcap;
break;
case CAP_COMPAT_HWCAP2:
compat_elf_hwcap2 |= (u32)cap->hwcap;
break;
#endif
default:
WARN_ON(1);
break;
}
}
/* Check if we have a particular HWCAP enabled */
static bool cpus_have_hwcap(const struct arm64_cpu_capabilities *cap)
{
bool rc;
switch (cap->hwcap_type) {
case CAP_HWCAP:
rc = (elf_hwcap & cap->hwcap) != 0;
break;
#ifdef CONFIG_COMPAT
case CAP_COMPAT_HWCAP:
rc = (compat_elf_hwcap & (u32)cap->hwcap) != 0;
break;
case CAP_COMPAT_HWCAP2:
rc = (compat_elf_hwcap2 & (u32)cap->hwcap) != 0;
break;
#endif
default:
WARN_ON(1);
rc = false;
}
return rc;
}
static void setup_cpu_hwcaps(void)
{
int i;
const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
for (i = 0; hwcaps[i].desc; i++)
if (hwcaps[i].matches(&hwcaps[i]))
cap_set_hwcap(&hwcaps[i]);
}
void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
const char *info)
{
int i;
......@@ -105,15 +743,178 @@ void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
pr_info("%s %s\n", info, caps[i].desc);
cpus_set_cap(caps[i].capability);
}
}
/*
* Run through the enabled capabilities and enable() it on all active
* CPUs
*/
static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
{
int i;
for (i = 0; caps[i].desc; i++)
if (caps[i].enable && cpus_have_cap(caps[i].capability))
on_each_cpu(caps[i].enable, NULL, true);
}
#ifdef CONFIG_HOTPLUG_CPU
/*
* Flag to indicate if we have computed the system wide
* capabilities based on the boot time active CPUs. This
* will be used to determine if a new booting CPU should
* go through the verification process to make sure that it
* supports the system capabilities, without using a hotplug
* notifier.
*/
static bool sys_caps_initialised;
static inline void set_sys_caps_initialised(void)
{
sys_caps_initialised = true;
}
/*
* __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated.
*/
static u64 __raw_read_system_reg(u32 sys_id)
{
switch (sys_id) {
case SYS_ID_PFR0_EL1: return (u64)read_cpuid(ID_PFR0_EL1);
case SYS_ID_PFR1_EL1: return (u64)read_cpuid(ID_PFR1_EL1);
case SYS_ID_DFR0_EL1: return (u64)read_cpuid(ID_DFR0_EL1);
case SYS_ID_MMFR0_EL1: return (u64)read_cpuid(ID_MMFR0_EL1);
case SYS_ID_MMFR1_EL1: return (u64)read_cpuid(ID_MMFR1_EL1);
case SYS_ID_MMFR2_EL1: return (u64)read_cpuid(ID_MMFR2_EL1);
case SYS_ID_MMFR3_EL1: return (u64)read_cpuid(ID_MMFR3_EL1);
case SYS_ID_ISAR0_EL1: return (u64)read_cpuid(ID_ISAR0_EL1);
case SYS_ID_ISAR1_EL1: return (u64)read_cpuid(ID_ISAR1_EL1);
case SYS_ID_ISAR2_EL1: return (u64)read_cpuid(ID_ISAR2_EL1);
case SYS_ID_ISAR3_EL1: return (u64)read_cpuid(ID_ISAR3_EL1);
case SYS_ID_ISAR4_EL1: return (u64)read_cpuid(ID_ISAR4_EL1);
case SYS_ID_ISAR5_EL1: return (u64)read_cpuid(ID_ISAR4_EL1);
case SYS_MVFR0_EL1: return (u64)read_cpuid(MVFR0_EL1);
case SYS_MVFR1_EL1: return (u64)read_cpuid(MVFR1_EL1);
case SYS_MVFR2_EL1: return (u64)read_cpuid(MVFR2_EL1);
case SYS_ID_AA64PFR0_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1);
case SYS_ID_AA64PFR1_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1);
case SYS_ID_AA64DFR0_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1);
case SYS_ID_AA64DFR1_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1);
case SYS_ID_AA64MMFR0_EL1: return (u64)read_cpuid(ID_AA64MMFR0_EL1);
case SYS_ID_AA64MMFR1_EL1: return (u64)read_cpuid(ID_AA64MMFR1_EL1);
case SYS_ID_AA64ISAR0_EL1: return (u64)read_cpuid(ID_AA64ISAR0_EL1);
case SYS_ID_AA64ISAR1_EL1: return (u64)read_cpuid(ID_AA64ISAR1_EL1);
case SYS_CNTFRQ_EL0: return (u64)read_cpuid(CNTFRQ_EL0);
case SYS_CTR_EL0: return (u64)read_cpuid(CTR_EL0);
case SYS_DCZID_EL0: return (u64)read_cpuid(DCZID_EL0);
default:
BUG();
return 0;
}
}
/*
* Park the CPU which doesn't have the capability as advertised
* by the system.
*/
static void fail_incapable_cpu(char *cap_type,
const struct arm64_cpu_capabilities *cap)
{
int cpu = smp_processor_id();
pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc);
/* Mark this CPU absent */
set_cpu_present(cpu, 0);
/* Check if we can park ourselves */
if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
cpu_ops[cpu]->cpu_die(cpu);
asm(
"1: wfe\n"
" wfi\n"
" b 1b");
}
/* second pass allows enable() to consider interacting capabilities */
/*
* Run through the enabled system capabilities and enable() it on this CPU.
* The capabilities were decided based on the available CPUs at the boot time.
* Any new CPU should match the system wide status of the capability. If the
* new CPU doesn't have a capability which the system now has enabled, we
* cannot do anything to fix it up and could cause unexpected failures. So
* we park the CPU.
*/
void verify_local_cpu_capabilities(void)
{
int i;
const struct arm64_cpu_capabilities *caps;
/*
* If we haven't computed the system capabilities, there is nothing
* to verify.
*/
if (!sys_caps_initialised)
return;
caps = arm64_features;
for (i = 0; caps[i].desc; i++) {
if (cpus_have_cap(caps[i].capability) && caps[i].enable)
caps[i].enable();
if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg)
continue;
/*
* If the new CPU misses an advertised feature, we cannot proceed
* further, park the cpu.
*/
if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
fail_incapable_cpu("arm64_features", &caps[i]);
if (caps[i].enable)
caps[i].enable(NULL);
}
for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) {
if (!cpus_have_hwcap(&caps[i]))
continue;
if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
fail_incapable_cpu("arm64_hwcaps", &caps[i]);
}
}
#else /* !CONFIG_HOTPLUG_CPU */
static inline void set_sys_caps_initialised(void)
{
}
#endif /* CONFIG_HOTPLUG_CPU */
static void setup_feature_capabilities(void)
{
update_cpu_capabilities(arm64_features, "detected feature:");
enable_cpu_capabilities(arm64_features);
}
void check_local_cpu_features(void)
void __init setup_cpu_features(void)
{
check_cpu_capabilities(arm64_features, "detected feature:");
u32 cwg;
int cls;
/* Set the CPU feature capabilies */
setup_feature_capabilities();
setup_cpu_hwcaps();
/* Advertise that we have computed the system capabilities */
set_sys_caps_initialised();
/*
* Check for sane CTR_EL0.CWG value.
*/
cwg = cache_type_cwg();
cls = cache_line_size();
if (!cwg)
pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
cls);
if (L1_CACHE_BYTES < cls)
pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
L1_CACHE_BYTES, cls);
}
......@@ -24,8 +24,11 @@
#include <linux/bug.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/personality.h>
#include <linux/preempt.h>
#include <linux/printk.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/smp.h>
/*
......@@ -35,7 +38,6 @@
*/
DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);
static struct cpuinfo_arm64 boot_cpu_data;
static bool mixed_endian_el0 = true;
static char *icache_policy_str[] = {
[ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN",
......@@ -46,157 +48,148 @@ static char *icache_policy_str[] = {
unsigned long __icache_flags;
static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
static const char *const hwcap_str[] = {
"fp",
"asimd",
"evtstrm",
"aes",
"pmull",
"sha1",
"sha2",
"crc32",
"atomics",
NULL
};
#ifdef CONFIG_COMPAT
static const char *const compat_hwcap_str[] = {
"swp",
"half",
"thumb",
"26bit",
"fastmult",
"fpa",
"vfp",
"edsp",
"java",
"iwmmxt",
"crunch",
"thumbee",
"neon",
"vfpv3",
"vfpv3d16",
"tls",
"vfpv4",
"idiva",
"idivt",
"vfpd32",
"lpae",
"evtstrm"
};
static const char *const compat_hwcap2_str[] = {
"aes",
"pmull",
"sha1",
"sha2",
"crc32",
NULL
};
#endif /* CONFIG_COMPAT */
static int c_show(struct seq_file *m, void *v)
{
unsigned int cpu = smp_processor_id();
u32 l1ip = CTR_L1IP(info->reg_ctr);
int i, j;
for_each_online_cpu(i) {
struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
u32 midr = cpuinfo->reg_midr;
if (l1ip != ICACHE_POLICY_PIPT) {
/*
* VIPT caches are non-aliasing if the VA always equals the PA
* in all bit positions that are covered by the index. This is
* the case if the size of a way (# of sets * line size) does
* not exceed PAGE_SIZE.
* glibc reads /proc/cpuinfo to determine the number of
* online processors, looking for lines beginning with
* "processor". Give glibc what it expects.
*/
u32 waysize = icache_get_numsets() * icache_get_linesize();
seq_printf(m, "processor\t: %d\n", i);
if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE)
set_bit(ICACHEF_ALIASING, &__icache_flags);
/*
* Dump out the common processor features in a single line.
* Userspace should read the hwcaps with getauxval(AT_HWCAP)
* rather than attempting to parse this, but there's a body of
* software which does already (at least for 32-bit).
*/
seq_puts(m, "Features\t:");
if (personality(current->personality) == PER_LINUX32) {
#ifdef CONFIG_COMPAT
for (j = 0; compat_hwcap_str[j]; j++)
if (compat_elf_hwcap & (1 << j))
seq_printf(m, " %s", compat_hwcap_str[j]);
for (j = 0; compat_hwcap2_str[j]; j++)
if (compat_elf_hwcap2 & (1 << j))
seq_printf(m, " %s", compat_hwcap2_str[j]);
#endif /* CONFIG_COMPAT */
} else {
for (j = 0; hwcap_str[j]; j++)
if (elf_hwcap & (1 << j))
seq_printf(m, " %s", hwcap_str[j]);
}
seq_puts(m, "\n");
seq_printf(m, "CPU implementer\t: 0x%02x\n",
MIDR_IMPLEMENTOR(midr));
seq_printf(m, "CPU architecture: 8\n");
seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
}
if (l1ip == ICACHE_POLICY_AIVIVT)
set_bit(ICACHEF_AIVIVT, &__icache_flags);
pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
}
bool cpu_supports_mixed_endian_el0(void)
{
return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
}
bool system_supports_mixed_endian_el0(void)
{
return mixed_endian_el0;
return 0;
}
static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info)
static void *c_start(struct seq_file *m, loff_t *pos)
{
mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0);
return *pos < 1 ? (void *)1 : NULL;
}
static void update_cpu_features(struct cpuinfo_arm64 *info)
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
update_mixed_endian_el0_support(info);
++*pos;
return NULL;
}
static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu)
static void c_stop(struct seq_file *m, void *v)
{
if ((boot & mask) == (cur & mask))
return 0;
pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016lx, CPU%d: %#016lx\n",
name, (unsigned long)boot, cpu, (unsigned long)cur);
return 1;
}
#define CHECK_MASK(field, mask, boot, cur, cpu) \
check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu)
#define CHECK(field, boot, cur, cpu) \
CHECK_MASK(field, ~0ULL, boot, cur, cpu)
const struct seq_operations cpuinfo_op = {
.start = c_start,
.next = c_next,
.stop = c_stop,
.show = c_show
};
/*
* Verify that CPUs don't have unexpected differences that will cause problems.
*/
static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur)
static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
{
unsigned int cpu = smp_processor_id();
struct cpuinfo_arm64 *boot = &boot_cpu_data;
unsigned int diff = 0;
/*
* The kernel can handle differing I-cache policies, but otherwise
* caches should look identical. Userspace JITs will make use of
* *minLine.
*/
diff |= CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu);
/*
* Userspace may perform DC ZVA instructions. Mismatched block sizes
* could result in too much or too little memory being zeroed if a
* process is preempted and migrated between CPUs.
*/
diff |= CHECK(dczid, boot, cur, cpu);
/* If different, timekeeping will be broken (especially with KVM) */
diff |= CHECK(cntfrq, boot, cur, cpu);
/*
* The kernel uses self-hosted debug features and expects CPUs to
* support identical debug features. We presently need CTX_CMPs, WRPs,
* and BRPs to be identical.
* ID_AA64DFR1 is currently RES0.
*/
diff |= CHECK(id_aa64dfr0, boot, cur, cpu);
diff |= CHECK(id_aa64dfr1, boot, cur, cpu);
/*
* Even in big.LITTLE, processors should be identical instruction-set
* wise.
*/
diff |= CHECK(id_aa64isar0, boot, cur, cpu);
diff |= CHECK(id_aa64isar1, boot, cur, cpu);
/*
* Differing PARange support is fine as long as all peripherals and
* memory are mapped within the minimum PARange of all CPUs.
* Linux should not care about secure memory.
* ID_AA64MMFR1 is currently RES0.
*/
diff |= CHECK_MASK(id_aa64mmfr0, 0xffffffffffff0ff0, boot, cur, cpu);
diff |= CHECK(id_aa64mmfr1, boot, cur, cpu);
/*
* EL3 is not our concern.
* ID_AA64PFR1 is currently RES0.
*/
diff |= CHECK_MASK(id_aa64pfr0, 0xffffffffffff0fff, boot, cur, cpu);
diff |= CHECK(id_aa64pfr1, boot, cur, cpu);
u32 l1ip = CTR_L1IP(info->reg_ctr);
/*
* If we have AArch32, we care about 32-bit features for compat. These
* registers should be RES0 otherwise.
*/
diff |= CHECK(id_dfr0, boot, cur, cpu);
diff |= CHECK(id_isar0, boot, cur, cpu);
diff |= CHECK(id_isar1, boot, cur, cpu);
diff |= CHECK(id_isar2, boot, cur, cpu);
diff |= CHECK(id_isar3, boot, cur, cpu);
diff |= CHECK(id_isar4, boot, cur, cpu);
diff |= CHECK(id_isar5, boot, cur, cpu);
/*
* Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
* ACTLR formats could differ across CPUs and therefore would have to
* be trapped for virtualization anyway.
*/
diff |= CHECK_MASK(id_mmfr0, 0xff0fffff, boot, cur, cpu);
diff |= CHECK(id_mmfr1, boot, cur, cpu);
diff |= CHECK(id_mmfr2, boot, cur, cpu);
diff |= CHECK(id_mmfr3, boot, cur, cpu);
diff |= CHECK(id_pfr0, boot, cur, cpu);
diff |= CHECK(id_pfr1, boot, cur, cpu);
if (l1ip != ICACHE_POLICY_PIPT) {
/*
* VIPT caches are non-aliasing if the VA always equals the PA
* in all bit positions that are covered by the index. This is
* the case if the size of a way (# of sets * line size) does
* not exceed PAGE_SIZE.
*/
u32 waysize = icache_get_numsets() * icache_get_linesize();
diff |= CHECK(mvfr0, boot, cur, cpu);
diff |= CHECK(mvfr1, boot, cur, cpu);
diff |= CHECK(mvfr2, boot, cur, cpu);
if (l1ip != ICACHE_POLICY_VIPT || waysize > PAGE_SIZE)
set_bit(ICACHEF_ALIASING, &__icache_flags);
}
if (l1ip == ICACHE_POLICY_AIVIVT)
set_bit(ICACHEF_AIVIVT, &__icache_flags);
/*
* Mismatched CPU features are a recipe for disaster. Don't even
* pretend to support them.
*/
WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC,
"Unsupported CPU feature variation.\n");
pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
}
static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
......@@ -236,15 +229,13 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
cpuinfo_detect_icache_policy(info);
check_local_cpu_errata();
check_local_cpu_features();
update_cpu_features(info);
}
void cpuinfo_store_cpu(void)
{
struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data);
__cpuinfo_store_cpu(info);
cpuinfo_sanity_check(info);
update_cpu_features(smp_processor_id(), info, &boot_cpu_data);
}
void __init cpuinfo_store_boot_cpu(void)
......@@ -253,4 +244,5 @@ void __init cpuinfo_store_boot_cpu(void)
__cpuinfo_store_cpu(info);
boot_cpu_data = *info;
init_cpu_features(&boot_cpu_data);
}
......@@ -26,14 +26,16 @@
#include <linux/stat.h>
#include <linux/uaccess.h>
#include <asm/debug-monitors.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/debug-monitors.h>
#include <asm/system_misc.h>
/* Determine debug architecture. */
u8 debug_monitors_arch(void)
{
return read_cpuid(ID_AA64DFR0_EL1) & 0xf;
return cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
ID_AA64DFR0_DEBUGVER_SHIFT);
}
/*
......
......@@ -29,7 +29,7 @@
* we want to be. The kernel image wants to be placed at TEXT_OFFSET
* from start of RAM.
*/
ENTRY(efi_stub_entry)
ENTRY(entry)
/*
* Create a stack frame to save FP/LR with extra space
* for image_addr variable passed to efi_entry().
......@@ -86,8 +86,8 @@ ENTRY(efi_stub_entry)
* entries for the VA range of the current image, so no maintenance is
* necessary.
*/
adr x0, efi_stub_entry
adr x1, efi_stub_entry_end
adr x0, entry
adr x1, entry_end
sub x1, x1, x0
bl __flush_dcache_area
......@@ -120,5 +120,5 @@ efi_load_fail:
ldp x29, x30, [sp], #32
ret
efi_stub_entry_end:
ENDPROC(efi_stub_entry)
entry_end:
ENDPROC(entry)
......@@ -48,7 +48,6 @@ static struct mm_struct efi_mm = {
.mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
.page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
INIT_MM_CONTEXT(efi_mm)
};
static int __init is_normal_ram(efi_memory_desc_t *md)
......@@ -335,9 +334,9 @@ static void efi_set_pgd(struct mm_struct *mm)
else
cpu_switch_mm(mm->pgd, mm);
flush_tlb_all();
local_flush_tlb_all();
if (icache_is_aivivt())
__flush_icache_all();
__local_flush_icache_all();
}
void efi_virtmap_load(void)
......
......@@ -430,6 +430,8 @@ el0_sync_compat:
b.eq el0_fpsimd_acc
cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception
b.eq el0_fpsimd_exc
cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
b.eq el0_sp_pc
cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
b.eq el0_undef
cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap
......
......@@ -332,21 +332,15 @@ static inline void fpsimd_hotplug_init(void) { }
*/
static int __init fpsimd_init(void)
{
u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
if (pfr & (0xf << 16)) {
if (elf_hwcap & HWCAP_FP) {
fpsimd_pm_init();
fpsimd_hotplug_init();
} else {
pr_notice("Floating-point is not implemented\n");
return 0;
}
elf_hwcap |= HWCAP_FP;
if (pfr & (0xf << 20))
if (!(elf_hwcap & HWCAP_ASIMD))
pr_notice("Advanced SIMD is not implemented\n");
else
elf_hwcap |= HWCAP_ASIMD;
fpsimd_pm_init();
fpsimd_hotplug_init();
return 0;
}
......
......@@ -29,11 +29,13 @@
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/cputype.h>
#include <asm/kernel-pgtable.h>
#include <asm/memory.h>
#include <asm/thread_info.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
#include <asm/virt.h>
#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET)
......@@ -46,31 +48,9 @@
#error TEXT_OFFSET must be less than 2MB
#endif
#ifdef CONFIG_ARM64_64K_PAGES
#define BLOCK_SHIFT PAGE_SHIFT
#define BLOCK_SIZE PAGE_SIZE
#define TABLE_SHIFT PMD_SHIFT
#else
#define BLOCK_SHIFT SECTION_SHIFT
#define BLOCK_SIZE SECTION_SIZE
#define TABLE_SHIFT PUD_SHIFT
#endif
#define KERNEL_START _text
#define KERNEL_END _end
/*
* Initial memory map attributes.
*/
#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF | PTE_SHARED
#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S
#ifdef CONFIG_ARM64_64K_PAGES
#define MM_MMUFLAGS PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS
#else
#define MM_MMUFLAGS PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS
#endif
/*
* Kernel startup entry point.
* ---------------------------
......@@ -120,8 +100,8 @@ efi_head:
#endif
#ifdef CONFIG_EFI
.globl stext_offset
.set stext_offset, stext - efi_head
.globl __efistub_stext_offset
.set __efistub_stext_offset, stext - efi_head
.align 3
pe_header:
.ascii "PE"
......@@ -144,8 +124,8 @@ optional_header:
.long _end - stext // SizeOfCode
.long 0 // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
.long efi_stub_entry - efi_head // AddressOfEntryPoint
.long stext_offset // BaseOfCode
.long __efistub_entry - efi_head // AddressOfEntryPoint
.long __efistub_stext_offset // BaseOfCode
extra_header_fields:
.quad 0 // ImageBase
......@@ -162,7 +142,7 @@ extra_header_fields:
.long _end - efi_head // SizeOfImage
// Everything before the kernel image is considered part of the header
.long stext_offset // SizeOfHeaders
.long __efistub_stext_offset // SizeOfHeaders
.long 0 // CheckSum
.short 0xa // Subsystem (EFI application)
.short 0 // DllCharacteristics
......@@ -207,9 +187,9 @@ section_table:
.byte 0
.byte 0 // end of 0 padding of section name
.long _end - stext // VirtualSize
.long stext_offset // VirtualAddress
.long __efistub_stext_offset // VirtualAddress
.long _edata - stext // SizeOfRawData
.long stext_offset // PointerToRawData
.long __efistub_stext_offset // PointerToRawData
.long 0 // PointerToRelocations (0 for executables)
.long 0 // PointerToLineNumbers (0 for executables)
......@@ -292,8 +272,11 @@ ENDPROC(preserve_boot_args)
*/
.macro create_pgd_entry, tbl, virt, tmp1, tmp2
create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
#if SWAPPER_PGTABLE_LEVELS == 3
create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
#if SWAPPER_PGTABLE_LEVELS > 3
create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2
#endif
#if SWAPPER_PGTABLE_LEVELS > 2
create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
#endif
.endm
......@@ -305,15 +288,15 @@ ENDPROC(preserve_boot_args)
* Corrupts: phys, start, end, pstate
*/
.macro create_block_map, tbl, flags, phys, start, end
lsr \phys, \phys, #BLOCK_SHIFT
lsr \start, \start, #BLOCK_SHIFT
lsr \phys, \phys, #SWAPPER_BLOCK_SHIFT
lsr \start, \start, #SWAPPER_BLOCK_SHIFT
and \start, \start, #PTRS_PER_PTE - 1 // table index
orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry
lsr \end, \end, #BLOCK_SHIFT
orr \phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT // table entry
lsr \end, \end, #SWAPPER_BLOCK_SHIFT
and \end, \end, #PTRS_PER_PTE - 1 // table end index
9999: str \phys, [\tbl, \start, lsl #3] // store the entry
add \start, \start, #1 // next entry
add \phys, \phys, #BLOCK_SIZE // next block
add \phys, \phys, #SWAPPER_BLOCK_SIZE // next block
cmp \start, \end
b.ls 9999b
.endm
......@@ -350,7 +333,7 @@ __create_page_tables:
cmp x0, x6
b.lo 1b
ldr x7, =MM_MMUFLAGS
ldr x7, =SWAPPER_MM_MMUFLAGS
/*
* Create the identity mapping.
......@@ -444,6 +427,9 @@ __mmap_switched:
str_l x21, __fdt_pointer, x5 // Save FDT pointer
str_l x24, memstart_addr, x6 // Save PHYS_OFFSET
mov x29, #0
#ifdef CONFIG_KASAN
bl kasan_early_init
#endif
b start_kernel
ENDPROC(__mmap_switched)
......@@ -630,10 +616,17 @@ ENDPROC(__secondary_switched)
* x0 = SCTLR_EL1 value for turning on the MMU.
* x27 = *virtual* address to jump to upon completion
*
* other registers depend on the function called upon completion
* Other registers depend on the function called upon completion.
*
* Checks if the selected granule size is supported by the CPU.
* If it isn't, park the CPU
*/
.section ".idmap.text", "ax"
__enable_mmu:
mrs x1, ID_AA64MMFR0_EL1
ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
b.ne __no_granule_support
ldr x5, =vectors
msr vbar_el1, x5
msr ttbr0_el1, x25 // load TTBR0
......@@ -651,3 +644,8 @@ __enable_mmu:
isb
br x27
ENDPROC(__enable_mmu)
__no_granule_support:
wfe
b __no_granule_support
ENDPROC(__no_granule_support)
......@@ -28,6 +28,7 @@
#include <linux/ptrace.h>
#include <linux/smp.h>
#include <asm/compat.h>
#include <asm/current.h>
#include <asm/debug-monitors.h>
#include <asm/hw_breakpoint.h>
......@@ -163,6 +164,20 @@ enum hw_breakpoint_ops {
HW_BREAKPOINT_RESTORE
};
static int is_compat_bp(struct perf_event *bp)
{
struct task_struct *tsk = bp->hw.target;
/*
* tsk can be NULL for per-cpu (non-ptrace) breakpoints.
* In this case, use the native interface, since we don't have
* the notion of a "compat CPU" and could end up relying on
* deprecated behaviour if we use unaligned watchpoints in
* AArch64 state.
*/
return tsk && is_compat_thread(task_thread_info(tsk));
}
/**
* hw_breakpoint_slot_setup - Find and setup a perf slot according to
* operations
......@@ -420,7 +435,7 @@ static int arch_build_bp_info(struct perf_event *bp)
* Watchpoints can be of length 1, 2, 4 or 8 bytes.
*/
if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
if (is_compat_task()) {
if (is_compat_bp(bp)) {
if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
info->ctrl.len != ARM_BREAKPOINT_LEN_4)
return -EINVAL;
......@@ -477,7 +492,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
* AArch32 tasks expect some simple alignment fixups, so emulate
* that here.
*/
if (is_compat_task()) {
if (is_compat_bp(bp)) {
if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
alignment_mask = 0x7;
else
......
......@@ -47,7 +47,10 @@
#define __HEAD_FLAG_BE 0
#endif
#define __HEAD_FLAGS (__HEAD_FLAG_BE << 0)
#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \
(__HEAD_FLAG_PAGE_SIZE << 1))
/*
* These will output as part of the Image header, which should be little-endian
......@@ -59,4 +62,37 @@
_kernel_offset_le = DATA_LE64(TEXT_OFFSET); \
_kernel_flags_le = DATA_LE64(__HEAD_FLAGS);
#ifdef CONFIG_EFI
/*
* The EFI stub has its own symbol namespace prefixed by __efistub_, to
* isolate it from the kernel proper. The following symbols are legally
* accessed by the stub, so provide some aliases to make them accessible.
* Only include data symbols here, or text symbols of functions that are
* guaranteed to be safe when executed at another offset than they were
* linked at. The routines below are all implemented in assembler in a
* position independent manner
*/
__efistub_memcmp = __pi_memcmp;
__efistub_memchr = __pi_memchr;
__efistub_memcpy = __pi_memcpy;
__efistub_memmove = __pi_memmove;
__efistub_memset = __pi_memset;
__efistub_strlen = __pi_strlen;
__efistub_strcmp = __pi_strcmp;
__efistub_strncmp = __pi_strncmp;
__efistub___flush_dcache_area = __pi___flush_dcache_area;
#ifdef CONFIG_KASAN
__efistub___memcpy = __pi_memcpy;
__efistub___memmove = __pi_memmove;
__efistub___memset = __pi_memset;
#endif
__efistub__text = _text;
__efistub__end = _end;
__efistub__edata = _edata;
#endif
#endif /* __ASM_IMAGE_H */
......@@ -27,7 +27,6 @@
#include <linux/init.h>
#include <linux/irqchip.h>
#include <linux/seq_file.h>
#include <linux/ratelimit.h>
unsigned long irq_err_count;
......@@ -54,64 +53,3 @@ void __init init_IRQ(void)
if (!handle_arch_irq)
panic("No interrupt controller found.");
}
#ifdef CONFIG_HOTPLUG_CPU
static bool migrate_one_irq(struct irq_desc *desc)
{
struct irq_data *d = irq_desc_get_irq_data(desc);
const struct cpumask *affinity = irq_data_get_affinity_mask(d);
struct irq_chip *c;
bool ret = false;
/*
* If this is a per-CPU interrupt, or the affinity does not
* include this CPU, then we have nothing to do.
*/
if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
return false;
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
affinity = cpu_online_mask;
ret = true;
}
c = irq_data_get_irq_chip(d);
if (!c->irq_set_affinity)
pr_debug("IRQ%u: unable to set affinity\n", d->irq);
else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret)
cpumask_copy(irq_data_get_affinity_mask(d), affinity);
return ret;
}
/*
* The current CPU has been marked offline. Migrate IRQs off this CPU.
* If the affinity settings do not allow other CPUs, force them onto any
* available CPU.
*
* Note: we must iterate over all IRQs, whether they have an attached
* action structure or not, as we need to get chained interrupts too.
*/
void migrate_irqs(void)
{
unsigned int i;
struct irq_desc *desc;
unsigned long flags;
local_irq_save(flags);
for_each_irq_desc(i, desc) {
bool affinity_broken;
raw_spin_lock(&desc->lock);
affinity_broken = migrate_one_irq(desc);
raw_spin_unlock(&desc->lock);
if (affinity_broken)
pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
i, smp_processor_id());
}
local_irq_restore(flags);
}
#endif /* CONFIG_HOTPLUG_CPU */
......@@ -21,6 +21,7 @@
#include <linux/bitops.h>
#include <linux/elf.h>
#include <linux/gfp.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/moduleloader.h>
......@@ -34,9 +35,18 @@
void *module_alloc(unsigned long size)
{
return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
NUMA_NO_NODE, __builtin_return_address(0));
void *p;
p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
NUMA_NO_NODE, __builtin_return_address(0));
if (p && (kasan_module_alloc(p, size) < 0)) {
vfree(p);
return NULL;
}
return p;
}
enum aarch64_reloc_op {
......
......@@ -18,651 +18,12 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#define pr_fmt(fmt) "hw perfevents: " fmt
#include <linux/bitmap.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/of_device.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <asm/cputype.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/pmu.h>
/*
* ARMv8 supports a maximum of 32 events.
* The cycle counter is included in this total.
*/
#define ARMPMU_MAX_HWEVENTS 32
static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
/* Set at runtime when we know what CPU type we are. */
static struct arm_pmu *cpu_pmu;
int
armpmu_get_max_events(void)
{
int max_events = 0;
if (cpu_pmu != NULL)
max_events = cpu_pmu->num_events;
return max_events;
}
EXPORT_SYMBOL_GPL(armpmu_get_max_events);
int perf_num_counters(void)
{
return armpmu_get_max_events();
}
EXPORT_SYMBOL_GPL(perf_num_counters);
#define HW_OP_UNSUPPORTED 0xFFFF
#define C(_x) \
PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED 0xFFFF
#define PERF_MAP_ALL_UNSUPPORTED \
[0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED
#define PERF_CACHE_MAP_ALL_UNSUPPORTED \
[0 ... C(MAX) - 1] = { \
[0 ... C(OP_MAX) - 1] = { \
[0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \
}, \
}
static int
armpmu_map_cache_event(const unsigned (*cache_map)
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX],
u64 config)
{
unsigned int cache_type, cache_op, cache_result, ret;
cache_type = (config >> 0) & 0xff;
if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
return -EINVAL;
cache_op = (config >> 8) & 0xff;
if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
return -EINVAL;
cache_result = (config >> 16) & 0xff;
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
return -EINVAL;
ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
if (ret == CACHE_OP_UNSUPPORTED)
return -ENOENT;
return ret;
}
static int
armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
{
int mapping;
if (config >= PERF_COUNT_HW_MAX)
return -EINVAL;
mapping = (*event_map)[config];
return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
}
static int
armpmu_map_raw_event(u32 raw_event_mask, u64 config)
{
return (int)(config & raw_event_mask);
}
static int map_cpu_event(struct perf_event *event,
const unsigned (*event_map)[PERF_COUNT_HW_MAX],
const unsigned (*cache_map)
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX],
u32 raw_event_mask)
{
u64 config = event->attr.config;
switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
return armpmu_map_event(event_map, config);
case PERF_TYPE_HW_CACHE:
return armpmu_map_cache_event(cache_map, config);
case PERF_TYPE_RAW:
return armpmu_map_raw_event(raw_event_mask, config);
}
return -ENOENT;
}
int
armpmu_event_set_period(struct perf_event *event,
struct hw_perf_event *hwc,
int idx)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
s64 left = local64_read(&hwc->period_left);
s64 period = hwc->sample_period;
int ret = 0;
if (unlikely(left <= -period)) {
left = period;
local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (unlikely(left <= 0)) {
left += period;
local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
/*
* Limit the maximum period to prevent the counter value
* from overtaking the one we are about to program. In
* effect we are reducing max_period to account for
* interrupt latency (and we are being very conservative).
*/
if (left > (armpmu->max_period >> 1))
left = armpmu->max_period >> 1;
local64_set(&hwc->prev_count, (u64)-left);
armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
perf_event_update_userpage(event);
return ret;
}
u64
armpmu_event_update(struct perf_event *event,
struct hw_perf_event *hwc,
int idx)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
u64 delta, prev_raw_count, new_raw_count;
again:
prev_raw_count = local64_read(&hwc->prev_count);
new_raw_count = armpmu->read_counter(idx);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
local64_add(delta, &event->count);
local64_sub(delta, &hwc->period_left);
return new_raw_count;
}
static void
armpmu_read(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
/* Don't read disabled counters! */
if (hwc->idx < 0)
return;
armpmu_event_update(event, hwc, hwc->idx);
}
static void
armpmu_stop(struct perf_event *event, int flags)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
/*
* ARM pmu always has to update the counter, so ignore
* PERF_EF_UPDATE, see comments in armpmu_start().
*/
if (!(hwc->state & PERF_HES_STOPPED)) {
armpmu->disable(hwc, hwc->idx);
barrier(); /* why? */
armpmu_event_update(event, hwc, hwc->idx);
hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}
}
static void
armpmu_start(struct perf_event *event, int flags)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
/*
* ARM pmu always has to reprogram the period, so ignore
* PERF_EF_RELOAD, see the comment below.
*/
if (flags & PERF_EF_RELOAD)
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
hwc->state = 0;
/*
* Set the period again. Some counters can't be stopped, so when we
* were stopped we simply disabled the IRQ source and the counter
* may have been left counting. If we don't do this step then we may
* get an interrupt too soon or *way* too late if the overflow has
* happened since disabling.
*/
armpmu_event_set_period(event, hwc, hwc->idx);
armpmu->enable(hwc, hwc->idx);
}
static void
armpmu_del(struct perf_event *event, int flags)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct pmu_hw_events *hw_events = armpmu->get_hw_events();
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
WARN_ON(idx < 0);
armpmu_stop(event, PERF_EF_UPDATE);
hw_events->events[idx] = NULL;
clear_bit(idx, hw_events->used_mask);
perf_event_update_userpage(event);
}
static int
armpmu_add(struct perf_event *event, int flags)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct pmu_hw_events *hw_events = armpmu->get_hw_events();
struct hw_perf_event *hwc = &event->hw;
int idx;
int err = 0;
perf_pmu_disable(event->pmu);
/* If we don't have a space for the counter then finish early. */
idx = armpmu->get_event_idx(hw_events, hwc);
if (idx < 0) {
err = idx;
goto out;
}
/*
* If there is an event in the counter we are going to use then make
* sure it is disabled.
*/
event->hw.idx = idx;
armpmu->disable(hwc, idx);
hw_events->events[idx] = event;
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
if (flags & PERF_EF_START)
armpmu_start(event, PERF_EF_RELOAD);
/* Propagate our changes to the userspace mapping. */
perf_event_update_userpage(event);
out:
perf_pmu_enable(event->pmu);
return err;
}
static int
validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
struct perf_event *event)
{
struct arm_pmu *armpmu;
struct hw_perf_event fake_event = event->hw;
struct pmu *leader_pmu = event->group_leader->pmu;
if (is_software_event(event))
return 1;
/*
* Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
* core perf code won't check that the pmu->ctx == leader->ctx
* until after pmu->event_init(event).
*/
if (event->pmu != pmu)
return 0;
if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
return 1;
if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
return 1;
armpmu = to_arm_pmu(event->pmu);
return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
}
static int
validate_group(struct perf_event *event)
{
struct perf_event *sibling, *leader = event->group_leader;
struct pmu_hw_events fake_pmu;
DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS);
/*
* Initialise the fake PMU. We only need to populate the
* used_mask for the purposes of validation.
*/
memset(fake_used_mask, 0, sizeof(fake_used_mask));
fake_pmu.used_mask = fake_used_mask;
if (!validate_event(event->pmu, &fake_pmu, leader))
return -EINVAL;
list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
if (!validate_event(event->pmu, &fake_pmu, sibling))
return -EINVAL;
}
if (!validate_event(event->pmu, &fake_pmu, event))
return -EINVAL;
return 0;
}
static void
armpmu_disable_percpu_irq(void *data)
{
unsigned int irq = *(unsigned int *)data;
disable_percpu_irq(irq);
}
static void
armpmu_release_hardware(struct arm_pmu *armpmu)
{
int irq;
unsigned int i, irqs;
struct platform_device *pmu_device = armpmu->plat_device;
irqs = min(pmu_device->num_resources, num_possible_cpus());
if (!irqs)
return;
irq = platform_get_irq(pmu_device, 0);
if (irq <= 0)
return;
if (irq_is_percpu(irq)) {
on_each_cpu(armpmu_disable_percpu_irq, &irq, 1);
free_percpu_irq(irq, &cpu_hw_events);
} else {
for (i = 0; i < irqs; ++i) {
int cpu = i;
if (armpmu->irq_affinity)
cpu = armpmu->irq_affinity[i];
if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
continue;
irq = platform_get_irq(pmu_device, i);
if (irq > 0)
free_irq(irq, armpmu);
}
}
}
static void
armpmu_enable_percpu_irq(void *data)
{
unsigned int irq = *(unsigned int *)data;
enable_percpu_irq(irq, IRQ_TYPE_NONE);
}
static int
armpmu_reserve_hardware(struct arm_pmu *armpmu)
{
int err, irq;
unsigned int i, irqs;
struct platform_device *pmu_device = armpmu->plat_device;
if (!pmu_device)
return -ENODEV;
irqs = min(pmu_device->num_resources, num_possible_cpus());
if (!irqs) {
pr_err("no irqs for PMUs defined\n");
return -ENODEV;
}
irq = platform_get_irq(pmu_device, 0);
if (irq <= 0) {
pr_err("failed to get valid irq for PMU device\n");
return -ENODEV;
}
if (irq_is_percpu(irq)) {
err = request_percpu_irq(irq, armpmu->handle_irq,
"arm-pmu", &cpu_hw_events);
if (err) {
pr_err("unable to request percpu IRQ%d for ARM PMU counters\n",
irq);
armpmu_release_hardware(armpmu);
return err;
}
on_each_cpu(armpmu_enable_percpu_irq, &irq, 1);
} else {
for (i = 0; i < irqs; ++i) {
int cpu = i;
err = 0;
irq = platform_get_irq(pmu_device, i);
if (irq <= 0)
continue;
if (armpmu->irq_affinity)
cpu = armpmu->irq_affinity[i];
/*
* If we have a single PMU interrupt that we can't shift,
* assume that we're running on a uniprocessor machine and
* continue. Otherwise, continue without this interrupt.
*/
if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
irq, cpu);
continue;
}
err = request_irq(irq, armpmu->handle_irq,
IRQF_NOBALANCING | IRQF_NO_THREAD,
"arm-pmu", armpmu);
if (err) {
pr_err("unable to request IRQ%d for ARM PMU counters\n",
irq);
armpmu_release_hardware(armpmu);
return err;
}
cpumask_set_cpu(cpu, &armpmu->active_irqs);
}
}
return 0;
}
static void
hw_perf_event_destroy(struct perf_event *event)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
atomic_t *active_events = &armpmu->active_events;
struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
armpmu_release_hardware(armpmu);
mutex_unlock(pmu_reserve_mutex);
}
}
static int
event_requires_mode_exclusion(struct perf_event_attr *attr)
{
return attr->exclude_idle || attr->exclude_user ||
attr->exclude_kernel || attr->exclude_hv;
}
static int
__hw_perf_event_init(struct perf_event *event)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
int mapping, err;
mapping = armpmu->map_event(event);
if (mapping < 0) {
pr_debug("event %x:%llx not supported\n", event->attr.type,
event->attr.config);
return mapping;
}
/*
* We don't assign an index until we actually place the event onto
* hardware. Use -1 to signify that we haven't decided where to put it
* yet. For SMP systems, each core has it's own PMU so we can't do any
* clever allocation or constraints checking at this point.
*/
hwc->idx = -1;
hwc->config_base = 0;
hwc->config = 0;
hwc->event_base = 0;
/*
* Check whether we need to exclude the counter from certain modes.
*/
if ((!armpmu->set_event_filter ||
armpmu->set_event_filter(hwc, &event->attr)) &&
event_requires_mode_exclusion(&event->attr)) {
pr_debug("ARM performance counters do not support mode exclusion\n");
return -EPERM;
}
/*
* Store the event encoding into the config_base field.
*/
hwc->config_base |= (unsigned long)mapping;
if (!hwc->sample_period) {
/*
* For non-sampling runs, limit the sample_period to half
* of the counter width. That way, the new counter value
* is far less likely to overtake the previous one unless
* you have some serious IRQ latency issues.
*/
hwc->sample_period = armpmu->max_period >> 1;
hwc->last_period = hwc->sample_period;
local64_set(&hwc->period_left, hwc->sample_period);
}
err = 0;
if (event->group_leader != event) {
err = validate_group(event);
if (err)
return -EINVAL;
}
return err;
}
static int armpmu_event_init(struct perf_event *event)
{
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
int err = 0;
atomic_t *active_events = &armpmu->active_events;
if (armpmu->map_event(event) == -ENOENT)
return -ENOENT;
event->destroy = hw_perf_event_destroy;
if (!atomic_inc_not_zero(active_events)) {
mutex_lock(&armpmu->reserve_mutex);
if (atomic_read(active_events) == 0)
err = armpmu_reserve_hardware(armpmu);
if (!err)
atomic_inc(active_events);
mutex_unlock(&armpmu->reserve_mutex);
}
if (err)
return err;
err = __hw_perf_event_init(event);
if (err)
hw_perf_event_destroy(event);
return err;
}
static void armpmu_enable(struct pmu *pmu)
{
struct arm_pmu *armpmu = to_arm_pmu(pmu);
struct pmu_hw_events *hw_events = armpmu->get_hw_events();
int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
if (enabled)
armpmu->start();
}
static void armpmu_disable(struct pmu *pmu)
{
struct arm_pmu *armpmu = to_arm_pmu(pmu);
armpmu->stop();
}
static void __init armpmu_init(struct arm_pmu *armpmu)
{
atomic_set(&armpmu->active_events, 0);
mutex_init(&armpmu->reserve_mutex);
armpmu->pmu = (struct pmu) {
.pmu_enable = armpmu_enable,
.pmu_disable = armpmu_disable,
.event_init = armpmu_event_init,
.add = armpmu_add,
.del = armpmu_del,
.start = armpmu_start,
.stop = armpmu_stop,
.read = armpmu_read,
};
}
int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type)
{
armpmu_init(armpmu);
return perf_pmu_register(&armpmu->pmu, name, type);
}
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
/*
* ARMv8 PMUv3 Performance Events handling code.
......@@ -708,6 +69,21 @@ enum armv8_pmuv3_perf_types {
ARMV8_PMUV3_PERFCTR_BUS_CYCLES = 0x1D,
};
/* ARMv8 Cortex-A53 specific event types. */
enum armv8_a53_pmu_perf_types {
ARMV8_A53_PERFCTR_PREFETCH_LINEFILL = 0xC2,
};
/* ARMv8 Cortex-A57 specific event types. */
enum armv8_a57_perf_types {
ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD = 0x40,
ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST = 0x41,
ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD = 0x42,
ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST = 0x43,
ARMV8_A57_PERFCTR_DTLB_REFILL_LD = 0x4c,
ARMV8_A57_PERFCTR_DTLB_REFILL_ST = 0x4d,
};
/* PMUv3 HW events mapping. */
static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
......@@ -718,6 +94,28 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
};
/* ARM Cortex-A53 HW events mapping. */
static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
};
static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
[PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
[PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
};
static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
......@@ -734,12 +132,60 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
};
static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREFETCH_LINEFILL,
[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
[C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
};
static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
PERF_CACHE_MAP_ALL_UNSUPPORTED,
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST,
[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_LD,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_ST,
[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
[C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
};
/*
* Perf Events' indices
*/
#define ARMV8_IDX_CYCLE_COUNTER 0
#define ARMV8_IDX_COUNTER0 1
#define ARMV8_IDX_COUNTER_LAST (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
#define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \
(ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
#define ARMV8_MAX_COUNTERS 32
#define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1)
......@@ -805,49 +251,34 @@ static inline int armv8pmu_has_overflowed(u32 pmovsr)
return pmovsr & ARMV8_OVERFLOWED_MASK;
}
static inline int armv8pmu_counter_valid(int idx)
static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx)
{
return idx >= ARMV8_IDX_CYCLE_COUNTER && idx <= ARMV8_IDX_COUNTER_LAST;
return idx >= ARMV8_IDX_CYCLE_COUNTER &&
idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu);
}
static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
{
int ret = 0;
u32 counter;
if (!armv8pmu_counter_valid(idx)) {
pr_err("CPU%u checking wrong counter %d overflow status\n",
smp_processor_id(), idx);
} else {
counter = ARMV8_IDX_TO_COUNTER(idx);
ret = pmnc & BIT(counter);
}
return ret;
return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
}
static inline int armv8pmu_select_counter(int idx)
{
u32 counter;
if (!armv8pmu_counter_valid(idx)) {
pr_err("CPU%u selecting wrong PMNC counter %d\n",
smp_processor_id(), idx);
return -EINVAL;
}
counter = ARMV8_IDX_TO_COUNTER(idx);
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
asm volatile("msr pmselr_el0, %0" :: "r" (counter));
isb();
return idx;
}
static inline u32 armv8pmu_read_counter(int idx)
static inline u32 armv8pmu_read_counter(struct perf_event *event)
{
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
u32 value = 0;
if (!armv8pmu_counter_valid(idx))
if (!armv8pmu_counter_valid(cpu_pmu, idx))
pr_err("CPU%u reading wrong counter %d\n",
smp_processor_id(), idx);
else if (idx == ARMV8_IDX_CYCLE_COUNTER)
......@@ -858,9 +289,13 @@ static inline u32 armv8pmu_read_counter(int idx)
return value;
}
static inline void armv8pmu_write_counter(int idx, u32 value)
static inline void armv8pmu_write_counter(struct perf_event *event, u32 value)
{
if (!armv8pmu_counter_valid(idx))
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
if (!armv8pmu_counter_valid(cpu_pmu, idx))
pr_err("CPU%u writing wrong counter %d\n",
smp_processor_id(), idx);
else if (idx == ARMV8_IDX_CYCLE_COUNTER)
......@@ -879,65 +314,34 @@ static inline void armv8pmu_write_evtype(int idx, u32 val)
static inline int armv8pmu_enable_counter(int idx)
{
u32 counter;
if (!armv8pmu_counter_valid(idx)) {
pr_err("CPU%u enabling wrong PMNC counter %d\n",
smp_processor_id(), idx);
return -EINVAL;
}
counter = ARMV8_IDX_TO_COUNTER(idx);
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter)));
return idx;
}
static inline int armv8pmu_disable_counter(int idx)
{
u32 counter;
if (!armv8pmu_counter_valid(idx)) {
pr_err("CPU%u disabling wrong PMNC counter %d\n",
smp_processor_id(), idx);
return -EINVAL;
}
counter = ARMV8_IDX_TO_COUNTER(idx);
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter)));
return idx;
}
static inline int armv8pmu_enable_intens(int idx)
{
u32 counter;
if (!armv8pmu_counter_valid(idx)) {
pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
smp_processor_id(), idx);
return -EINVAL;
}
counter = ARMV8_IDX_TO_COUNTER(idx);
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter)));
return idx;
}
static inline int armv8pmu_disable_intens(int idx)
{
u32 counter;
if (!armv8pmu_counter_valid(idx)) {
pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
smp_processor_id(), idx);
return -EINVAL;
}
counter = ARMV8_IDX_TO_COUNTER(idx);
u32 counter = ARMV8_IDX_TO_COUNTER(idx);
asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter)));
isb();
/* Clear the overflow flag in case an interrupt is pending. */
asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter)));
isb();
return idx;
}
......@@ -955,10 +359,13 @@ static inline u32 armv8pmu_getreset_flags(void)
return value;
}
static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx)
static void armv8pmu_enable_event(struct perf_event *event)
{
unsigned long flags;
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
struct hw_perf_event *hwc = &event->hw;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
/*
* Enable counter and interrupt, and set the counter to count
......@@ -989,10 +396,13 @@ static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx)
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void armv8pmu_disable_event(struct hw_perf_event *hwc, int idx)
static void armv8pmu_disable_event(struct perf_event *event)
{
unsigned long flags;
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
struct hw_perf_event *hwc = &event->hw;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
int idx = hwc->idx;
/*
* Disable counter and interrupt
......@@ -1016,7 +426,8 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
{
u32 pmovsr;
struct perf_sample_data data;
struct pmu_hw_events *cpuc;
struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
struct pt_regs *regs;
int idx;
......@@ -1036,7 +447,6 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
*/
regs = get_irq_regs();
cpuc = this_cpu_ptr(&cpu_hw_events);
for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
......@@ -1053,13 +463,13 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
continue;
hwc = &event->hw;
armpmu_event_update(event, hwc, idx);
armpmu_event_update(event);
perf_sample_data_init(&data, 0, hwc->last_period);
if (!armpmu_event_set_period(event, hwc, idx))
if (!armpmu_event_set_period(event))
continue;
if (perf_event_overflow(event, &data, regs))
cpu_pmu->disable(hwc, idx);
cpu_pmu->disable(event);
}
/*
......@@ -1074,10 +484,10 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
return IRQ_HANDLED;
}
static void armv8pmu_start(void)
static void armv8pmu_start(struct arm_pmu *cpu_pmu)
{
unsigned long flags;
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Enable all counters */
......@@ -1085,10 +495,10 @@ static void armv8pmu_start(void)
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
}
static void armv8pmu_stop(void)
static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
{
unsigned long flags;
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
raw_spin_lock_irqsave(&events->pmu_lock, flags);
/* Disable all counters */
......@@ -1097,10 +507,12 @@ static void armv8pmu_stop(void)
}
static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
struct hw_perf_event *event)
struct perf_event *event)
{
int idx;
unsigned long evtype = event->config_base & ARMV8_EVTYPE_EVENT;
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
unsigned long evtype = hwc->config_base & ARMV8_EVTYPE_EVENT;
/* Always place a cycle counter into the cycle counter. */
if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) {
......@@ -1151,11 +563,14 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
static void armv8pmu_reset(void *info)
{
struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
u32 idx, nb_cnt = cpu_pmu->num_events;
/* The counter and interrupt enable registers are unknown at reset. */
for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx)
armv8pmu_disable_event(NULL, idx);
for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
armv8pmu_disable_counter(idx);
armv8pmu_disable_intens(idx);
}
/* Initialize & Reset PMNC: C and P bits. */
armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
......@@ -1166,169 +581,104 @@ static void armv8pmu_reset(void *info)
static int armv8_pmuv3_map_event(struct perf_event *event)
{
return map_cpu_event(event, &armv8_pmuv3_perf_map,
return armpmu_map_event(event, &armv8_pmuv3_perf_map,
&armv8_pmuv3_perf_cache_map,
ARMV8_EVTYPE_EVENT);
}
static struct arm_pmu armv8pmu = {
.handle_irq = armv8pmu_handle_irq,
.enable = armv8pmu_enable_event,
.disable = armv8pmu_disable_event,
.read_counter = armv8pmu_read_counter,
.write_counter = armv8pmu_write_counter,
.get_event_idx = armv8pmu_get_event_idx,
.start = armv8pmu_start,
.stop = armv8pmu_stop,
.reset = armv8pmu_reset,
.max_period = (1LLU << 32) - 1,
};
static int armv8_a53_map_event(struct perf_event *event)
{
return armpmu_map_event(event, &armv8_a53_perf_map,
&armv8_a53_perf_cache_map,
ARMV8_EVTYPE_EVENT);
}
static u32 __init armv8pmu_read_num_pmnc_events(void)
static int armv8_a57_map_event(struct perf_event *event)
{
u32 nb_cnt;
return armpmu_map_event(event, &armv8_a57_perf_map,
&armv8_a57_perf_cache_map,
ARMV8_EVTYPE_EVENT);
}
static void armv8pmu_read_num_pmnc_events(void *info)
{
int *nb_cnt = info;
/* Read the nb of CNTx counters supported from PMNC */
nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;
*nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;
/* Add the CPU cycles counter and return */
return nb_cnt + 1;
/* Add the CPU cycles counter */
*nb_cnt += 1;
}
static struct arm_pmu *__init armv8_pmuv3_pmu_init(void)
static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu)
{
armv8pmu.name = "arm/armv8-pmuv3";
armv8pmu.map_event = armv8_pmuv3_map_event;
armv8pmu.num_events = armv8pmu_read_num_pmnc_events();
armv8pmu.set_event_filter = armv8pmu_set_event_filter;
return &armv8pmu;
return smp_call_function_any(&arm_pmu->supported_cpus,
armv8pmu_read_num_pmnc_events,
&arm_pmu->num_events, 1);
}
/*
* Ensure the PMU has sane values out of reset.
* This requires SMP to be available, so exists as a separate initcall.
*/
static int __init
cpu_pmu_reset(void)
static void armv8_pmu_init(struct arm_pmu *cpu_pmu)
{
if (cpu_pmu && cpu_pmu->reset)
return on_each_cpu(cpu_pmu->reset, NULL, 1);
return 0;
cpu_pmu->handle_irq = armv8pmu_handle_irq,
cpu_pmu->enable = armv8pmu_enable_event,
cpu_pmu->disable = armv8pmu_disable_event,
cpu_pmu->read_counter = armv8pmu_read_counter,
cpu_pmu->write_counter = armv8pmu_write_counter,
cpu_pmu->get_event_idx = armv8pmu_get_event_idx,
cpu_pmu->start = armv8pmu_start,
cpu_pmu->stop = armv8pmu_stop,
cpu_pmu->reset = armv8pmu_reset,
cpu_pmu->max_period = (1LLU << 32) - 1,
cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
}
arch_initcall(cpu_pmu_reset);
/*
* PMU platform driver and devicetree bindings.
*/
static const struct of_device_id armpmu_of_device_ids[] = {
{.compatible = "arm,armv8-pmuv3"},
{},
};
static int armpmu_device_probe(struct platform_device *pdev)
static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
{
int i, irq, *irqs;
if (!cpu_pmu)
return -ENODEV;
/* Don't bother with PPIs; they're already affine */
irq = platform_get_irq(pdev, 0);
if (irq >= 0 && irq_is_percpu(irq))
goto out;
irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
if (!irqs)
return -ENOMEM;
for (i = 0; i < pdev->num_resources; ++i) {
struct device_node *dn;
int cpu;
dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
i);
if (!dn) {
pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
of_node_full_name(pdev->dev.of_node), i);
break;
}
for_each_possible_cpu(cpu)
if (dn == of_cpu_device_node_get(cpu))
break;
if (cpu >= nr_cpu_ids) {
pr_warn("Failed to find logical CPU for %s\n",
dn->name);
of_node_put(dn);
break;
}
of_node_put(dn);
irqs[i] = cpu;
}
if (i == pdev->num_resources)
cpu_pmu->irq_affinity = irqs;
else
kfree(irqs);
out:
cpu_pmu->plat_device = pdev;
return 0;
armv8_pmu_init(cpu_pmu);
cpu_pmu->name = "armv8_pmuv3";
cpu_pmu->map_event = armv8_pmuv3_map_event;
return armv8pmu_probe_num_events(cpu_pmu);
}
static struct platform_driver armpmu_driver = {
.driver = {
.name = "arm-pmu",
.of_match_table = armpmu_of_device_ids,
},
.probe = armpmu_device_probe,
};
static int __init register_pmu_driver(void)
static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
{
return platform_driver_register(&armpmu_driver);
armv8_pmu_init(cpu_pmu);
cpu_pmu->name = "armv8_cortex_a53";
cpu_pmu->map_event = armv8_a53_map_event;
return armv8pmu_probe_num_events(cpu_pmu);
}
device_initcall(register_pmu_driver);
static struct pmu_hw_events *armpmu_get_cpu_events(void)
static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
{
return this_cpu_ptr(&cpu_hw_events);
armv8_pmu_init(cpu_pmu);
cpu_pmu->name = "armv8_cortex_a57";
cpu_pmu->map_event = armv8_a57_map_event;
return armv8pmu_probe_num_events(cpu_pmu);
}
static void __init cpu_pmu_init(struct arm_pmu *armpmu)
{
int cpu;
for_each_possible_cpu(cpu) {
struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
events->events = per_cpu(hw_events, cpu);
events->used_mask = per_cpu(used_mask, cpu);
raw_spin_lock_init(&events->pmu_lock);
}
armpmu->get_hw_events = armpmu_get_cpu_events;
}
static const struct of_device_id armv8_pmu_of_device_ids[] = {
{.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init},
{.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init},
{.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init},
{},
};
static int __init init_hw_perf_events(void)
static int armv8_pmu_device_probe(struct platform_device *pdev)
{
u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
switch ((dfr >> 8) & 0xf) {
case 0x1: /* PMUv3 */
cpu_pmu = armv8_pmuv3_pmu_init();
break;
}
return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);
}
if (cpu_pmu) {
pr_info("enabled with %s PMU driver, %d counters available\n",
cpu_pmu->name, cpu_pmu->num_events);
cpu_pmu_init(cpu_pmu);
armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
} else {
pr_info("no hardware support available\n");
}
static struct platform_driver armv8_pmu_driver = {
.driver = {
.name = "armv8-pmu",
.of_match_table = armv8_pmu_of_device_ids,
},
.probe = armv8_pmu_device_probe,
};
return 0;
static int __init register_armv8_pmu_driver(void)
{
return platform_driver_register(&armv8_pmu_driver);
}
early_initcall(init_hw_perf_events);
device_initcall(register_armv8_pmu_driver);
......@@ -44,6 +44,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/personality.h>
#include <linux/notifier.h>
#include <trace/events/power.h>
#include <asm/compat.h>
#include <asm/cacheflush.h>
......@@ -75,8 +76,10 @@ void arch_cpu_idle(void)
* This should do all the clock switching and wait for interrupt
* tricks
*/
trace_cpu_idle_rcuidle(1, smp_processor_id());
cpu_do_idle();
local_irq_enable();
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_HOTPLUG_CPU
......
......@@ -28,7 +28,6 @@
#include <linux/console.h>
#include <linux/cache.h>
#include <linux/bootmem.h>
#include <linux/seq_file.h>
#include <linux/screen_info.h>
#include <linux/init.h>
#include <linux/kexec.h>
......@@ -44,7 +43,6 @@
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/efi.h>
#include <linux/personality.h>
#include <linux/psci.h>
#include <asm/acpi.h>
......@@ -54,6 +52,7 @@
#include <asm/elf.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/kasan.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
......@@ -64,23 +63,6 @@
#include <asm/efi.h>
#include <asm/xen/hypervisor.h>
unsigned long elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
#ifdef CONFIG_COMPAT
#define COMPAT_ELF_HWCAP_DEFAULT \
(COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
COMPAT_HWCAP_LPAE)
unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
unsigned int compat_elf_hwcap2 __read_mostly;
#endif
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
phys_addr_t __fdt_pointer __initdata;
/*
......@@ -195,104 +177,6 @@ static void __init smp_build_mpidr_hash(void)
__flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
}
static void __init setup_processor(void)
{
u64 features;
s64 block;
u32 cwg;
int cls;
printk("CPU: AArch64 Processor [%08x] revision %d\n",
read_cpuid_id(), read_cpuid_id() & 15);
sprintf(init_utsname()->machine, ELF_PLATFORM);
elf_hwcap = 0;
cpuinfo_store_boot_cpu();
/*
* Check for sane CTR_EL0.CWG value.
*/
cwg = cache_type_cwg();
cls = cache_line_size();
if (!cwg)
pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
cls);
if (L1_CACHE_BYTES < cls)
pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
L1_CACHE_BYTES, cls);
/*
* ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks.
* The blocks we test below represent incremental functionality
* for non-negative values. Negative values are reserved.
*/
features = read_cpuid(ID_AA64ISAR0_EL1);
block = cpuid_feature_extract_field(features, 4);
if (block > 0) {
switch (block) {
default:
case 2:
elf_hwcap |= HWCAP_PMULL;
case 1:
elf_hwcap |= HWCAP_AES;
case 0:
break;
}
}
if (cpuid_feature_extract_field(features, 8) > 0)
elf_hwcap |= HWCAP_SHA1;
if (cpuid_feature_extract_field(features, 12) > 0)
elf_hwcap |= HWCAP_SHA2;
if (cpuid_feature_extract_field(features, 16) > 0)
elf_hwcap |= HWCAP_CRC32;
block = cpuid_feature_extract_field(features, 20);
if (block > 0) {
switch (block) {
default:
case 2:
elf_hwcap |= HWCAP_ATOMICS;
case 1:
/* RESERVED */
case 0:
break;
}
}
#ifdef CONFIG_COMPAT
/*
* ID_ISAR5_EL1 carries similar information as above, but pertaining to
* the AArch32 32-bit execution state.
*/
features = read_cpuid(ID_ISAR5_EL1);
block = cpuid_feature_extract_field(features, 4);
if (block > 0) {
switch (block) {
default:
case 2:
compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
case 1:
compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
case 0:
break;
}
}
if (cpuid_feature_extract_field(features, 8) > 0)
compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;
if (cpuid_feature_extract_field(features, 12) > 0)
compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;
if (cpuid_feature_extract_field(features, 16) > 0)
compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
#endif
}
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
void *dt_virt = fixmap_remap_fdt(dt_phys);
......@@ -406,8 +290,9 @@ u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
void __init setup_arch(char **cmdline_p)
{
setup_processor();
pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
sprintf(init_utsname()->machine, ELF_PLATFORM);
init_mm.start_code = (unsigned long) _text;
init_mm.end_code = (unsigned long) _etext;
init_mm.end_data = (unsigned long) _edata;
......@@ -436,6 +321,9 @@ void __init setup_arch(char **cmdline_p)
paging_init();
relocate_initrd();
kasan_init();
request_standard_resources();
early_ioremap_reset();
......@@ -493,124 +381,3 @@ static int __init topology_init(void)
return 0;
}
subsys_initcall(topology_init);
static const char *hwcap_str[] = {
"fp",
"asimd",
"evtstrm",
"aes",
"pmull",
"sha1",
"sha2",
"crc32",
"atomics",
NULL
};
#ifdef CONFIG_COMPAT
static const char *compat_hwcap_str[] = {
"swp",
"half",
"thumb",
"26bit",
"fastmult",
"fpa",
"vfp",
"edsp",
"java",
"iwmmxt",
"crunch",
"thumbee",
"neon",
"vfpv3",
"vfpv3d16",
"tls",
"vfpv4",
"idiva",
"idivt",
"vfpd32",
"lpae",
"evtstrm"
};
static const char *compat_hwcap2_str[] = {
"aes",
"pmull",
"sha1",
"sha2",
"crc32",
NULL
};
#endif /* CONFIG_COMPAT */
static int c_show(struct seq_file *m, void *v)
{
int i, j;
for_each_online_cpu(i) {
struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
u32 midr = cpuinfo->reg_midr;
/*
* glibc reads /proc/cpuinfo to determine the number of
* online processors, looking for lines beginning with
* "processor". Give glibc what it expects.
*/
seq_printf(m, "processor\t: %d\n", i);
/*
* Dump out the common processor features in a single line.
* Userspace should read the hwcaps with getauxval(AT_HWCAP)
* rather than attempting to parse this, but there's a body of
* software which does already (at least for 32-bit).
*/
seq_puts(m, "Features\t:");
if (personality(current->personality) == PER_LINUX32) {
#ifdef CONFIG_COMPAT
for (j = 0; compat_hwcap_str[j]; j++)
if (compat_elf_hwcap & (1 << j))
seq_printf(m, " %s", compat_hwcap_str[j]);
for (j = 0; compat_hwcap2_str[j]; j++)
if (compat_elf_hwcap2 & (1 << j))
seq_printf(m, " %s", compat_hwcap2_str[j]);
#endif /* CONFIG_COMPAT */
} else {
for (j = 0; hwcap_str[j]; j++)
if (elf_hwcap & (1 << j))
seq_printf(m, " %s", hwcap_str[j]);
}
seq_puts(m, "\n");
seq_printf(m, "CPU implementer\t: 0x%02x\n",
MIDR_IMPLEMENTOR(midr));
seq_printf(m, "CPU architecture: 8\n");
seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
}
return 0;
}
static void *c_start(struct seq_file *m, loff_t *pos)
{
return *pos < 1 ? (void *)1 : NULL;
}
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
++*pos;
return NULL;
}
static void c_stop(struct seq_file *m, void *v)
{
}
const struct seq_operations cpuinfo_op = {
.start = c_start,
.next = c_next,
.stop = c_stop,
.show = c_show
};
......@@ -142,22 +142,27 @@ asmlinkage void secondary_start_kernel(void)
*/
atomic_inc(&mm->mm_count);
current->active_mm = mm;
cpumask_set_cpu(cpu, mm_cpumask(mm));
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
printk("CPU%u: Booted secondary processor\n", cpu);
/*
* TTBR0 is only used for the identity mapping at this stage. Make it
* point to zero page to avoid speculatively fetching new entries.
*/
cpu_set_reserved_ttbr0();
flush_tlb_all();
local_flush_tlb_all();
cpu_set_default_tcr_t0sz();
preempt_disable();
trace_hardirqs_off();
/*
* If the system has established the capabilities, make sure
* this CPU ticks all of those. If it doesn't, the CPU will
* fail to come online.
*/
verify_local_cpu_capabilities();
if (cpu_ops[cpu]->cpu_postboot)
cpu_ops[cpu]->cpu_postboot();
......@@ -178,6 +183,8 @@ asmlinkage void secondary_start_kernel(void)
* the CPU migration code to notice that the CPU is online
* before we continue.
*/
pr_info("CPU%u: Booted secondary processor [%08x]\n",
cpu, read_cpuid_id());
set_cpu_online(cpu, true);
complete(&cpu_running);
......@@ -232,12 +239,7 @@ int __cpu_disable(void)
/*
* OK - migrate IRQs away from this CPU
*/
migrate_irqs();
/*
* Remove this CPU from the vm mask set of all processes.
*/
clear_tasks_mm_cpumask(cpu);
irq_migrate_all_off_this_cpu();
return 0;
}
......@@ -325,12 +327,14 @@ static void __init hyp_mode_check(void)
void __init smp_cpus_done(unsigned int max_cpus)
{
pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
setup_cpu_features();
hyp_mode_check();
apply_alternatives_all();
}
void __init smp_prepare_boot_cpu(void)
{
cpuinfo_store_boot_cpu();
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
}
......
......@@ -90,7 +90,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
* restoration before returning.
*/
cpu_set_reserved_ttbr0();
flush_tlb_all();
local_flush_tlb_all();
cpu_set_default_tcr_t0sz();
if (mm != &init_mm)
......
......@@ -103,12 +103,12 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
set_fs(fs);
}
static void dump_backtrace_entry(unsigned long where, unsigned long stack)
static void dump_backtrace_entry(unsigned long where)
{
/*
* Note that 'where' can have a physical address, but it's not handled.
*/
print_ip_sym(where);
if (in_exception_text(where))
dump_mem("", "Exception stack", stack,
stack + sizeof(struct pt_regs), false);
}
static void dump_instr(const char *lvl, struct pt_regs *regs)
......@@ -172,12 +172,17 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
pr_emerg("Call trace:\n");
while (1) {
unsigned long where = frame.pc;
unsigned long stack;
int ret;
dump_backtrace_entry(where);
ret = unwind_frame(&frame);
if (ret < 0)
break;
dump_backtrace_entry(where, frame.sp);
stack = frame.sp;
if (in_exception_text(where))
dump_mem("", "Exception stack", stack,
stack + sizeof(struct pt_regs), false);
}
}
......
......@@ -5,6 +5,7 @@
*/
#include <asm-generic/vmlinux.lds.h>
#include <asm/kernel-pgtable.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/page.h>
......@@ -60,9 +61,12 @@ PECOFF_FILE_ALIGNMENT = 0x200;
#define PECOFF_EDATA_PADDING
#endif
#ifdef CONFIG_DEBUG_ALIGN_RODATA
#if defined(CONFIG_DEBUG_ALIGN_RODATA)
#define ALIGN_DEBUG_RO . = ALIGN(1<<SECTION_SHIFT);
#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO
#elif defined(CONFIG_DEBUG_RODATA)
#define ALIGN_DEBUG_RO . = ALIGN(1<<PAGE_SHIFT);
#define ALIGN_DEBUG_RO_MIN(min) ALIGN_DEBUG_RO
#else
#define ALIGN_DEBUG_RO
#define ALIGN_DEBUG_RO_MIN(min) . = ALIGN(min);
......
......@@ -22,6 +22,7 @@ config KVM_ARM_VGIC_V3
config KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on OF
depends on !ARM64_16K_PAGES
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select ANON_INODES
......@@ -37,6 +38,8 @@ config KVM
select KVM_ARM_VGIC_V3
---help---
Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple
levels of fake page tables.
If unsure, say N.
......
......@@ -53,7 +53,7 @@ static bool cpu_has_32bit_el1(void)
{
u64 pfr0;
pfr0 = read_cpuid(ID_AA64PFR0_EL1);
pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
return !!(pfr0 & 0x20);
}
......
......@@ -693,13 +693,13 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
if (p->is_write) {
return ignore_write(vcpu, p);
} else {
u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
u32 el3 = !!((pfr >> 12) & 0xf);
u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1);
u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1);
u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT);
*vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) |
(((dfr >> 12) & 0xf) << 24) |
(((dfr >> 28) & 0xf) << 20) |
*vcpu_reg(vcpu, p->Rt) = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
(((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
(((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) |
(6 << 16) | (el3 << 14) | (el3 << 12));
return true;
}
......
......@@ -18,6 +18,7 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
#include <asm/cache.h>
#include <asm/cpufeature.h>
#include <asm/sysreg.h>
......@@ -31,49 +32,58 @@
* Returns:
* x0 - bytes not copied
*/
.macro ldrb1 ptr, regB, val
USER(9998f, ldrb \ptr, [\regB], \val)
.endm
.macro strb1 ptr, regB, val
strb \ptr, [\regB], \val
.endm
.macro ldrh1 ptr, regB, val
USER(9998f, ldrh \ptr, [\regB], \val)
.endm
.macro strh1 ptr, regB, val
strh \ptr, [\regB], \val
.endm
.macro ldr1 ptr, regB, val
USER(9998f, ldr \ptr, [\regB], \val)
.endm
.macro str1 ptr, regB, val
str \ptr, [\regB], \val
.endm
.macro ldp1 ptr, regB, regC, val
USER(9998f, ldp \ptr, \regB, [\regC], \val)
.endm
.macro stp1 ptr, regB, regC, val
stp \ptr, \regB, [\regC], \val
.endm
end .req x5
ENTRY(__copy_from_user)
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN)
add x5, x1, x2 // upper user buffer boundary
subs x2, x2, #16
b.mi 1f
0:
USER(9f, ldp x3, x4, [x1], #16)
subs x2, x2, #16
stp x3, x4, [x0], #16
b.pl 0b
1: adds x2, x2, #8
b.mi 2f
USER(9f, ldr x3, [x1], #8 )
sub x2, x2, #8
str x3, [x0], #8
2: adds x2, x2, #4
b.mi 3f
USER(9f, ldr w3, [x1], #4 )
sub x2, x2, #4
str w3, [x0], #4
3: adds x2, x2, #2
b.mi 4f
USER(9f, ldrh w3, [x1], #2 )
sub x2, x2, #2
strh w3, [x0], #2
4: adds x2, x2, #1
b.mi 5f
USER(9f, ldrb w3, [x1] )
strb w3, [x0]
5: mov x0, #0
add end, x0, x2
#include "copy_template.S"
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN)
mov x0, #0 // Nothing to copy
ret
ENDPROC(__copy_from_user)
.section .fixup,"ax"
.align 2
9: sub x2, x5, x1
mov x3, x2
10: strb wzr, [x0], #1 // zero remaining buffer space
subs x3, x3, #1
b.ne 10b
mov x0, x2 // bytes not copied
9998:
sub x0, end, dst
9999:
strb wzr, [dst], #1 // zero remaining buffer space
cmp dst, end
b.lo 9999b
ret
.previous
......@@ -20,6 +20,7 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
#include <asm/cache.h>
#include <asm/cpufeature.h>
#include <asm/sysreg.h>
......@@ -33,44 +34,52 @@
* Returns:
* x0 - bytes not copied
*/
.macro ldrb1 ptr, regB, val
USER(9998f, ldrb \ptr, [\regB], \val)
.endm
.macro strb1 ptr, regB, val
USER(9998f, strb \ptr, [\regB], \val)
.endm
.macro ldrh1 ptr, regB, val
USER(9998f, ldrh \ptr, [\regB], \val)
.endm
.macro strh1 ptr, regB, val
USER(9998f, strh \ptr, [\regB], \val)
.endm
.macro ldr1 ptr, regB, val
USER(9998f, ldr \ptr, [\regB], \val)
.endm
.macro str1 ptr, regB, val
USER(9998f, str \ptr, [\regB], \val)
.endm
.macro ldp1 ptr, regB, regC, val
USER(9998f, ldp \ptr, \regB, [\regC], \val)
.endm
.macro stp1 ptr, regB, regC, val
USER(9998f, stp \ptr, \regB, [\regC], \val)
.endm
end .req x5
ENTRY(__copy_in_user)
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN)
add x5, x0, x2 // upper user buffer boundary
subs x2, x2, #16
b.mi 1f
0:
USER(9f, ldp x3, x4, [x1], #16)
subs x2, x2, #16
USER(9f, stp x3, x4, [x0], #16)
b.pl 0b
1: adds x2, x2, #8
b.mi 2f
USER(9f, ldr x3, [x1], #8 )
sub x2, x2, #8
USER(9f, str x3, [x0], #8 )
2: adds x2, x2, #4
b.mi 3f
USER(9f, ldr w3, [x1], #4 )
sub x2, x2, #4
USER(9f, str w3, [x0], #4 )
3: adds x2, x2, #2
b.mi 4f
USER(9f, ldrh w3, [x1], #2 )
sub x2, x2, #2
USER(9f, strh w3, [x0], #2 )
4: adds x2, x2, #1
b.mi 5f
USER(9f, ldrb w3, [x1] )
USER(9f, strb w3, [x0] )
5: mov x0, #0
add end, x0, x2
#include "copy_template.S"
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN)
mov x0, #0
ret
ENDPROC(__copy_in_user)
.section .fixup,"ax"
.align 2
9: sub x0, x5, x0 // bytes not copied
9998: sub x0, end, dst // bytes not copied
ret
.previous
/*
* Copyright (C) 2013 ARM Ltd.
* Copyright (C) 2013 Linaro.
*
* This code is based on glibc cortex strings work originally authored by Linaro
* and re-licensed under GPLv2 for the Linux kernel. The original code can
* be found @
*
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
* files/head:/src/aarch64/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Copy a buffer from src to dest (alignment handled by the hardware)
*
* Parameters:
* x0 - dest
* x1 - src
* x2 - n
* Returns:
* x0 - dest
*/
dstin .req x0
src .req x1
count .req x2
tmp1 .req x3
tmp1w .req w3
tmp2 .req x4
tmp2w .req w4
dst .req x6
A_l .req x7
A_h .req x8
B_l .req x9
B_h .req x10
C_l .req x11
C_h .req x12
D_l .req x13
D_h .req x14
mov dst, dstin
cmp count, #16
/*When memory length is less than 16, the accessed are not aligned.*/
b.lo .Ltiny15
neg tmp2, src
ands tmp2, tmp2, #15/* Bytes to reach alignment. */
b.eq .LSrcAligned
sub count, count, tmp2
/*
* Copy the leading memory data from src to dst in an increasing
* address order.By this way,the risk of overwritting the source
* memory data is eliminated when the distance between src and
* dst is less than 16. The memory accesses here are alignment.
*/
tbz tmp2, #0, 1f
ldrb1 tmp1w, src, #1
strb1 tmp1w, dst, #1
1:
tbz tmp2, #1, 2f
ldrh1 tmp1w, src, #2
strh1 tmp1w, dst, #2
2:
tbz tmp2, #2, 3f
ldr1 tmp1w, src, #4
str1 tmp1w, dst, #4
3:
tbz tmp2, #3, .LSrcAligned
ldr1 tmp1, src, #8
str1 tmp1, dst, #8
.LSrcAligned:
cmp count, #64
b.ge .Lcpy_over64
/*
* Deal with small copies quickly by dropping straight into the
* exit block.
*/
.Ltail63:
/*
* Copy up to 48 bytes of data. At this point we only need the
* bottom 6 bits of count to be accurate.
*/
ands tmp1, count, #0x30
b.eq .Ltiny15
cmp tmp1w, #0x20
b.eq 1f
b.lt 2f
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
1:
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
2:
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
.Ltiny15:
/*
* Prefer to break one ldp/stp into several load/store to access
* memory in an increasing address order,rather than to load/store 16
* bytes from (src-16) to (dst-16) and to backward the src to aligned
* address,which way is used in original cortex memcpy. If keeping
* the original memcpy process here, memmove need to satisfy the
* precondition that src address is at least 16 bytes bigger than dst
* address,otherwise some source data will be overwritten when memove
* call memcpy directly. To make memmove simpler and decouple the
* memcpy's dependency on memmove, withdrew the original process.
*/
tbz count, #3, 1f
ldr1 tmp1, src, #8
str1 tmp1, dst, #8
1:
tbz count, #2, 2f
ldr1 tmp1w, src, #4
str1 tmp1w, dst, #4
2:
tbz count, #1, 3f
ldrh1 tmp1w, src, #2
strh1 tmp1w, dst, #2
3:
tbz count, #0, .Lexitfunc
ldrb1 tmp1w, src, #1
strb1 tmp1w, dst, #1
b .Lexitfunc
.Lcpy_over64:
subs count, count, #128
b.ge .Lcpy_body_large
/*
* Less than 128 bytes to copy, so handle 64 here and then jump
* to the tail.
*/
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
ldp1 B_l, B_h, src, #16
ldp1 C_l, C_h, src, #16
stp1 B_l, B_h, dst, #16
stp1 C_l, C_h, dst, #16
ldp1 D_l, D_h, src, #16
stp1 D_l, D_h, dst, #16
tst count, #0x3f
b.ne .Ltail63
b .Lexitfunc
/*
* Critical loop. Start at a new cache line boundary. Assuming
* 64 bytes per line this ensures the entire loop is in one line.
*/
.p2align L1_CACHE_SHIFT
.Lcpy_body_large:
/* pre-get 64 bytes data. */
ldp1 A_l, A_h, src, #16
ldp1 B_l, B_h, src, #16
ldp1 C_l, C_h, src, #16
ldp1 D_l, D_h, src, #16
1:
/*
* interlace the load of next 64 bytes data block with store of the last
* loaded 64 bytes data.
*/
stp1 A_l, A_h, dst, #16
ldp1 A_l, A_h, src, #16
stp1 B_l, B_h, dst, #16
ldp1 B_l, B_h, src, #16
stp1 C_l, C_h, dst, #16
ldp1 C_l, C_h, src, #16
stp1 D_l, D_h, dst, #16
ldp1 D_l, D_h, src, #16
subs count, count, #64
b.ge 1b
stp1 A_l, A_h, dst, #16
stp1 B_l, B_h, dst, #16
stp1 C_l, C_h, dst, #16
stp1 D_l, D_h, dst, #16
tst count, #0x3f
b.ne .Ltail63
.Lexitfunc:
......@@ -18,6 +18,7 @@
#include <asm/alternative.h>
#include <asm/assembler.h>
#include <asm/cache.h>
#include <asm/cpufeature.h>
#include <asm/sysreg.h>
......@@ -31,44 +32,52 @@
* Returns:
* x0 - bytes not copied
*/
.macro ldrb1 ptr, regB, val
ldrb \ptr, [\regB], \val
.endm
.macro strb1 ptr, regB, val
USER(9998f, strb \ptr, [\regB], \val)
.endm
.macro ldrh1 ptr, regB, val
ldrh \ptr, [\regB], \val
.endm
.macro strh1 ptr, regB, val
USER(9998f, strh \ptr, [\regB], \val)
.endm
.macro ldr1 ptr, regB, val
ldr \ptr, [\regB], \val
.endm
.macro str1 ptr, regB, val
USER(9998f, str \ptr, [\regB], \val)
.endm
.macro ldp1 ptr, regB, regC, val
ldp \ptr, \regB, [\regC], \val
.endm
.macro stp1 ptr, regB, regC, val
USER(9998f, stp \ptr, \regB, [\regC], \val)
.endm
end .req x5
ENTRY(__copy_to_user)
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN)
add x5, x0, x2 // upper user buffer boundary
subs x2, x2, #16
b.mi 1f
0:
ldp x3, x4, [x1], #16
subs x2, x2, #16
USER(9f, stp x3, x4, [x0], #16)
b.pl 0b
1: adds x2, x2, #8
b.mi 2f
ldr x3, [x1], #8
sub x2, x2, #8
USER(9f, str x3, [x0], #8 )
2: adds x2, x2, #4
b.mi 3f
ldr w3, [x1], #4
sub x2, x2, #4
USER(9f, str w3, [x0], #4 )
3: adds x2, x2, #2
b.mi 4f
ldrh w3, [x1], #2
sub x2, x2, #2
USER(9f, strh w3, [x0], #2 )
4: adds x2, x2, #1
b.mi 5f
ldrb w3, [x1]
USER(9f, strb w3, [x0] )
5: mov x0, #0
add end, x0, x2
#include "copy_template.S"
ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN)
mov x0, #0
ret
ENDPROC(__copy_to_user)
.section .fixup,"ax"
.align 2
9: sub x0, x5, x0 // bytes not copied
9998: sub x0, end, dst // bytes not copied
ret
.previous
......@@ -41,4 +41,4 @@ ENTRY(memchr)
ret
2: mov x0, #0
ret
ENDPROC(memchr)
ENDPIPROC(memchr)
......@@ -255,4 +255,4 @@ CPU_LE( rev data2, data2 )
.Lret0:
mov result, #0
ret
ENDPROC(memcmp)
ENDPIPROC(memcmp)
......@@ -36,166 +36,42 @@
* Returns:
* x0 - dest
*/
dstin .req x0
src .req x1
count .req x2
tmp1 .req x3
tmp1w .req w3
tmp2 .req x4
tmp2w .req w4
tmp3 .req x5
tmp3w .req w5
dst .req x6
.macro ldrb1 ptr, regB, val
ldrb \ptr, [\regB], \val
.endm
A_l .req x7
A_h .req x8
B_l .req x9
B_h .req x10
C_l .req x11
C_h .req x12
D_l .req x13
D_h .req x14
.macro strb1 ptr, regB, val
strb \ptr, [\regB], \val
.endm
ENTRY(memcpy)
mov dst, dstin
cmp count, #16
/*When memory length is less than 16, the accessed are not aligned.*/
b.lo .Ltiny15
.macro ldrh1 ptr, regB, val
ldrh \ptr, [\regB], \val
.endm
neg tmp2, src
ands tmp2, tmp2, #15/* Bytes to reach alignment. */
b.eq .LSrcAligned
sub count, count, tmp2
/*
* Copy the leading memory data from src to dst in an increasing
* address order.By this way,the risk of overwritting the source
* memory data is eliminated when the distance between src and
* dst is less than 16. The memory accesses here are alignment.
*/
tbz tmp2, #0, 1f
ldrb tmp1w, [src], #1
strb tmp1w, [dst], #1
1:
tbz tmp2, #1, 2f
ldrh tmp1w, [src], #2
strh tmp1w, [dst], #2
2:
tbz tmp2, #2, 3f
ldr tmp1w, [src], #4
str tmp1w, [dst], #4
3:
tbz tmp2, #3, .LSrcAligned
ldr tmp1, [src],#8
str tmp1, [dst],#8
.macro strh1 ptr, regB, val
strh \ptr, [\regB], \val
.endm
.LSrcAligned:
cmp count, #64
b.ge .Lcpy_over64
/*
* Deal with small copies quickly by dropping straight into the
* exit block.
*/
.Ltail63:
/*
* Copy up to 48 bytes of data. At this point we only need the
* bottom 6 bits of count to be accurate.
*/
ands tmp1, count, #0x30
b.eq .Ltiny15
cmp tmp1w, #0x20
b.eq 1f
b.lt 2f
ldp A_l, A_h, [src], #16
stp A_l, A_h, [dst], #16
1:
ldp A_l, A_h, [src], #16
stp A_l, A_h, [dst], #16
2:
ldp A_l, A_h, [src], #16
stp A_l, A_h, [dst], #16
.Ltiny15:
/*
* Prefer to break one ldp/stp into several load/store to access
* memory in an increasing address order,rather than to load/store 16
* bytes from (src-16) to (dst-16) and to backward the src to aligned
* address,which way is used in original cortex memcpy. If keeping
* the original memcpy process here, memmove need to satisfy the
* precondition that src address is at least 16 bytes bigger than dst
* address,otherwise some source data will be overwritten when memove
* call memcpy directly. To make memmove simpler and decouple the
* memcpy's dependency on memmove, withdrew the original process.
*/
tbz count, #3, 1f
ldr tmp1, [src], #8
str tmp1, [dst], #8
1:
tbz count, #2, 2f
ldr tmp1w, [src], #4
str tmp1w, [dst], #4
2:
tbz count, #1, 3f
ldrh tmp1w, [src], #2
strh tmp1w, [dst], #2
3:
tbz count, #0, .Lexitfunc
ldrb tmp1w, [src]
strb tmp1w, [dst]
.macro ldr1 ptr, regB, val
ldr \ptr, [\regB], \val
.endm
.Lexitfunc:
ret
.macro str1 ptr, regB, val
str \ptr, [\regB], \val
.endm
.Lcpy_over64:
subs count, count, #128
b.ge .Lcpy_body_large
/*
* Less than 128 bytes to copy, so handle 64 here and then jump
* to the tail.
*/
ldp A_l, A_h, [src],#16
stp A_l, A_h, [dst],#16
ldp B_l, B_h, [src],#16
ldp C_l, C_h, [src],#16
stp B_l, B_h, [dst],#16
stp C_l, C_h, [dst],#16
ldp D_l, D_h, [src],#16
stp D_l, D_h, [dst],#16
.macro ldp1 ptr, regB, regC, val
ldp \ptr, \regB, [\regC], \val
.endm
tst count, #0x3f
b.ne .Ltail63
ret
.macro stp1 ptr, regB, regC, val
stp \ptr, \regB, [\regC], \val
.endm
/*
* Critical loop. Start at a new cache line boundary. Assuming
* 64 bytes per line this ensures the entire loop is in one line.
*/
.p2align L1_CACHE_SHIFT
.Lcpy_body_large:
/* pre-get 64 bytes data. */
ldp A_l, A_h, [src],#16
ldp B_l, B_h, [src],#16
ldp C_l, C_h, [src],#16
ldp D_l, D_h, [src],#16
1:
/*
* interlace the load of next 64 bytes data block with store of the last
* loaded 64 bytes data.
*/
stp A_l, A_h, [dst],#16
ldp A_l, A_h, [src],#16
stp B_l, B_h, [dst],#16
ldp B_l, B_h, [src],#16
stp C_l, C_h, [dst],#16
ldp C_l, C_h, [src],#16
stp D_l, D_h, [dst],#16
ldp D_l, D_h, [src],#16
subs count, count, #64
b.ge 1b
stp A_l, A_h, [dst],#16
stp B_l, B_h, [dst],#16
stp C_l, C_h, [dst],#16
stp D_l, D_h, [dst],#16
tst count, #0x3f
b.ne .Ltail63
.weak memcpy
ENTRY(__memcpy)
ENTRY(memcpy)
#include "copy_template.S"
ret
ENDPROC(memcpy)
ENDPIPROC(memcpy)
ENDPROC(__memcpy)
......@@ -57,12 +57,14 @@ C_h .req x12
D_l .req x13
D_h .req x14
.weak memmove
ENTRY(__memmove)
ENTRY(memmove)
cmp dstin, src
b.lo memcpy
b.lo __memcpy
add tmp1, src, count
cmp dstin, tmp1
b.hs memcpy /* No overlap. */
b.hs __memcpy /* No overlap. */
add dst, dstin, count
add src, src, count
......@@ -194,4 +196,5 @@ ENTRY(memmove)
tst count, #0x3f
b.ne .Ltail63
ret
ENDPROC(memmove)
ENDPIPROC(memmove)
ENDPROC(__memmove)
......@@ -54,6 +54,8 @@ dst .req x8
tmp3w .req w9
tmp3 .req x9
.weak memset
ENTRY(__memset)
ENTRY(memset)
mov dst, dstin /* Preserve return value. */
and A_lw, val, #255
......@@ -213,4 +215,5 @@ ENTRY(memset)
ands count, count, zva_bits_x
b.ne .Ltail_maybe_long
ret
ENDPROC(memset)
ENDPIPROC(memset)
ENDPROC(__memset)
......@@ -231,4 +231,4 @@ CPU_BE( orr syndrome, diff, has_nul )
lsr data1, data1, #56
sub result, data1, data2, lsr #56
ret
ENDPROC(strcmp)
ENDPIPROC(strcmp)
......@@ -123,4 +123,4 @@ CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
csinv data1, data1, xzr, le
csel data2, data2, data2a, le
b .Lrealigned
ENDPROC(strlen)
ENDPIPROC(strlen)
......@@ -307,4 +307,4 @@ CPU_BE( orr syndrome, diff, has_nul )
.Lret0:
mov result, #0
ret
ENDPROC(strncmp)
ENDPIPROC(strncmp)
......@@ -4,3 +4,6 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
context.o proc.o pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_ARM64_PTDUMP) += dump.o
obj-$(CONFIG_KASAN) += kasan_init.o
KASAN_SANITIZE_kasan_init.o := n
......@@ -98,7 +98,7 @@ ENTRY(__flush_dcache_area)
b.lo 1b
dsb sy
ret
ENDPROC(__flush_dcache_area)
ENDPIPROC(__flush_dcache_area)
/*
* __inval_cache_range(start, end)
......@@ -131,7 +131,7 @@ __dma_inv_range:
b.lo 2b
dsb sy
ret
ENDPROC(__inval_cache_range)
ENDPIPROC(__inval_cache_range)
ENDPROC(__dma_inv_range)
/*
......@@ -171,7 +171,7 @@ ENTRY(__dma_flush_range)
b.lo 1b
dsb sy
ret
ENDPROC(__dma_flush_range)
ENDPIPROC(__dma_flush_range)
/*
* __dma_map_area(start, size, dir)
......@@ -184,7 +184,7 @@ ENTRY(__dma_map_area)
cmp w2, #DMA_FROM_DEVICE
b.eq __dma_inv_range
b __dma_clean_range
ENDPROC(__dma_map_area)
ENDPIPROC(__dma_map_area)
/*
* __dma_unmap_area(start, size, dir)
......@@ -197,4 +197,4 @@ ENTRY(__dma_unmap_area)
cmp w2, #DMA_TO_DEVICE
b.ne __dma_inv_range
ret
ENDPROC(__dma_unmap_area)
ENDPIPROC(__dma_unmap_area)
......@@ -17,135 +17,185 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/percpu.h>
#include <asm/cpufeature.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/cachetype.h>
#define asid_bits(reg) \
(((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8)
static u32 asid_bits;
static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
#define ASID_FIRST_VERSION (1 << MAX_ASID_BITS)
static atomic64_t asid_generation;
static unsigned long *asid_map;
static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
unsigned int cpu_last_asid = ASID_FIRST_VERSION;
static DEFINE_PER_CPU(atomic64_t, active_asids);
static DEFINE_PER_CPU(u64, reserved_asids);
static cpumask_t tlb_flush_pending;
/*
* We fork()ed a process, and we need a new context for the child to run in.
*/
void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
mm->context.id = 0;
raw_spin_lock_init(&mm->context.id_lock);
}
#define ASID_MASK (~GENMASK(asid_bits - 1, 0))
#define ASID_FIRST_VERSION (1UL << asid_bits)
#define NUM_USER_ASIDS ASID_FIRST_VERSION
static void flush_context(void)
static void flush_context(unsigned int cpu)
{
/* set the reserved TTBR0 before flushing the TLB */
cpu_set_reserved_ttbr0();
flush_tlb_all();
if (icache_is_aivivt())
__flush_icache_all();
}
int i;
u64 asid;
static void set_mm_context(struct mm_struct *mm, unsigned int asid)
{
unsigned long flags;
/* Update the list of reserved ASIDs and the ASID bitmap. */
bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
/*
* Locking needed for multi-threaded applications where the same
* mm->context.id could be set from different CPUs during the
* broadcast. This function is also called via IPI so the
* mm->context.id_lock has to be IRQ-safe.
* Ensure the generation bump is observed before we xchg the
* active_asids.
*/
raw_spin_lock_irqsave(&mm->context.id_lock, flags);
if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
smp_wmb();
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
/*
* Old version of ASID found. Set the new one and reset
* mm_cpumask(mm).
* If this CPU has already been through a
* rollover, but hasn't run another task in
* the meantime, we must preserve its reserved
* ASID, as this is the only trace we have of
* the process it is still running.
*/
mm->context.id = asid;
cpumask_clear(mm_cpumask(mm));
if (asid == 0)
asid = per_cpu(reserved_asids, i);
__set_bit(asid & ~ASID_MASK, asid_map);
per_cpu(reserved_asids, i) = asid;
}
raw_spin_unlock_irqrestore(&mm->context.id_lock, flags);
/*
* Set the mm_cpumask(mm) bit for the current CPU.
*/
cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
/* Queue a TLB invalidate and flush the I-cache if necessary. */
cpumask_setall(&tlb_flush_pending);
if (icache_is_aivivt())
__flush_icache_all();
}
/*
* Reset the ASID on the current CPU. This function call is broadcast from the
* CPU handling the ASID rollover and holding cpu_asid_lock.
*/
static void reset_context(void *info)
static int is_reserved_asid(u64 asid)
{
int cpu;
for_each_possible_cpu(cpu)
if (per_cpu(reserved_asids, cpu) == asid)
return 1;
return 0;
}
static u64 new_context(struct mm_struct *mm, unsigned int cpu)
{
unsigned int asid;
unsigned int cpu = smp_processor_id();
struct mm_struct *mm = current->active_mm;
static u32 cur_idx = 1;
u64 asid = atomic64_read(&mm->context.id);
u64 generation = atomic64_read(&asid_generation);
if (asid != 0) {
/*
* If our current ASID was active during a rollover, we
* can continue to use it and this was just a false alarm.
*/
if (is_reserved_asid(asid))
return generation | (asid & ~ASID_MASK);
/*
* We had a valid ASID in a previous life, so try to re-use
* it if possible.
*/
asid &= ~ASID_MASK;
if (!__test_and_set_bit(asid, asid_map))
goto bump_gen;
}
/*
* current->active_mm could be init_mm for the idle thread immediately
* after secondary CPU boot or hotplug. TTBR0_EL1 is already set to
* the reserved value, so no need to reset any context.
* Allocate a free ASID. If we can't find one, take a note of the
* currently active ASIDs and mark the TLBs as requiring flushes.
* We always count from ASID #1, as we use ASID #0 when setting a
* reserved TTBR0 for the init_mm.
*/
if (mm == &init_mm)
return;
asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
if (asid != NUM_USER_ASIDS)
goto set_asid;
smp_rmb();
asid = cpu_last_asid + cpu;
/* We're out of ASIDs, so increment the global generation count */
generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
&asid_generation);
flush_context(cpu);
flush_context();
set_mm_context(mm, asid);
/* We have at least 1 ASID per CPU, so this will always succeed */
asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
/* set the new ASID */
cpu_switch_mm(mm->pgd, mm);
set_asid:
__set_bit(asid, asid_map);
cur_idx = asid;
bump_gen:
asid |= generation;
return asid;
}
void __new_context(struct mm_struct *mm)
void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
{
unsigned int asid;
unsigned int bits = asid_bits();
unsigned long flags;
u64 asid;
asid = atomic64_read(&mm->context.id);
raw_spin_lock(&cpu_asid_lock);
/*
* Check the ASID again, in case the change was broadcast from another
* CPU before we acquired the lock.
* The memory ordering here is subtle. We rely on the control
* dependency between the generation read and the update of
* active_asids to ensure that we are synchronised with a
* parallel rollover (i.e. this pairs with the smp_wmb() in
* flush_context).
*/
if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
raw_spin_unlock(&cpu_asid_lock);
return;
if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
&& atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
goto switch_mm_fastpath;
raw_spin_lock_irqsave(&cpu_asid_lock, flags);
/* Check that our ASID belongs to the current generation. */
asid = atomic64_read(&mm->context.id);
if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) {
asid = new_context(mm, cpu);
atomic64_set(&mm->context.id, asid);
}
/*
* At this point, it is guaranteed that the current mm (with an old
* ASID) isn't active on any other CPU since the ASIDs are changed
* simultaneously via IPI.
*/
asid = ++cpu_last_asid;
/*
* If we've used up all our ASIDs, we need to start a new version and
* flush the TLB.
*/
if (unlikely((asid & ((1 << bits) - 1)) == 0)) {
/* increment the ASID version */
cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits);
if (cpu_last_asid == 0)
cpu_last_asid = ASID_FIRST_VERSION;
asid = cpu_last_asid + smp_processor_id();
flush_context();
smp_wmb();
smp_call_function(reset_context, NULL, 1);
cpu_last_asid += NR_CPUS - 1;
if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
local_flush_tlb_all();
atomic64_set(&per_cpu(active_asids, cpu), asid);
raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
switch_mm_fastpath:
cpu_switch_mm(mm->pgd, mm);
}
static int asids_init(void)
{
int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4);
switch (fld) {
default:
pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld);
/* Fallthrough */
case 0:
asid_bits = 8;
break;
case 2:
asid_bits = 16;
}
set_mm_context(mm, asid);
raw_spin_unlock(&cpu_asid_lock);
/* If we end up with more CPUs than ASIDs, expect things to crash */
WARN_ON(NUM_USER_ASIDS < num_possible_cpus());
atomic64_set(&asid_generation, ASID_FIRST_VERSION);
asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map),
GFP_KERNEL);
if (!asid_map)
panic("Failed to allocate bitmap for %lu ASIDs\n",
NUM_USER_ASIDS);
pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
return 0;
}
early_initcall(asids_init);
......@@ -67,6 +67,12 @@ static struct addr_marker address_markers[] = {
{ -1, NULL },
};
/*
* The page dumper groups page table entries of the same type into a single
* description. It uses pg_state to track the range information while
* iterating over the pte entries. When the continuity is broken it then
* dumps out a description of the range.
*/
struct pg_state {
struct seq_file *seq;
const struct addr_marker *marker;
......@@ -113,6 +119,16 @@ static const struct prot_bits pte_bits[] = {
.val = PTE_NG,
.set = "NG",
.clear = " ",
}, {
.mask = PTE_CONT,
.val = PTE_CONT,
.set = "CON",
.clear = " ",
}, {
.mask = PTE_TABLE_BIT,
.val = PTE_TABLE_BIT,
.set = " ",
.clear = "BLK",
}, {
.mask = PTE_UXN,
.val = PTE_UXN,
......@@ -198,7 +214,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
unsigned long delta;
if (st->current_prot) {
seq_printf(st->seq, "0x%16lx-0x%16lx ",
seq_printf(st->seq, "0x%016lx-0x%016lx ",
st->start_address, addr);
delta = (addr - st->start_address) >> 10;
......
......@@ -556,7 +556,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
}
#ifdef CONFIG_ARM64_PAN
void cpu_enable_pan(void)
void cpu_enable_pan(void *__unused)
{
config_sctlr_el1(SCTLR_EL1_SPAN, 0);
}
......
......@@ -86,10 +86,10 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
memset(zone_size, 0, sizeof(zone_size));
/* 4GB maximum for 32-bit only capable devices */
if (IS_ENABLED(CONFIG_ZONE_DMA)) {
max_dma = PFN_DOWN(arm64_dma_phys_limit);
zone_size[ZONE_DMA] = max_dma - min;
}
#ifdef CONFIG_ZONE_DMA
max_dma = PFN_DOWN(arm64_dma_phys_limit);
zone_size[ZONE_DMA] = max_dma - min;
#endif
zone_size[ZONE_NORMAL] = max - max_dma;
memcpy(zhole_size, zone_size, sizeof(zhole_size));
......@@ -101,11 +101,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (start >= max)
continue;
if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) {
#ifdef CONFIG_ZONE_DMA
if (start < max_dma) {
unsigned long dma_end = min(end, max_dma);
zhole_size[ZONE_DMA] -= dma_end - start;
}
#endif
if (end > max_dma) {
unsigned long normal_end = min(end, max);
unsigned long normal_start = max(start, max_dma);
......@@ -298,6 +299,9 @@ void __init mem_init(void)
#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
pr_notice("Virtual kernel memory layout:\n"
#ifdef CONFIG_KASAN
" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n"
#endif
" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n"
#ifdef CONFIG_SPARSEMEM_VMEMMAP
" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n"
......@@ -310,6 +314,9 @@ void __init mem_init(void)
" .init : 0x%p" " - 0x%p" " (%6ld KB)\n"
" .text : 0x%p" " - 0x%p" " (%6ld KB)\n"
" .data : 0x%p" " - 0x%p" " (%6ld KB)\n",
#ifdef CONFIG_KASAN
MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
#endif
MLG(VMALLOC_START, VMALLOC_END),
#ifdef CONFIG_SPARSEMEM_VMEMMAP
MLG((unsigned long)vmemmap,
......
/*
* This file contains kasan initialization code for ARM64.
*
* Copyright (c) 2015 Samsung Electronics Co., Ltd.
* Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#define pr_fmt(fmt) "kasan: " fmt
#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/start_kernel.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
unsigned long end)
{
pte_t *pte;
unsigned long next;
if (pmd_none(*pmd))
pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
pte = pte_offset_kernel(pmd, addr);
do {
next = addr + PAGE_SIZE;
set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
PAGE_KERNEL));
} while (pte++, addr = next, addr != end && pte_none(*pte));
}
static void __init kasan_early_pmd_populate(pud_t *pud,
unsigned long addr,
unsigned long end)
{
pmd_t *pmd;
unsigned long next;
if (pud_none(*pud))
pud_populate(&init_mm, pud, kasan_zero_pmd);
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
kasan_early_pte_populate(pmd, addr, next);
} while (pmd++, addr = next, addr != end && pmd_none(*pmd));
}
static void __init kasan_early_pud_populate(pgd_t *pgd,
unsigned long addr,
unsigned long end)
{
pud_t *pud;
unsigned long next;
if (pgd_none(*pgd))
pgd_populate(&init_mm, pgd, kasan_zero_pud);
pud = pud_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
kasan_early_pmd_populate(pud, addr, next);
} while (pud++, addr = next, addr != end && pud_none(*pud));
}
static void __init kasan_map_early_shadow(void)
{
unsigned long addr = KASAN_SHADOW_START;
unsigned long end = KASAN_SHADOW_END;
unsigned long next;
pgd_t *pgd;
pgd = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
kasan_early_pud_populate(pgd, addr, next);
} while (pgd++, addr = next, addr != end);
}
asmlinkage void __init kasan_early_init(void)
{
BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
kasan_map_early_shadow();
}
static void __init clear_pgds(unsigned long start,
unsigned long end)
{
/*
* Remove references to kasan page tables from
* swapper_pg_dir. pgd_clear() can't be used
* here because it's nop on 2,3-level pagetable setups
*/
for (; start < end; start += PGDIR_SIZE)
set_pgd(pgd_offset_k(start), __pgd(0));
}
static void __init cpu_set_ttbr1(unsigned long ttbr1)
{
asm(
" msr ttbr1_el1, %0\n"
" isb"
:
: "r" (ttbr1));
}
void __init kasan_init(void)
{
struct memblock_region *reg;
/*
* We are going to perform proper setup of shadow memory.
* At first we should unmap early shadow (clear_pgds() call bellow).
* However, instrumented code couldn't execute without shadow memory.
* tmp_pg_dir used to keep early shadow mapped until full shadow
* setup will be finished.
*/
memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
cpu_set_ttbr1(__pa(tmp_pg_dir));
flush_tlb_all();
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
kasan_mem_to_shadow((void *)MODULES_VADDR));
for_each_memblock(memory, reg) {
void *start = (void *)__phys_to_virt(reg->base);
void *end = (void *)__phys_to_virt(reg->base + reg->size);
if (start >= end)
break;
/*
* end + 1 here is intentional. We check several shadow bytes in
* advance to slightly speed up fastpath. In some rare cases
* we could cross boundary of mapped shadow, so we just map
* some more here.
*/
vmemmap_populate((unsigned long)kasan_mem_to_shadow(start),
(unsigned long)kasan_mem_to_shadow(end) + 1,
pfn_to_nid(virt_to_pfn(start)));
}
memset(kasan_zero_page, 0, PAGE_SIZE);
cpu_set_ttbr1(__pa(swapper_pg_dir));
flush_tlb_all();
/* At this point kasan is fully initialized. Enable error messages */
init_task.kasan_depth = 0;
pr_info("KernelAddressSanitizer initialized\n");
}
......@@ -32,6 +32,7 @@
#include <asm/cputype.h>
#include <asm/fixmap.h>
#include <asm/kernel-pgtable.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
......@@ -80,19 +81,55 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
do {
/*
* Need to have the least restrictive permissions available
* permissions will be fixed up later
* permissions will be fixed up later. Default the new page
* range as contiguous ptes.
*/
set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC_CONT));
pfn++;
} while (pte++, i++, i < PTRS_PER_PTE);
}
/*
* Given a PTE with the CONT bit set, determine where the CONT range
* starts, and clear the entire range of PTE CONT bits.
*/
static void clear_cont_pte_range(pte_t *pte, unsigned long addr)
{
int i;
pte -= CONT_RANGE_OFFSET(addr);
for (i = 0; i < CONT_PTES; i++) {
set_pte(pte, pte_mknoncont(*pte));
pte++;
}
flush_tlb_all();
}
/*
* Given a range of PTEs set the pfn and provided page protection flags
*/
static void __populate_init_pte(pte_t *pte, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot)
{
unsigned long pfn = __phys_to_pfn(phys);
do {
/* clear all the bits except the pfn, then apply the prot */
set_pte(pte, pfn_pte(pfn, prot));
pte++;
pfn++;
addr += PAGE_SIZE;
} while (addr != end);
}
static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
unsigned long end, unsigned long pfn,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
void *(*alloc)(unsigned long size))
{
pte_t *pte;
unsigned long next;
if (pmd_none(*pmd) || pmd_sect(*pmd)) {
pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
......@@ -105,9 +142,27 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
pte = pte_offset_kernel(pmd, addr);
do {
set_pte(pte, pfn_pte(pfn, prot));
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
next = min(end, (addr + CONT_SIZE) & CONT_MASK);
if (((addr | next | phys) & ~CONT_MASK) == 0) {
/* a block of CONT_PTES */
__populate_init_pte(pte, addr, next, phys,
prot | __pgprot(PTE_CONT));
} else {
/*
* If the range being split is already inside of a
* contiguous range but this PTE isn't going to be
* contiguous, then we want to unmark the adjacent
* ranges, then update the portion of the range we
* are interrested in.
*/
clear_cont_pte_range(pte, addr);
__populate_init_pte(pte, addr, next, phys, prot);
}
pte += (next - addr) >> PAGE_SHIFT;
phys += next - addr;
addr = next;
} while (addr != end);
}
void split_pud(pud_t *old_pud, pmd_t *pmd)
......@@ -168,8 +223,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
}
}
} else {
alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
prot, alloc);
alloc_init_pte(pmd, addr, next, phys, prot, alloc);
}
phys += next - addr;
} while (pmd++, addr = next, addr != end);
......@@ -353,14 +407,11 @@ static void __init map_mem(void)
* memory addressable from the initial direct kernel mapping.
*
* The initial direct kernel mapping, located at swapper_pg_dir, gives
* us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
* PHYS_OFFSET (which must be aligned to 2MB as per
* Documentation/arm64/booting.txt).
* us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps,
* memory starting from PHYS_OFFSET (which must be aligned to 2MB as
* per Documentation/arm64/booting.txt).
*/
if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
limit = PHYS_OFFSET + PMD_SIZE;
else
limit = PHYS_OFFSET + PUD_SIZE;
limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE;
memblock_set_current_limit(limit);
/* map all the memory banks */
......@@ -371,21 +422,24 @@ static void __init map_mem(void)
if (start >= end)
break;
#ifndef CONFIG_ARM64_64K_PAGES
/*
* For the first memory bank align the start address and
* current memblock limit to prevent create_mapping() from
* allocating pte page tables from unmapped memory.
* When 64K pages are enabled, the pte page table for the
* first PGDIR_SIZE is already present in swapper_pg_dir.
*/
if (start < limit)
start = ALIGN(start, PMD_SIZE);
if (end < limit) {
limit = end & PMD_MASK;
memblock_set_current_limit(limit);
if (ARM64_SWAPPER_USES_SECTION_MAPS) {
/*
* For the first memory bank align the start address and
* current memblock limit to prevent create_mapping() from
* allocating pte page tables from unmapped memory. With
* the section maps, if the first block doesn't end on section
* size boundary, create_mapping() will try to allocate a pte
* page, which may be returned from an unmapped area.
* When section maps are not used, the pte page table for the
* current limit is already present in swapper_pg_dir.
*/
if (start < limit)
start = ALIGN(start, SECTION_SIZE);
if (end < limit) {
limit = end & SECTION_MASK;
memblock_set_current_limit(limit);
}
}
#endif
__map_memblock(start, end);
}
......@@ -456,7 +510,7 @@ void __init paging_init(void)
* point to zero page to avoid speculatively fetching new entries.
*/
cpu_set_reserved_ttbr0();
flush_tlb_all();
local_flush_tlb_all();
cpu_set_default_tcr_t0sz();
}
......@@ -498,12 +552,12 @@ int kern_addr_valid(unsigned long addr)
return pfn_valid(pte_pfn(*pte));
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
#ifdef CONFIG_ARM64_64K_PAGES
#if !ARM64_SWAPPER_USES_SECTION_MAPS
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
return vmemmap_populate_basepages(start, end, node);
}
#else /* !CONFIG_ARM64_64K_PAGES */
#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
unsigned long addr = start;
......@@ -638,7 +692,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
{
const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
pgprot_t prot = PAGE_KERNEL | PTE_RDONLY;
int granularity, size, offset;
int size, offset;
void *dt_virt;
/*
......@@ -664,24 +718,15 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
*/
BUILD_BUG_ON(dt_virt_base % SZ_2M);
if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) {
BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PMD_SHIFT !=
__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT);
granularity = PAGE_SIZE;
} else {
BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PUD_SHIFT !=
__fix_to_virt(FIX_BTMAP_BEGIN) >> PUD_SHIFT);
granularity = PMD_SIZE;
}
BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
__fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);
offset = dt_phys % granularity;
offset = dt_phys % SWAPPER_BLOCK_SIZE;
dt_virt = (void *)dt_virt_base + offset;
/* map the first chunk so we can read the size from the header */
create_mapping(round_down(dt_phys, granularity), dt_virt_base,
granularity, prot);
create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
SWAPPER_BLOCK_SIZE, prot);
if (fdt_check_header(dt_virt) != 0)
return NULL;
......@@ -690,9 +735,9 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
if (size > MAX_FDT_SIZE)
return NULL;
if (offset + size > granularity)
create_mapping(round_down(dt_phys, granularity), dt_virt_base,
round_up(offset + size, granularity), prot);
if (offset + size > SWAPPER_BLOCK_SIZE)
create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
memblock_reserve(dt_phys, size);
......
......@@ -45,7 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages,
int ret;
struct page_change_data data;
if (!IS_ALIGNED(addr, PAGE_SIZE)) {
if (!PAGE_ALIGNED(addr)) {
start &= PAGE_MASK;
end = start + size;
WARN_ON_ONCE(1);
......
......@@ -28,8 +28,6 @@
#include "mm.h"
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
static struct kmem_cache *pgd_cache;
pgd_t *pgd_alloc(struct mm_struct *mm)
......
......@@ -30,7 +30,9 @@
#ifdef CONFIG_ARM64_64K_PAGES
#define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K
#else
#elif defined(CONFIG_ARM64_16K_PAGES)
#define TCR_TG_FLAGS TCR_TG0_16K | TCR_TG1_16K
#else /* CONFIG_ARM64_4K_PAGES */
#define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K
#endif
......@@ -130,7 +132,7 @@ ENDPROC(cpu_do_resume)
* - pgd_phys - physical address of new TTB
*/
ENTRY(cpu_do_switch_mm)
mmid w1, x1 // get mm->context.id
mmid x1, x1 // get mm->context.id
bfi x0, x1, #48, #16 // set the ASID
msr ttbr0_el1, x0 // set TTBR0
isb
......@@ -146,8 +148,8 @@ ENDPROC(cpu_do_switch_mm)
* value of the SCTLR_EL1 register.
*/
ENTRY(__cpu_setup)
tlbi vmalle1is // invalidate I + D TLBs
dsb ish
tlbi vmalle1 // Invalidate local TLB
dsb nsh
mov x0, #3 << 20
msr cpacr_el1, x0 // Enable FP/ASIMD
......
#
# Makefile for linux kernel
#
#
# ARM64 maps efi runtime services in userspace addresses
# which don't have KASAN shadow. So dereference of these addresses
# in efi_call_virt() will cause crash if this code instrumented.
#
KASAN_SANITIZE_runtime-wrappers.o := n
obj-$(CONFIG_EFI) += efi.o vars.o reboot.o
obj-$(CONFIG_EFI_VARS) += efivars.o
obj-$(CONFIG_EFI_ESRT) += esrt.o
......
......@@ -14,6 +14,8 @@ cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS))
cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \
-fno-builtin -fpic -mno-single-pic-base
cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt
KBUILD_CFLAGS := $(cflags-y) \
$(call cc-option,-ffreestanding) \
$(call cc-option,-fno-stack-protector)
......@@ -22,7 +24,18 @@ GCOV_PROFILE := n
KASAN_SANITIZE := n
lib-y := efi-stub-helper.o
lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o
# include the stub's generic dependencies from lib/ when building for ARM/arm64
arm-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c sort.c
$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
$(call if_changed_rule,cc_o_c)
lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o string.o \
$(patsubst %.c,lib-%.o,$(arm-deps))
lib-$(CONFIG_ARM64) += arm64-stub.o
CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
#
# arm64 puts the stub in the kernel proper, which will unnecessarily retain all
......@@ -30,10 +43,27 @@ lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o
# So let's apply the __init annotations at the section level, by prefixing
# the section names directly. This will ensure that even all the inline string
# literals are covered.
# The fact that the stub and the kernel proper are essentially the same binary
# also means that we need to be extra careful to make sure that the stub does
# not rely on any absolute symbol references, considering that the virtual
# kernel mapping that the linker uses is not active yet when the stub is
# executing. So build all C dependencies of the EFI stub into libstub, and do
# a verification pass to see if any absolute relocations exist in any of the
# object files.
#
extra-$(CONFIG_ARM64) := $(lib-y)
lib-$(CONFIG_ARM64) := $(patsubst %.o,%.init.o,$(lib-y))
extra-$(CONFIG_EFI_ARMSTUB) := $(lib-y)
lib-$(CONFIG_EFI_ARMSTUB) := $(patsubst %.o,%.stub.o,$(lib-y))
STUBCOPY_FLAGS-y := -R .debug* -R *ksymtab* -R *kcrctab*
STUBCOPY_FLAGS-$(CONFIG_ARM64) += --prefix-alloc-sections=.init \
--prefix-symbols=__efistub_
STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS
$(obj)/%.stub.o: $(obj)/%.o FORCE
$(call if_changed,stubcopy)
OBJCOPYFLAGS := --prefix-alloc-sections=.init
$(obj)/%.init.o: $(obj)/%.o FORCE
$(call if_changed,objcopy)
quiet_cmd_stubcopy = STUBCPY $@
cmd_stubcopy = if $(OBJCOPY) $(STUBCOPY_FLAGS-y) $< $@; then \
$(OBJDUMP) -r $@ | grep $(STUBCOPY_RELOC-y) \
&& (echo >&2 "$@: absolute symbol references not allowed in the EFI stub"; \
rm -f $@; /bin/false); else /bin/false; fi
......@@ -147,15 +147,6 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
if (status)
goto fdt_set_fail;
/*
* Add kernel version banner so stub/kernel match can be
* verified.
*/
status = fdt_setprop_string(fdt, node, "linux,uefi-stub-kern-ver",
linux_banner);
if (status)
goto fdt_set_fail;
return EFI_SUCCESS;
fdt_set_fail:
......
/*
* Taken from:
* linux/lib/string.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*/
#include <linux/types.h>
#include <linux/string.h>
#ifndef __HAVE_ARCH_STRSTR
/**
* strstr - Find the first substring in a %NUL terminated string
* @s1: The string to be searched
* @s2: The string to search for
*/
char *strstr(const char *s1, const char *s2)
{
size_t l1, l2;
l2 = strlen(s2);
if (!l2)
return (char *)s1;
l1 = strlen(s1);
while (l1 >= l2) {
l1--;
if (!memcmp(s1, s2, l2))
return (char *)s1;
s1++;
}
return NULL;
}
#endif
#ifndef __HAVE_ARCH_STRNCMP
/**
* strncmp - Compare two length-limited strings
* @cs: One string
* @ct: Another string
* @count: The maximum number of bytes to compare
*/
int strncmp(const char *cs, const char *ct, size_t count)
{
unsigned char c1, c2;
while (count) {
c1 = *cs++;
c2 = *ct++;
if (c1 != c2)
return c1 < c2 ? -1 : 1;
if (!c1)
break;
count--;
}
return 0;
}
#endif
......@@ -5,7 +5,7 @@
menu "Performance monitor support"
config ARM_PMU
depends on PERF_EVENTS && ARM
depends on PERF_EVENTS && (ARM || ARM64)
bool "ARM PMU framework"
default y
help
......
......@@ -36,7 +36,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
c = irq_data_get_irq_chip(d);
if (!c->irq_set_affinity) {
pr_warn_ratelimited("IRQ%u: unable to set affinity\n", d->irq);
pr_debug("IRQ%u: unable to set affinity\n", d->irq);
} else {
int r = irq_do_set_affinity(d, affinity, false);
if (r)
......
......@@ -5,10 +5,12 @@ else
call_threshold := 0
endif
KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET)
CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address
CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \
-fasan-shadow-offset=$(CONFIG_KASAN_SHADOW_OFFSET) \
-fasan-shadow-offset=$(KASAN_SHADOW_OFFSET) \
--param asan-stack=1 --param asan-globals=1 \
--param asan-instrumentation-with-call-threshold=$(call_threshold))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment