Commit d22fff81 authored by Linus Torvalds

Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:

 - Extend the memmap= boot parameter syntax to allow the redeclaration
   and dropping of existing ranges, and to support all e820 range types
   (Jan H. Schönherr)

 - Improve the W+X boot time security checks to remove false positive
   warnings on Xen (Jan Beulich)

 - Support booting as Xen PVH guest (Juergen Gross)

 - Improved 5-level paging (LA57) support; in particular, it is now
   possible to have a single kernel image for both 4-level and 5-level
   hardware (Kirill A. Shutemov)

 - AMD hardware RAM encryption support (SME/SEV) fixes (Tom Lendacky)

 - Preparatory commits for hardware-encrypted RAM support on Intel CPUs
   (Kirill A. Shutemov)

 - Improved Intel-MID support (Andy Shevchenko)

 - Show EFI page tables in page_tables debug files (Andy Lutomirski)

 - ... plus misc fixes and smaller cleanups

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (56 commits)
  x86/cpu/tme: Fix spelling: "configuation" -> "configuration"
  x86/boot: Fix SEV boot failure from change to __PHYSICAL_MASK_SHIFT
  x86/mm: Update comment in detect_tme() regarding x86_phys_bits
  x86/mm/32: Remove unused node_memmap_size_bytes() & CONFIG_NEED_NODE_MEMMAP_SIZE logic
  x86/mm: Remove pointless checks in vmalloc_fault
  x86/platform/intel-mid: Add special handling for ACPI HW reduced platforms
  ACPI, x86/boot: Introduce the ->reduced_hw_early_init() ACPI callback
  ACPI, x86/boot: Split out acpi_generic_reduce_hw_init() and export
  x86/pconfig: Provide defines and helper to run MKTME_KEY_PROG leaf
  x86/pconfig: Detect PCONFIG targets
  x86/tme: Detect if TME and MKTME is activated by BIOS
  x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G
  x86/boot/compressed/64: Use page table in trampoline memory
  x86/boot/compressed/64: Use stack from trampoline memory
  x86/boot/compressed/64: Make sure we have a 32-bit code segment
  x86/mm: Do not use paravirtualized calls in native_set_p4d()
  kdump, vmcoreinfo: Export pgtable_l5_enabled value
  x86/boot/compressed/64: Prepare new top-level page table for trampoline
  x86/boot/compressed/64: Set up trampoline memory
  x86/boot/compressed/64: Save and restore trampoline memory
  ...
parents 986b37c0 eaeb8e76
@@ -2248,6 +2248,15 @@
The memory region may be marked as e820 type 12 (0xc)
and is NVDIMM or ADR memory.
memmap=<size>%<offset>-<oldtype>+<newtype>
[KNL,ACPI] Convert memory within the specified region
from <oldtype> to <newtype>. If "-<oldtype>" is left
out, the whole region will be marked as <newtype>,
even if previously unavailable. If "+<newtype>" is left
out, matching memory will be removed. Types are
specified as e820 types, e.g., 1 = RAM, 2 = reserved,
3 = ACPI, 12 = PRAM.
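For example (illustrative values only, not part of this commit):
memmap=100M%0x1000000-3+1 converts 100M of e820 type 3 (ACPI)
data starting at 16M into type 1 (usable RAM), and
memmap=64K%0xa0000-2 removes any type 2 (reserved) memory found
in the 64K region at 0xa0000.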
memory_corruption_check=0/1 [X86]
Some BIOSes seem to corrupt the first 64k of
memory when doing things like suspend/resume.
......
@@ -20,12 +20,9 @@ Documentation/x86/x86_64/mm.txt
 CONFIG_X86_5LEVEL=y enables the feature.
-So far, a kernel compiled with the option enabled will be able to boot
-only on machines that supports the feature -- see for 'la57' flag in
-/proc/cpuinfo.
-The plan is to implement boot-time switching between 4- and 5-level paging
-in the future.
+Kernel with CONFIG_X86_5LEVEL=y still able to boot on 4-level hardware.
+In this case additional page table level -- p4d -- will be folded at
+runtime.
 == User-space and large virtual address space ==
......
...@@ -1461,6 +1461,8 @@ config X86_PAE ...@@ -1461,6 +1461,8 @@ config X86_PAE
config X86_5LEVEL config X86_5LEVEL
bool "Enable 5-level page tables support" bool "Enable 5-level page tables support"
select DYNAMIC_MEMORY_LAYOUT
select SPARSEMEM_VMEMMAP
depends on X86_64 depends on X86_64
---help--- ---help---
5-level paging enables access to larger address space: 5-level paging enables access to larger address space:
...@@ -1469,8 +1471,8 @@ config X86_5LEVEL ...@@ -1469,8 +1471,8 @@ config X86_5LEVEL
It will be supported by future Intel CPUs. It will be supported by future Intel CPUs.
Note: a kernel with this option enabled can only be booted A kernel with the option enabled can be booted on machines that
on machines that support the feature. support 4- or 5-level paging.
See Documentation/x86/x86_64/5level-paging.txt for more See Documentation/x86/x86_64/5level-paging.txt for more
information. information.
...@@ -1595,10 +1597,6 @@ config ARCH_HAVE_MEMORY_PRESENT ...@@ -1595,10 +1597,6 @@ config ARCH_HAVE_MEMORY_PRESENT
def_bool y def_bool y
depends on X86_32 && DISCONTIGMEM depends on X86_32 && DISCONTIGMEM
config NEED_NODE_MEMMAP_SIZE
def_bool y
depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)
config ARCH_FLATMEM_ENABLE config ARCH_FLATMEM_ENABLE
def_bool y def_bool y
depends on X86_32 && !NUMA depends on X86_32 && !NUMA
...@@ -2174,10 +2172,17 @@ config PHYSICAL_ALIGN ...@@ -2174,10 +2172,17 @@ config PHYSICAL_ALIGN
Don't change this unless you know what you are doing. Don't change this unless you know what you are doing.
config DYNAMIC_MEMORY_LAYOUT
bool
---help---
This option makes base addresses of vmalloc and vmemmap as well as
__PAGE_OFFSET movable during boot.
config RANDOMIZE_MEMORY config RANDOMIZE_MEMORY
bool "Randomize the kernel memory sections" bool "Randomize the kernel memory sections"
depends on X86_64 depends on X86_64
depends on RANDOMIZE_BASE depends on RANDOMIZE_BASE
select DYNAMIC_MEMORY_LAYOUT
default RANDOMIZE_BASE default RANDOMIZE_BASE
---help--- ---help---
Randomizes the base virtual address of kernel memory sections Randomizes the base virtual address of kernel memory sections
......
...@@ -78,7 +78,7 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ ...@@ -78,7 +78,7 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64 ifdef CONFIG_X86_64
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr_64.o
vmlinux-objs-y += $(obj)/mem_encrypt.o vmlinux-objs-y += $(obj)/mem_encrypt.o
vmlinux-objs-y += $(obj)/pgtable_64.o vmlinux-objs-y += $(obj)/pgtable_64.o
endif endif
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <asm/processor-flags.h> #include <asm/processor-flags.h>
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#include <asm/bootparam.h> #include <asm/bootparam.h>
#include "pgtable.h"
/* /*
* Locally defined symbols should be marked hidden: * Locally defined symbols should be marked hidden:
...@@ -304,55 +305,77 @@ ENTRY(startup_64) ...@@ -304,55 +305,77 @@ ENTRY(startup_64)
/* Set up the stack */ /* Set up the stack */
leaq boot_stack_end(%rbx), %rsp leaq boot_stack_end(%rbx), %rsp
#ifdef CONFIG_X86_5LEVEL
/* /*
* Check if we need to enable 5-level paging. * At this point we are in long mode with 4-level paging enabled,
* RSI holds real mode data and need to be preserved across * but we might want to enable 5-level paging or vice versa.
* a function call. *
* The problem is that we cannot do it directly. Setting or clearing
* CR4.LA57 in long mode would trigger #GP. So we need to switch off
* long mode and paging first.
*
* We also need a trampoline in lower memory to switch over from
* 4- to 5-level paging for cases when the bootloader puts the kernel
* above 4G, but didn't enable 5-level paging for us.
*
* The same trampoline can be used to switch from 5- to 4-level paging
* mode, like when starting 4-level paging kernel via kexec() when
* original kernel worked in 5-level paging mode.
*
* For the trampoline, we need the top page table to reside in lower
* memory as we don't have a way to load 64-bit values into CR3 in
* 32-bit mode.
*
* We go though the trampoline even if we don't have to: if we're
* already in a desired paging mode. This way the trampoline code gets
* tested on every boot.
*/ */
pushq %rsi
call l5_paging_required
popq %rsi
/* If l5_paging_required() returned zero, we're done here. */ /* Make sure we have GDT with 32-bit code segment */
cmpq $0, %rax leaq gdt(%rip), %rax
je lvl5 movq %rax, gdt64+2(%rip)
lgdt gdt64(%rip)
/* /*
* At this point we are in long mode with 4-level paging enabled, * paging_prepare() sets up the trampoline and checks if we need to
* but we want to enable 5-level paging. * enable 5-level paging.
* *
* The problem is that we cannot do it directly. Setting LA57 in * Address of the trampoline is returned in RAX.
* long mode would trigger #GP. So we need to switch off long mode * Non zero RDX on return means we need to enable 5-level paging.
* first.
* *
* NOTE: This is not going to work if bootloader put us above 4G * RSI holds real mode data and needs to be preserved across
* limit. * this function call.
*
* The first step is go into compatibility mode.
*/ */
pushq %rsi
call paging_prepare
popq %rsi
/* Clear additional page table */ /* Save the trampoline address in RCX */
leaq lvl5_pgtable(%rbx), %rdi movq %rax, %rcx
xorq %rax, %rax
movq $(PAGE_SIZE/8), %rcx
rep stosq
/* /*
* Setup current CR3 as the first and only entry in a new top level * Load the address of trampoline_return() into RDI.
* page table. * It will be used by the trampoline to return to the main code.
*/ */
movq %cr3, %rdi leaq trampoline_return(%rip), %rdi
leaq 0x7 (%rdi), %rax
movq %rax, lvl5_pgtable(%rbx)
/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
pushq $__KERNEL32_CS pushq $__KERNEL32_CS
leaq compatible_mode(%rip), %rax leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
pushq %rax pushq %rax
lretq lretq
lvl5: trampoline_return:
#endif /* Restore the stack, the 32-bit trampoline uses its own stack */
leaq boot_stack_end(%rbx), %rsp
/*
* cleanup_trampoline() would restore trampoline memory.
*
* RSI holds real mode data and needs to be preserved across
* this function call.
*/
pushq %rsi
call cleanup_trampoline
popq %rsi
/* Zero EFLAGS */ /* Zero EFLAGS */
pushq $0 pushq $0
...@@ -490,46 +513,82 @@ relocated: ...@@ -490,46 +513,82 @@ relocated:
jmp *%rax jmp *%rax
.code32 .code32
#ifdef CONFIG_X86_5LEVEL /*
compatible_mode: * This is the 32-bit trampoline that will be copied over to low memory.
/* Setup data and stack segments */ *
* RDI contains the return address (might be above 4G).
* ECX contains the base address of the trampoline memory.
* Non zero RDX on return means we need to enable 5-level paging.
*/
ENTRY(trampoline_32bit_src)
/* Set up data and stack segments */
movl $__KERNEL_DS, %eax movl $__KERNEL_DS, %eax
movl %eax, %ds movl %eax, %ds
movl %eax, %ss movl %eax, %ss
/* Set up new stack */
leal TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
/* Disable paging */ /* Disable paging */
movl %cr0, %eax movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax btrl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0 movl %eax, %cr0
/* Point CR3 to 5-level paging */ /* Check what paging mode we want to be in after the trampoline */
leal lvl5_pgtable(%ebx), %eax cmpl $0, %edx
movl %eax, %cr3 jz 1f
/* Enable PAE and LA57 mode */ /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
movl %cr4, %eax
testl $X86_CR4_LA57, %eax
jnz 3f
jmp 2f
1:
/* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
movl %cr4, %eax movl %cr4, %eax
orl $(X86_CR4_PAE | X86_CR4_LA57), %eax testl $X86_CR4_LA57, %eax
jz 3f
2:
/* Point CR3 to the trampoline's new top level page table */
leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
movl %eax, %cr3
3:
/* Enable PAE and LA57 (if required) paging modes */
movl $X86_CR4_PAE, %eax
cmpl $0, %edx
jz 1f
orl $X86_CR4_LA57, %eax
1:
movl %eax, %cr4 movl %eax, %cr4
/* Calculate address we are running at */ /* Calculate address of paging_enabled() once we are executing in the trampoline */
call 1f leal paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
1: popl %edi
subl $1b, %edi
/* Prepare stack for far return to Long Mode */ /* Prepare the stack for far return to Long Mode */
pushl $__KERNEL_CS pushl $__KERNEL_CS
leal lvl5(%edi), %eax pushl %eax
push %eax
/* Enable paging back */ /* Enable paging again */
movl $(X86_CR0_PG | X86_CR0_PE), %eax movl $(X86_CR0_PG | X86_CR0_PE), %eax
movl %eax, %cr0 movl %eax, %cr0
lret lret
#endif
.code64
paging_enabled:
/* Return from the trampoline */
jmp *%rdi
/*
* The trampoline code has a size limit.
* Make sure we fail to compile if the trampoline code grows
* beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
*/
.org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
.code32
no_longmode: no_longmode:
/* This isn't an x86-64 CPU so hang */ /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1: 1:
hlt hlt
jmp 1b jmp 1b
...@@ -537,6 +596,11 @@ no_longmode: ...@@ -537,6 +596,11 @@ no_longmode:
#include "../../kernel/verify_cpu.S" #include "../../kernel/verify_cpu.S"
.data .data
gdt64:
.word gdt_end - gdt
.long 0
.word 0
.quad 0
gdt: gdt:
.word gdt_end - gdt .word gdt_end - gdt
.long gdt .long gdt
...@@ -585,7 +649,3 @@ boot_stack_end: ...@@ -585,7 +649,3 @@ boot_stack_end:
.balign 4096 .balign 4096
pgtable: pgtable:
.fill BOOT_PGT_SIZE, 1, 0 .fill BOOT_PGT_SIZE, 1, 0
#ifdef CONFIG_X86_5LEVEL
lvl5_pgtable:
.fill PAGE_SIZE, 1, 0
#endif
...@@ -46,6 +46,12 @@ ...@@ -46,6 +46,12 @@
#define STATIC #define STATIC
#include <linux/decompress/mm.h> #include <linux/decompress/mm.h>
#ifdef CONFIG_X86_5LEVEL
unsigned int pgtable_l5_enabled __ro_after_init;
unsigned int pgdir_shift __ro_after_init = 39;
unsigned int ptrs_per_p4d __ro_after_init = 1;
#endif
extern unsigned long get_cmd_line_ptr(void); extern unsigned long get_cmd_line_ptr(void);
/* Simplified build-specific string for starting entropy. */ /* Simplified build-specific string for starting entropy. */
...@@ -723,6 +729,14 @@ void choose_random_location(unsigned long input, ...@@ -723,6 +729,14 @@ void choose_random_location(unsigned long input,
return; return;
} }
#ifdef CONFIG_X86_5LEVEL
if (__read_cr4() & X86_CR4_LA57) {
pgtable_l5_enabled = 1;
pgdir_shift = 48;
ptrs_per_p4d = 512;
}
#endif
boot_params->hdr.loadflags |= KASLR_FLAG; boot_params->hdr.loadflags |= KASLR_FLAG;
/* Prepare to add new identity pagetables on demand. */ /* Prepare to add new identity pagetables on demand. */
......
...@@ -16,13 +16,6 @@ ...@@ -16,13 +16,6 @@
#define __pa(x) ((unsigned long)(x)) #define __pa(x) ((unsigned long)(x))
#define __va(x) ((void *)((unsigned long)(x))) #define __va(x) ((void *)((unsigned long)(x)))
/*
* The pgtable.h and mm/ident_map.c includes make use of the SME related
* information which is not used in the compressed image support. Un-define
* the SME support to avoid any compile and link errors.
*/
#undef CONFIG_AMD_MEM_ENCRYPT
/* No PAGE_TABLE_ISOLATION support needed either: */ /* No PAGE_TABLE_ISOLATION support needed either: */
#undef CONFIG_PAGE_TABLE_ISOLATION #undef CONFIG_PAGE_TABLE_ISOLATION
...@@ -85,13 +78,14 @@ static struct x86_mapping_info mapping_info; ...@@ -85,13 +78,14 @@ static struct x86_mapping_info mapping_info;
/* Locates and clears a region for a new top level page table. */ /* Locates and clears a region for a new top level page table. */
void initialize_identity_maps(void) void initialize_identity_maps(void)
{ {
unsigned long sev_me_mask = get_sev_encryption_mask(); /* If running as an SEV guest, the encryption mask is required. */
set_sev_encryption_mask();
/* Init mapping_info with run-time function/buffer pointers. */ /* Init mapping_info with run-time function/buffer pointers. */
mapping_info.alloc_pgt_page = alloc_pgt_page; mapping_info.alloc_pgt_page = alloc_pgt_page;
mapping_info.context = &pgt_data; mapping_info.context = &pgt_data;
mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sev_me_mask; mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
mapping_info.kernpg_flag = _KERNPG_TABLE | sev_me_mask; mapping_info.kernpg_flag = _KERNPG_TABLE;
/* /*
* It should be impossible for this not to already be true, * It should be impossible for this not to already be true,
......
...@@ -88,9 +88,7 @@ ENTRY(get_sev_encryption_bit) ...@@ -88,9 +88,7 @@ ENTRY(get_sev_encryption_bit)
ENDPROC(get_sev_encryption_bit) ENDPROC(get_sev_encryption_bit)
.code64 .code64
ENTRY(get_sev_encryption_mask) ENTRY(set_sev_encryption_mask)
xor %rax, %rax
#ifdef CONFIG_AMD_MEM_ENCRYPT #ifdef CONFIG_AMD_MEM_ENCRYPT
push %rbp push %rbp
push %rdx push %rdx
...@@ -101,9 +99,7 @@ ENTRY(get_sev_encryption_mask) ...@@ -101,9 +99,7 @@ ENTRY(get_sev_encryption_mask)
testl %eax, %eax testl %eax, %eax
jz .Lno_sev_mask jz .Lno_sev_mask
xor %rdx, %rdx bts %rax, sme_me_mask(%rip) /* Create the encryption mask */
bts %rax, %rdx /* Create the encryption mask */
mov %rdx, %rax /* ... and return it */
.Lno_sev_mask: .Lno_sev_mask:
movq %rbp, %rsp /* Restore original stack pointer */ movq %rbp, %rsp /* Restore original stack pointer */
...@@ -112,9 +108,16 @@ ENTRY(get_sev_encryption_mask) ...@@ -112,9 +108,16 @@ ENTRY(get_sev_encryption_mask)
pop %rbp pop %rbp
#endif #endif
xor %rax, %rax
ret ret
ENDPROC(get_sev_encryption_mask) ENDPROC(set_sev_encryption_mask)
.data .data
enc_bit: enc_bit:
.int 0xffffffff .int 0xffffffff
#ifdef CONFIG_AMD_MEM_ENCRYPT
.balign 8
GLOBAL(sme_me_mask)
.quad 0
#endif
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include "misc.h" #include "misc.h"
#include "error.h" #include "error.h"
#include "pgtable.h"
#include "../string.h" #include "../string.h"
#include "../voffset.h" #include "../voffset.h"
...@@ -169,16 +170,6 @@ void __puthex(unsigned long value) ...@@ -169,16 +170,6 @@ void __puthex(unsigned long value)
} }
} }
static bool l5_supported(void)
{
/* Check if leaf 7 is supported. */
if (native_cpuid_eax(0) < 7)
return 0;
/* Check if la57 is supported. */
return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
}
#if CONFIG_X86_NEED_RELOCS #if CONFIG_X86_NEED_RELOCS
static void handle_relocations(void *output, unsigned long output_len, static void handle_relocations(void *output, unsigned long output_len,
unsigned long virt_addr) unsigned long virt_addr)
...@@ -376,12 +367,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, ...@@ -376,12 +367,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
console_init(); console_init();
debug_putstr("early console in extract_kernel\n"); debug_putstr("early console in extract_kernel\n");
if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
error("This linux kernel as configured requires 5-level paging\n"
"This CPU does not support the required 'cr4.la57' feature\n"
"Unable to boot - please use a kernel appropriate for your CPU\n");
}
free_mem_ptr = heap; /* Heap */ free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE; free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
...@@ -392,6 +377,11 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, ...@@ -392,6 +377,11 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
debug_putaddr(output_len); debug_putaddr(output_len);
debug_putaddr(kernel_total_size); debug_putaddr(kernel_total_size);
#ifdef CONFIG_X86_64
/* Report address of 32-bit trampoline */
debug_putaddr(trampoline_32bit);
#endif
/* /*
* The memory hole needed for the kernel is the larger of either * The memory hole needed for the kernel is the larger of either
* the entire decompressed kernel plus relocation table, or the * the entire decompressed kernel plus relocation table, or the
......
...@@ -12,6 +12,11 @@ ...@@ -12,6 +12,11 @@
#undef CONFIG_PARAVIRT_SPINLOCKS #undef CONFIG_PARAVIRT_SPINLOCKS
#undef CONFIG_KASAN #undef CONFIG_KASAN
#ifdef CONFIG_X86_5LEVEL
/* cpu_feature_enabled() cannot be used that early */
#define pgtable_l5_enabled __pgtable_l5_enabled
#endif
#include <linux/linkage.h> #include <linux/linkage.h>
#include <linux/screen_info.h> #include <linux/screen_info.h>
#include <linux/elf.h> #include <linux/elf.h>
...@@ -109,6 +114,6 @@ static inline void console_init(void) ...@@ -109,6 +114,6 @@ static inline void console_init(void)
{ } { }
#endif #endif
unsigned long get_sev_encryption_mask(void); void set_sev_encryption_mask(void);
#endif #endif
#ifndef BOOT_COMPRESSED_PAGETABLE_H
#define BOOT_COMPRESSED_PAGETABLE_H
#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE)
#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0
#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
#define TRAMPOLINE_32BIT_CODE_SIZE 0x60
#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE
#ifndef __ASSEMBLER__
extern unsigned long *trampoline_32bit;
extern void trampoline_32bit_src(void *return_ptr);
#endif /* __ASSEMBLER__ */
#endif /* BOOT_COMPRESSED_PAGETABLE_H */
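A hedged sketch (not part of the commit) of the two-page trampoline layout these defines describe; the base address used below is an assumed example, not a value taken from the code:

#include <stdio.h>

/* Mirrors of the defines above, restated so the sketch is self-contained. */
#define PAGE_SIZE                        0x1000UL
#define TRAMPOLINE_32BIT_SIZE            (2 * PAGE_SIZE)
#define TRAMPOLINE_32BIT_PGTABLE_OFFSET  0
#define TRAMPOLINE_32BIT_CODE_OFFSET     PAGE_SIZE
#define TRAMPOLINE_32BIT_STACK_END       TRAMPOLINE_32BIT_SIZE

int main(void)
{
	unsigned long base = 0x9d000UL;	/* assumed trampoline_start, below 640K */

	printf("page table : %#lx\n", base + TRAMPOLINE_32BIT_PGTABLE_OFFSET);
	printf("32-bit code: %#lx (at most 0x60 bytes)\n", base + TRAMPOLINE_32BIT_CODE_OFFSET);
	printf("stack top  : %#lx (stack grows down from here)\n", base + TRAMPOLINE_32BIT_STACK_END);
	return 0;
}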
#include <asm/processor.h> #include <asm/processor.h>
#include "pgtable.h"
#include "../string.h"
/* /*
* __force_order is used by special_insns.h asm code to force instruction * __force_order is used by special_insns.h asm code to force instruction
...@@ -9,20 +11,144 @@ ...@@ -9,20 +11,144 @@
*/ */
unsigned long __force_order; unsigned long __force_order;
int l5_paging_required(void) #define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
struct paging_config {
unsigned long trampoline_start;
unsigned long l5_required;
};
/* Buffer to preserve trampoline memory */
static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
/*
* The page table is going to be used instead of page table in the trampoline
* memory.
*
* It must not be in BSS as BSS is cleared after cleanup_trampoline().
*/
static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
/*
* Trampoline address will be printed by extract_kernel() for debugging
* purposes.
*
* Avoid putting the pointer into .bss as it will be cleared between
* paging_prepare() and extract_kernel().
*/
unsigned long *trampoline_32bit __section(.data);
struct paging_config paging_prepare(void)
{ {
/* Check if leaf 7 is supported. */ struct paging_config paging_config = {};
unsigned long bios_start, ebda_start;
/*
* Check if LA57 is desired and supported.
*
* There are two parts to the check:
* - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
* - if the machine supports 5-level paging:
* + CPUID leaf 7 is supported
* + the leaf has the feature bit set
*
* That's substitute for boot_cpu_has() in early boot code.
*/
if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
native_cpuid_eax(0) >= 7 &&
(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
paging_config.l5_required = 1;
}
/*
* Find a suitable spot for the trampoline.
* This code is based on reserve_bios_regions().
*/
ebda_start = *(unsigned short *)0x40e << 4;
bios_start = *(unsigned short *)0x413 << 10;
if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
bios_start = BIOS_START_MAX;
if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
bios_start = ebda_start;
if (native_cpuid_eax(0) < 7) /* Place the trampoline just below the end of low memory, aligned to 4k */
return 0; paging_config.trampoline_start = bios_start - TRAMPOLINE_32BIT_SIZE;
paging_config.trampoline_start = round_down(paging_config.trampoline_start, PAGE_SIZE);
/* Check if la57 is supported. */ trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
return 0;
/* Check if 5-level paging has already been enabled. */ /* Preserve trampoline memory */
if (native_read_cr4() & X86_CR4_LA57) memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
return 0;
/* Clear trampoline memory first */
memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
/* Copy trampoline code in place */
memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
&trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
/*
* The code below prepares page table in trampoline memory.
*
* The new page table will be used by trampoline code for switching
* from 4- to 5-level paging or vice versa.
*
* If switching is not required, the page table is unused: trampoline
* code wouldn't touch CR3.
*/
/*
* We are not going to use the page table in trampoline memory if we
* are already in the desired paging mode.
*/
if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
goto out;
if (paging_config.l5_required) {
/*
* For 4- to 5-level paging transition, set up current CR3 as
* the first and the only entry in a new top-level page table.
*/
trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
} else {
unsigned long src;
/*
* For 5- to 4-level paging transition, copy page table pointed
* by first entry in the current top-level page table as our
* new top-level page table.
*
* We cannot just point to the page table from trampoline as it
* may be above 4G.
*/
src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
(void *)src, PAGE_SIZE);
}
out:
return paging_config;
}
void cleanup_trampoline(void)
{
void *trampoline_pgtable;
trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET;
/*
* Move the top level page table out of trampoline memory,
* if it's there.
*/
if ((void *)__native_read_cr3() == trampoline_pgtable) {
memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE);
native_write_cr3((unsigned long)top_pgtable);
}
return 1; /* Restore trampoline memory */
memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
} }
...@@ -260,8 +260,13 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) ...@@ -260,8 +260,13 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
* Change top bits to match most significant bit (47th or 56th bit * Change top bits to match most significant bit (47th or 56th bit
* depending on paging mode) in the address. * depending on paging mode) in the address.
*/ */
#ifdef CONFIG_X86_5LEVEL
ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
"shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
#else
shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
#endif
/* If this changed %rcx, it was not canonical */ /* If this changed %rcx, it was not canonical */
cmpq %rcx, %r11 cmpq %rcx, %r11
......
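As a hedged user-space illustration (not kernel code) of what the shl/sar pair above checks for the 4-level case: an address is canonical iff sign-extending it from bit 47 leaves it unchanged. With LA57 the shift count becomes 64 - 57, which is what the ALTERNATIVE patches in at boot.

#include <stdbool.h>
#include <stdint.h>

/*
 * Illustration only: canonical check for 48-bit virtual addresses (4-level
 * paging). Assumes the usual two's-complement arithmetic right shift, which
 * holds for x86-64 compilers.
 */
static bool is_canonical_48(uint64_t addr)
{
	/* shl $(64 - 48); sar $(64 - 48): the round trip must be a no-op. */
	return (uint64_t)(((int64_t)(addr << 16)) >> 16) == addr;
}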
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include <asm/mmu.h> #include <asm/mmu.h>
#include <asm/mpspec.h> #include <asm/mpspec.h>
#include <asm/realmode.h> #include <asm/realmode.h>
#include <asm/x86_init.h>
#ifdef CONFIG_ACPI_APEI #ifdef CONFIG_ACPI_APEI
# include <asm/pgtable_types.h> # include <asm/pgtable_types.h>
...@@ -133,6 +134,14 @@ static inline bool acpi_has_cpu_in_madt(void) ...@@ -133,6 +134,14 @@ static inline bool acpi_has_cpu_in_madt(void)
return !!acpi_lapic; return !!acpi_lapic;
} }
#define ACPI_HAVE_ARCH_GET_ROOT_POINTER
static inline u64 acpi_arch_get_root_pointer(void)
{
return x86_init.acpi.get_root_pointer();
}
void acpi_generic_reduced_hw_init(void);
#else /* !CONFIG_ACPI */ #else /* !CONFIG_ACPI */
#define acpi_lapic 0 #define acpi_lapic 0
...@@ -142,6 +151,8 @@ static inline void acpi_noirq_set(void) { } ...@@ -142,6 +151,8 @@ static inline void acpi_noirq_set(void) { }
static inline void acpi_disable_pci(void) { } static inline void acpi_disable_pci(void) { }
static inline void disable_acpi(void) { } static inline void disable_acpi(void) { }
static inline void acpi_generic_reduced_hw_init(void) { }
#endif /* !CONFIG_ACPI */ #endif /* !CONFIG_ACPI */
#define ARCH_HAS_POWER_INIT 1 #define ARCH_HAS_POWER_INIT 1
......
#ifndef _ASM_X86_INTEL_PCONFIG_H
#define _ASM_X86_INTEL_PCONFIG_H
#include <asm/asm.h>
#include <asm/processor.h>
enum pconfig_target {
INVALID_TARGET = 0,
MKTME_TARGET = 1,
PCONFIG_TARGET_NR
};
int pconfig_target_supported(enum pconfig_target target);
enum pconfig_leaf {
MKTME_KEY_PROGRAM = 0,
PCONFIG_LEAF_INVALID,
};
#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */
/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */
#define MKTME_KEYID_SET_KEY_DIRECT 0
#define MKTME_KEYID_SET_KEY_RANDOM 1
#define MKTME_KEYID_CLEAR_KEY 2
#define MKTME_KEYID_NO_ENCRYPT 3
/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */
#define MKTME_AES_XTS_128 (1 << 8)
/* Return codes from the PCONFIG MKTME_KEY_PROGRAM */
#define MKTME_PROG_SUCCESS 0
#define MKTME_INVALID_PROG_CMD 1
#define MKTME_ENTROPY_ERROR 2
#define MKTME_INVALID_KEYID 3
#define MKTME_INVALID_ENC_ALG 4
#define MKTME_DEVICE_BUSY 5
/* Hardware requires the structure to be 256 byte aligned. Otherwise #GP(0). */
struct mktme_key_program {
u16 keyid;
u32 keyid_ctrl;
u8 __rsvd[58];
u8 key_field_1[64];
u8 key_field_2[64];
} __packed __aligned(256);
static inline int mktme_key_program(struct mktme_key_program *key_program)
{
unsigned long rax = MKTME_KEY_PROGRAM;
if (!pconfig_target_supported(MKTME_TARGET))
return -ENXIO;
asm volatile(PCONFIG
: "=a" (rax), "=b" (key_program)
: "0" (rax), "1" (key_program)
: "memory", "cc");
return rax;
}
#endif /* _ASM_X86_INTEL_PCONFIG_H */
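A hedged usage sketch of the helper above; the caller name and the choice of a CPU-generated random key are assumptions, not part of this commit:

#include <linux/types.h>
#include <linux/string.h>
#include <asm/intel_pconfig.h>

/* Hypothetical caller: ask the CPU to generate a random key for @keyid. */
static int example_program_random_key(u16 keyid)
{
	/* Static instance: the struct type is __aligned(256), as PCONFIG demands. */
	static struct mktme_key_program prog;

	memset(&prog, 0, sizeof(prog));
	prog.keyid = keyid;
	prog.keyid_ctrl = MKTME_KEYID_SET_KEY_RANDOM | MKTME_AES_XTS_128;

	/* Returns an MKTME_PROG_* code, or -ENXIO if MKTME_TARGET is unsupported. */
	return mktme_key_program(&prog);
}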
...@@ -5,10 +5,6 @@ ...@@ -5,10 +5,6 @@
unsigned long kaslr_get_random_long(const char *purpose); unsigned long kaslr_get_random_long(const char *purpose);
#ifdef CONFIG_RANDOMIZE_MEMORY #ifdef CONFIG_RANDOMIZE_MEMORY
extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;
void kernel_randomize_memory(void); void kernel_randomize_memory(void);
#else #else
static inline void kernel_randomize_memory(void) { } static inline void kernel_randomize_memory(void) { }
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#ifdef CONFIG_AMD_MEM_ENCRYPT #ifdef CONFIG_AMD_MEM_ENCRYPT
extern u64 sme_me_mask; extern u64 sme_me_mask;
extern bool sev_enabled;
void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr, void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
unsigned long decrypted_kernel_vaddr, unsigned long decrypted_kernel_vaddr,
......
...@@ -11,6 +11,10 @@ ...@@ -11,6 +11,10 @@
extern unsigned long max_pfn; extern unsigned long max_pfn;
extern unsigned long phys_base; extern unsigned long phys_base;
extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;
static inline unsigned long __phys_addr_nodebug(unsigned long x) static inline unsigned long __phys_addr_nodebug(unsigned long x)
{ {
unsigned long y = x - __START_KERNEL_map; unsigned long y = x - __START_KERNEL_map;
......
...@@ -37,26 +37,24 @@ ...@@ -37,26 +37,24 @@
* hypervisor to fit. Choosing 16 slots here is arbitrary, but it's * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
* what Xen requires. * what Xen requires.
*/ */
#ifdef CONFIG_X86_5LEVEL #define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL)
#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL) #define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL)
#else
#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL)
#endif
#ifdef CONFIG_RANDOMIZE_MEMORY #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
#define __PAGE_OFFSET page_offset_base #define __PAGE_OFFSET page_offset_base
#else #else
#define __PAGE_OFFSET __PAGE_OFFSET_BASE #define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4
#endif /* CONFIG_RANDOMIZE_MEMORY */ #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
#define __START_KERNEL_map _AC(0xffffffff80000000, UL) #define __START_KERNEL_map _AC(0xffffffff80000000, UL)
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
#ifdef CONFIG_X86_5LEVEL
#define __PHYSICAL_MASK_SHIFT 52 #define __PHYSICAL_MASK_SHIFT 52
#define __VIRTUAL_MASK_SHIFT 56
#ifdef CONFIG_X86_5LEVEL
#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled ? 56 : 47)
#else #else
#define __PHYSICAL_MASK_SHIFT 46
#define __VIRTUAL_MASK_SHIFT 47 #define __VIRTUAL_MASK_SHIFT 47
#endif #endif
......
...@@ -568,17 +568,22 @@ static inline p4dval_t p4d_val(p4d_t p4d) ...@@ -568,17 +568,22 @@ static inline p4dval_t p4d_val(p4d_t p4d)
return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d); return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d);
} }
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
{ {
pgdval_t val = native_pgd_val(pgd); PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd));
PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, val);
} }
static inline void pgd_clear(pgd_t *pgdp) #define set_pgd(pgdp, pgdval) do { \
{ if (pgtable_l5_enabled) \
set_pgd(pgdp, __pgd(0)); __set_pgd(pgdp, pgdval); \
} else \
set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \
} while (0)
#define pgd_clear(pgdp) do { \
if (pgtable_l5_enabled) \
set_pgd(pgdp, __pgd(0)); \
} while (0)
#endif /* CONFIG_PGTABLE_LEVELS == 5 */ #endif /* CONFIG_PGTABLE_LEVELS == 5 */
......
...@@ -167,6 +167,8 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, ...@@ -167,6 +167,8 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
#if CONFIG_PGTABLE_LEVELS > 4 #if CONFIG_PGTABLE_LEVELS > 4
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
{ {
if (!pgtable_l5_enabled)
return;
paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
} }
...@@ -191,6 +193,7 @@ extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d); ...@@ -191,6 +193,7 @@ extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long address) unsigned long address)
{ {
if (pgtable_l5_enabled)
___p4d_free_tlb(tlb, p4d); ___p4d_free_tlb(tlb, p4d);
} }
......
...@@ -44,5 +44,6 @@ typedef union { ...@@ -44,5 +44,6 @@ typedef union {
*/ */
#define PTRS_PER_PTE 512 #define PTRS_PER_PTE 512
#define MAX_POSSIBLE_PHYSMEM_BITS 36
#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ #endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */
...@@ -65,7 +65,7 @@ extern pmdval_t early_pmd_flags; ...@@ -65,7 +65,7 @@ extern pmdval_t early_pmd_flags;
#ifndef __PAGETABLE_P4D_FOLDED #ifndef __PAGETABLE_P4D_FOLDED
#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
#define pgd_clear(pgd) native_pgd_clear(pgd) #define pgd_clear(pgd) (pgtable_l5_enabled ? native_pgd_clear(pgd) : 0)
#endif #endif
#ifndef set_p4d #ifndef set_p4d
...@@ -859,6 +859,8 @@ static inline unsigned long p4d_index(unsigned long address) ...@@ -859,6 +859,8 @@ static inline unsigned long p4d_index(unsigned long address)
#if CONFIG_PGTABLE_LEVELS > 4 #if CONFIG_PGTABLE_LEVELS > 4
static inline int pgd_present(pgd_t pgd) static inline int pgd_present(pgd_t pgd)
{ {
if (!pgtable_l5_enabled)
return 1;
return pgd_flags(pgd) & _PAGE_PRESENT; return pgd_flags(pgd) & _PAGE_PRESENT;
} }
...@@ -876,6 +878,8 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) ...@@ -876,6 +878,8 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
/* to find an entry in a page-table-directory. */ /* to find an entry in a page-table-directory. */
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{ {
if (!pgtable_l5_enabled)
return (p4d_t *)pgd;
return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
} }
...@@ -883,6 +887,9 @@ static inline int pgd_bad(pgd_t pgd) ...@@ -883,6 +887,9 @@ static inline int pgd_bad(pgd_t pgd)
{ {
unsigned long ignore_flags = _PAGE_USER; unsigned long ignore_flags = _PAGE_USER;
if (!pgtable_l5_enabled)
return 0;
if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
ignore_flags |= _PAGE_NX; ignore_flags |= _PAGE_NX;
...@@ -891,6 +898,8 @@ static inline int pgd_bad(pgd_t pgd) ...@@ -891,6 +898,8 @@ static inline int pgd_bad(pgd_t pgd)
static inline int pgd_none(pgd_t pgd) static inline int pgd_none(pgd_t pgd)
{ {
if (!pgtable_l5_enabled)
return 0;
/* /*
* There is no need to do a workaround for the KNL stray * There is no need to do a workaround for the KNL stray
* A/D bit erratum here. PGDs only point to page tables * A/D bit erratum here. PGDs only point to page tables
......
...@@ -34,6 +34,8 @@ static inline void check_pgt_cache(void) { } ...@@ -34,6 +34,8 @@ static inline void check_pgt_cache(void) { }
void paging_init(void); void paging_init(void);
void sync_initial_page_table(void); void sync_initial_page_table(void);
static inline int pgd_large(pgd_t pgd) { return 0; }
/* /*
* Define this if things work differently on an i386 and an i486: * Define this if things work differently on an i386 and an i486:
* it will (on an i486) warn about kernel memory accesses that are * it will (on an i486) warn about kernel memory accesses that are
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
# include <asm/pgtable-2level_types.h> # include <asm/pgtable-2level_types.h>
#endif #endif
#define pgtable_l5_enabled 0
#define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1)) #define PGDIR_MASK (~(PGDIR_SIZE - 1))
......
...@@ -218,29 +218,26 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) ...@@ -218,29 +218,26 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{ {
#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL) pgd_t pgd;
p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
#else if (pgtable_l5_enabled || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
*p4dp = p4d; *p4dp = p4d;
#endif return;
}
pgd = native_make_pgd(native_p4d_val(p4d));
pgd = pti_set_user_pgd((pgd_t *)p4dp, pgd);
*p4dp = native_make_p4d(native_pgd_val(pgd));
} }
static inline void native_p4d_clear(p4d_t *p4d) static inline void native_p4d_clear(p4d_t *p4d)
{ {
#ifdef CONFIG_X86_5LEVEL
native_set_p4d(p4d, native_make_p4d(0)); native_set_p4d(p4d, native_make_p4d(0));
#else
native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)});
#endif
} }
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{ {
#ifdef CONFIG_PAGE_TABLE_ISOLATION
*pgdp = pti_set_user_pgd(pgdp, pgd); *pgdp = pti_set_user_pgd(pgdp, pgd);
#else
*pgdp = pgd;
#endif
} }
static inline void native_pgd_clear(pgd_t *pgd) static inline void native_pgd_clear(pgd_t *pgd)
......
...@@ -20,6 +20,18 @@ typedef unsigned long pgprotval_t; ...@@ -20,6 +20,18 @@ typedef unsigned long pgprotval_t;
typedef struct { pteval_t pte; } pte_t; typedef struct { pteval_t pte; } pte_t;
#ifdef CONFIG_X86_5LEVEL
extern unsigned int __pgtable_l5_enabled;
#ifndef pgtable_l5_enabled
#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57)
#endif
#else
#define pgtable_l5_enabled 0
#endif
extern unsigned int pgdir_shift;
extern unsigned int ptrs_per_p4d;
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
#define SHARED_KERNEL_PMD 0 #define SHARED_KERNEL_PMD 0
...@@ -29,17 +41,20 @@ typedef struct { pteval_t pte; } pte_t; ...@@ -29,17 +41,20 @@ typedef struct { pteval_t pte; } pte_t;
/* /*
* PGDIR_SHIFT determines what a top-level page table entry can map * PGDIR_SHIFT determines what a top-level page table entry can map
*/ */
#define PGDIR_SHIFT 48 #define PGDIR_SHIFT pgdir_shift
#define PTRS_PER_PGD 512 #define PTRS_PER_PGD 512
/* /*
* 4th level page in 5-level paging case * 4th level page in 5-level paging case
*/ */
#define P4D_SHIFT 39 #define P4D_SHIFT 39
#define PTRS_PER_P4D 512 #define MAX_PTRS_PER_P4D 512
#define PTRS_PER_P4D ptrs_per_p4d
#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) #define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE - 1)) #define P4D_MASK (~(P4D_SIZE - 1))
#define MAX_POSSIBLE_PHYSMEM_BITS 52
#else /* CONFIG_X86_5LEVEL */ #else /* CONFIG_X86_5LEVEL */
/* /*
...@@ -47,6 +62,7 @@ typedef struct { pteval_t pte; } pte_t; ...@@ -47,6 +62,7 @@ typedef struct { pteval_t pte; } pte_t;
*/ */
#define PGDIR_SHIFT 39 #define PGDIR_SHIFT 39
#define PTRS_PER_PGD 512 #define PTRS_PER_PGD 512
#define MAX_PTRS_PER_P4D 1
#endif /* CONFIG_X86_5LEVEL */ #endif /* CONFIG_X86_5LEVEL */
...@@ -82,31 +98,33 @@ typedef struct { pteval_t pte; } pte_t; ...@@ -82,31 +98,33 @@ typedef struct { pteval_t pte; } pte_t;
* range must not overlap with anything except the KASAN shadow area, which * range must not overlap with anything except the KASAN shadow area, which
* is correct as KASAN disables KASLR. * is correct as KASAN disables KASLR.
*/ */
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) #define MAXMEM (1UL << MAX_PHYSMEM_BITS)
#ifdef CONFIG_X86_5LEVEL #define LDT_PGD_ENTRY_L4 -3UL
# define VMALLOC_SIZE_TB _AC(12800, UL) #define LDT_PGD_ENTRY_L5 -112UL
# define __VMALLOC_BASE _AC(0xffa0000000000000, UL) #define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) #define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
# define LDT_PGD_ENTRY _AC(-112, UL)
# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #define __VMALLOC_BASE_L4 0xffffc90000000000
#else #define __VMALLOC_BASE_L5 0xffa0000000000000
# define VMALLOC_SIZE_TB _AC(32, UL)
# define __VMALLOC_BASE _AC(0xffffc90000000000, UL) #define VMALLOC_SIZE_TB_L4 32UL
# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) #define VMALLOC_SIZE_TB_L5 12800UL
# define LDT_PGD_ENTRY _AC(-3, UL)
# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #define __VMEMMAP_BASE_L4 0xffffea0000000000
#endif #define __VMEMMAP_BASE_L5 0xffd4000000000000
#ifdef CONFIG_RANDOMIZE_MEMORY #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
# define VMALLOC_START vmalloc_base # define VMALLOC_START vmalloc_base
# define VMALLOC_SIZE_TB (pgtable_l5_enabled ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
# define VMEMMAP_START vmemmap_base # define VMEMMAP_START vmemmap_base
#else #else
# define VMALLOC_START __VMALLOC_BASE # define VMALLOC_START __VMALLOC_BASE_L4
# define VMEMMAP_START __VMEMMAP_BASE # define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4
#endif /* CONFIG_RANDOMIZE_MEMORY */ # define VMEMMAP_START __VMEMMAP_BASE_L4
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) #define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
/* The module sections ends with the start of the fixmap */ /* The module sections ends with the start of the fixmap */
......
...@@ -53,12 +53,6 @@ ...@@ -53,12 +53,6 @@
# define NEED_MOVBE 0 # define NEED_MOVBE 0
#endif #endif
#ifdef CONFIG_X86_5LEVEL
# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31))
#else
# define NEED_LA57 0
#endif
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
/* Paravirtualized systems may not have PSE or PGE available */ /* Paravirtualized systems may not have PSE or PGE available */
...@@ -104,7 +98,7 @@ ...@@ -104,7 +98,7 @@
#define REQUIRED_MASK13 0 #define REQUIRED_MASK13 0
#define REQUIRED_MASK14 0 #define REQUIRED_MASK14 0
#define REQUIRED_MASK15 0 #define REQUIRED_MASK15 0
#define REQUIRED_MASK16 (NEED_LA57) #define REQUIRED_MASK16 0
#define REQUIRED_MASK17 0 #define REQUIRED_MASK17 0
#define REQUIRED_MASK18 0 #define REQUIRED_MASK18 0
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
......
...@@ -27,13 +27,8 @@ ...@@ -27,13 +27,8 @@
# endif # endif
#else /* CONFIG_X86_32 */ #else /* CONFIG_X86_32 */
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
# ifdef CONFIG_X86_5LEVEL # define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44)
# define MAX_PHYSADDR_BITS 52 # define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46)
# define MAX_PHYSMEM_BITS 52
# else
# define MAX_PHYSADDR_BITS 44
# define MAX_PHYSMEM_BITS 46
# endif
#endif #endif
#endif /* CONFIG_SPARSEMEM */ #endif /* CONFIG_SPARSEMEM */
......
...@@ -130,6 +130,16 @@ struct x86_hyper_init { ...@@ -130,6 +130,16 @@ struct x86_hyper_init {
void (*init_mem_mapping)(void); void (*init_mem_mapping)(void);
}; };
/**
* struct x86_init_acpi - x86 ACPI init functions
* @get_root_pointer: get RSDP address
* @reduced_hw_early_init: hardware reduced platform early init
*/
struct x86_init_acpi {
u64 (*get_root_pointer)(void);
void (*reduced_hw_early_init)(void);
};
/** /**
* struct x86_init_ops - functions for platform specific setup * struct x86_init_ops - functions for platform specific setup
* *
...@@ -144,6 +154,7 @@ struct x86_init_ops { ...@@ -144,6 +154,7 @@ struct x86_init_ops {
struct x86_init_iommu iommu; struct x86_init_iommu iommu;
struct x86_init_pci pci; struct x86_init_pci pci;
struct x86_hyper_init hyper; struct x86_hyper_init hyper;
struct x86_init_acpi acpi;
}; };
/** /**
......
...@@ -1376,17 +1376,21 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) ...@@ -1376,17 +1376,21 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
* *
* We initialize the Hardware-reduced ACPI model here: * We initialize the Hardware-reduced ACPI model here:
*/ */
static void __init acpi_reduced_hw_init(void) void __init acpi_generic_reduced_hw_init(void)
{ {
if (acpi_gbl_reduced_hardware) {
/* /*
* Override x86_init functions and bypass legacy pic * Override x86_init functions and bypass legacy PIC in
* in Hardware-reduced ACPI mode * hardware reduced ACPI mode.
*/ */
x86_init.timers.timer_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop;
x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.irqs.pre_vector_init = x86_init_noop;
legacy_pic = &null_legacy_pic; legacy_pic = &null_legacy_pic;
} }
static void __init acpi_reduced_hw_init(void)
{
if (acpi_gbl_reduced_hardware)
x86_init.acpi.reduced_hw_early_init();
} }
/* /*
......
...@@ -28,7 +28,7 @@ obj-y += cpuid-deps.o ...@@ -28,7 +28,7 @@ obj-y += cpuid-deps.o
obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o
obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
......
...@@ -509,6 +509,90 @@ static void detect_vmx_virtcap(struct cpuinfo_x86 *c) ...@@ -509,6 +509,90 @@ static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
} }
} }
#define MSR_IA32_TME_ACTIVATE 0x982
/* Helpers to access TME_ACTIVATE MSR */
#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
#define TME_ACTIVATE_POLICY_AES_XTS_128 0
#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
/* Values for mktme_status (SW only construct) */
#define MKTME_ENABLED 0
#define MKTME_DISABLED 1
#define MKTME_UNINITIALIZED 2
static int mktme_status = MKTME_UNINITIALIZED;
static void detect_tme(struct cpuinfo_x86 *c)
{
u64 tme_activate, tme_policy, tme_crypto_algs;
int keyid_bits = 0, nr_keyids = 0;
static u64 tme_activate_cpu0 = 0;
rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
if (mktme_status != MKTME_UNINITIALIZED) {
if (tme_activate != tme_activate_cpu0) {
/* Broken BIOS? */
pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
pr_err_once("x86/tme: MKTME is not usable\n");
mktme_status = MKTME_DISABLED;
/* Proceed. We may need to exclude bits from x86_phys_bits. */
}
} else {
tme_activate_cpu0 = tme_activate;
}
if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
pr_info_once("x86/tme: not enabled by BIOS\n");
mktme_status = MKTME_DISABLED;
return;
}
if (mktme_status != MKTME_UNINITIALIZED)
goto detect_keyid_bits;
pr_info("x86/tme: enabled by BIOS\n");
tme_policy = TME_ACTIVATE_POLICY(tme_activate);
if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
tme_crypto_algs);
mktme_status = MKTME_DISABLED;
}
detect_keyid_bits:
keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
nr_keyids = (1UL << keyid_bits) - 1;
if (nr_keyids) {
pr_info_once("x86/mktme: enabled by BIOS\n");
pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
} else {
pr_info_once("x86/mktme: disabled by BIOS\n");
}
if (mktme_status == MKTME_UNINITIALIZED) {
/* MKTME is usable */
mktme_status = MKTME_ENABLED;
}
/*
* KeyID bits effectively lower the number of physical address
* bits. Update cpuinfo_x86::x86_phys_bits accordingly.
*/
c->x86_phys_bits -= keyid_bits;
}
static void init_intel_energy_perf(struct cpuinfo_x86 *c) static void init_intel_energy_perf(struct cpuinfo_x86 *c)
{ {
u64 epb; u64 epb;
...@@ -679,6 +763,9 @@ static void init_intel(struct cpuinfo_x86 *c) ...@@ -679,6 +763,9 @@ static void init_intel(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_VMX)) if (cpu_has(c, X86_FEATURE_VMX))
detect_vmx_virtcap(c); detect_vmx_virtcap(c);
if (cpu_has(c, X86_FEATURE_TME))
detect_tme(c);
init_intel_energy_perf(c); init_intel_energy_perf(c);
init_intel_misc_features(c); init_intel_misc_features(c);
......
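A hedged worked example of the TME_ACTIVATE decoding in the hunk above; the MSR value is made up, and the snippet assumes the helper macros from intel.c are in scope:

/*
 * Hypothetical value: locked + enabled, policy 0 (AES-XTS-128),
 * 6 KeyID bits, and AES-XTS-128 set in the supported-algorithms field.
 */
static void tme_activate_example(void)
{
	u64 val = (1ULL << 48) | (6ULL << 32) | 0x3;
	int keyid_bits = TME_ACTIVATE_KEYID_BITS(val);	/* 6 */
	int nr_keyids = (1 << keyid_bits) - 1;		/* 63 usable KeyIDs */

	BUG_ON(!TME_ACTIVATE_LOCKED(val) || !TME_ACTIVATE_ENABLED(val));
	BUG_ON(TME_ACTIVATE_POLICY(val) != TME_ACTIVATE_POLICY_AES_XTS_128);
	BUG_ON(!(TME_ACTIVATE_CRYPTO_ALGS(val) & TME_ACTIVATE_CRYPTO_AES_XTS_128));

	/* detect_tme() would subtract keyid_bits (6) from x86_phys_bits. */
	(void)nr_keyids;
}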
// SPDX-License-Identifier: GPL-2.0
/*
* Intel PCONFIG instruction support.
*
* Copyright (C) 2017 Intel Corporation
*
* Author:
* Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
*/
#include <asm/cpufeature.h>
#include <asm/intel_pconfig.h>
#define PCONFIG_CPUID 0x1b
#define PCONFIG_CPUID_SUBLEAF_MASK ((1 << 12) - 1)
/* Subleaf type (EAX) for PCONFIG CPUID leaf (0x1B) */
enum {
PCONFIG_CPUID_SUBLEAF_INVALID = 0,
PCONFIG_CPUID_SUBLEAF_TARGETID = 1,
};
/* Bitmask of supported targets */
static u64 targets_supported __read_mostly;
int pconfig_target_supported(enum pconfig_target target)
{
/*
* We would need to re-think the implementation once we get > 64
* PCONFIG targets. Spec allows up to 2^32 targets.
*/
BUILD_BUG_ON(PCONFIG_TARGET_NR >= 64);
if (WARN_ON_ONCE(target >= 64))
return 0;
return targets_supported & (1ULL << target);
}
static int __init intel_pconfig_init(void)
{
int subleaf;
if (!boot_cpu_has(X86_FEATURE_PCONFIG))
return 0;
/*
* Scan subleafs of PCONFIG CPUID leaf.
*
* Subleafs of the same type need not to be consecutive.
*
* Stop on the first invalid subleaf type. All subleafs after the first
* invalid are invalid too.
*/
for (subleaf = 0; subleaf < INT_MAX; subleaf++) {
struct cpuid_regs regs;
cpuid_count(PCONFIG_CPUID, subleaf,
&regs.eax, &regs.ebx, &regs.ecx, &regs.edx);
switch (regs.eax & PCONFIG_CPUID_SUBLEAF_MASK) {
case PCONFIG_CPUID_SUBLEAF_INVALID:
/* Stop on the first invalid subleaf */
goto out;
case PCONFIG_CPUID_SUBLEAF_TARGETID:
/* Mark supported PCONFIG targets */
if (regs.ebx < 64)
targets_supported |= (1ULL << regs.ebx);
if (regs.ecx < 64)
targets_supported |= (1ULL << regs.ecx);
if (regs.edx < 64)
targets_supported |= (1ULL << regs.edx);
break;
default:
/* Unknown CPUID.PCONFIG subleaf: ignore */
break;
}
}
out:
return 0;
}
arch_initcall(intel_pconfig_init);
...@@ -1095,19 +1095,7 @@ static void mce_unmap_kpfn(unsigned long pfn) ...@@ -1095,19 +1095,7 @@ static void mce_unmap_kpfn(unsigned long pfn)
* a legal address. * a legal address.
*/ */
/*
* Build time check to see if we have a spare virtual bit. Don't want
* to leave this until run time because most developers don't have a
* system that can exercise this code path. This will only become a
* problem if/when we move beyond 5-level page tables.
*
* Hard code "9" here because cpp doesn't grok ilog2(PTRS_PER_PGD)
*/
#if PGDIR_SHIFT + 9 < 63
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
#else
#error "no unused virtual bit available"
#endif
if (set_memory_np(decoy_addr, 1)) if (set_memory_np(decoy_addr, 1))
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
...@@ -2357,6 +2345,12 @@ static __init int mcheck_init_device(void) ...@@ -2357,6 +2345,12 @@ static __init int mcheck_init_device(void)
{ {
int err; int err;
/*
* Check if we have a spare virtual bit. This will only become
* a problem if/when we move beyond 5-level page tables.
*/
MAYBE_BUILD_BUG_ON(__VIRTUAL_MASK_SHIFT >= 63);
if (!mce_available(&boot_cpu_data)) { if (!mce_available(&boot_cpu_data)) {
err = -EIO; err = -EIO;
goto err_out; goto err_out;
......
...@@ -924,6 +924,24 @@ static int __init parse_memmap_one(char *p) ...@@ -924,6 +924,24 @@ static int __init parse_memmap_one(char *p)
} else if (*p == '!') { } else if (*p == '!') {
start_at = memparse(p+1, &p); start_at = memparse(p+1, &p);
e820__range_add(start_at, mem_size, E820_TYPE_PRAM); e820__range_add(start_at, mem_size, E820_TYPE_PRAM);
} else if (*p == '%') {
enum e820_type from = 0, to = 0;
start_at = memparse(p + 1, &p);
if (*p == '-')
from = simple_strtoull(p + 1, &p, 0);
if (*p == '+')
to = simple_strtoull(p + 1, &p, 0);
if (*p != '\0')
return -EINVAL;
if (from && to)
e820__range_update(start_at, mem_size, from, to);
else if (to)
e820__range_add(start_at, mem_size, to);
else if (from)
e820__range_remove(start_at, mem_size, from, 1);
else
e820__range_remove(start_at, mem_size, 0, 0);
} else { } else {
e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1); e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1);
} }
......
...@@ -32,6 +32,11 @@ ...@@ -32,6 +32,11 @@
#include <asm/microcode.h> #include <asm/microcode.h>
#include <asm/kasan.h> #include <asm/kasan.h>
#ifdef CONFIG_X86_5LEVEL
#undef pgtable_l5_enabled
#define pgtable_l5_enabled __pgtable_l5_enabled
#endif
/* /*
* Manage page tables very early on. * Manage page tables very early on.
*/ */
...@@ -39,6 +44,24 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD]; ...@@ -39,6 +44,24 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt; static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
#ifdef CONFIG_X86_5LEVEL
unsigned int __pgtable_l5_enabled __ro_after_init;
EXPORT_SYMBOL(__pgtable_l5_enabled);
unsigned int pgdir_shift __ro_after_init = 39;
EXPORT_SYMBOL(pgdir_shift);
unsigned int ptrs_per_p4d __ro_after_init = 1;
EXPORT_SYMBOL(ptrs_per_p4d);
#endif
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
EXPORT_SYMBOL(page_offset_base);
unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4;
EXPORT_SYMBOL(vmalloc_base);
unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
EXPORT_SYMBOL(vmemmap_base);
#endif
#define __head __section(.head.text) #define __head __section(.head.text)
static void __head *fixup_pointer(void *ptr, unsigned long physaddr) static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
...@@ -46,6 +69,41 @@ static void __head *fixup_pointer(void *ptr, unsigned long physaddr) ...@@ -46,6 +69,41 @@ static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
return ptr - (void *)_text + (void *)physaddr; return ptr - (void *)_text + (void *)physaddr;
} }
static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr)
{
return fixup_pointer(ptr, physaddr);
}
#ifdef CONFIG_X86_5LEVEL
static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr)
{
return fixup_pointer(ptr, physaddr);
}
static bool __head check_la57_support(unsigned long physaddr)
{
if (native_cpuid_eax(0) < 7)
return false;
if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
return false;
*fixup_int(&pgtable_l5_enabled, physaddr) = 1;
*fixup_int(&pgdir_shift, physaddr) = 48;
*fixup_int(&ptrs_per_p4d, physaddr) = 512;
*fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5;
*fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5;
*fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5;
return true;
}
#else
static bool __head check_la57_support(unsigned long physaddr)
{
return false;
}
#endif
unsigned long __head __startup_64(unsigned long physaddr, unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp) struct boot_params *bp)
{ {
...@@ -55,9 +113,12 @@ unsigned long __head __startup_64(unsigned long physaddr, ...@@ -55,9 +113,12 @@ unsigned long __head __startup_64(unsigned long physaddr,
p4dval_t *p4d; p4dval_t *p4d;
pudval_t *pud; pudval_t *pud;
pmdval_t *pmd, pmd_entry; pmdval_t *pmd, pmd_entry;
bool la57;
int i; int i;
unsigned int *next_pgt_ptr; unsigned int *next_pgt_ptr;
la57 = check_la57_support(physaddr);
/* Is the address too large? */ /* Is the address too large? */
if (physaddr >> MAX_PHYSMEM_BITS) if (physaddr >> MAX_PHYSMEM_BITS)
for (;;); for (;;);
...@@ -81,9 +142,14 @@ unsigned long __head __startup_64(unsigned long physaddr, ...@@ -81,9 +142,14 @@ unsigned long __head __startup_64(unsigned long physaddr,
/* Fixup the physical addresses in the page table */ /* Fixup the physical addresses in the page table */
pgd = fixup_pointer(&early_top_pgt, physaddr); pgd = fixup_pointer(&early_top_pgt, physaddr);
pgd[pgd_index(__START_KERNEL_map)] += load_delta; p = pgd + pgd_index(__START_KERNEL_map);
if (la57)
if (IS_ENABLED(CONFIG_X86_5LEVEL)) { *p = (unsigned long)level4_kernel_pgt;
else
*p = (unsigned long)level3_kernel_pgt;
*p += _PAGE_TABLE_NOENC - __START_KERNEL_map + load_delta;
if (la57) {
p4d = fixup_pointer(&level4_kernel_pgt, physaddr); p4d = fixup_pointer(&level4_kernel_pgt, physaddr);
p4d[511] += load_delta; p4d[511] += load_delta;
} }
...@@ -108,7 +174,7 @@ unsigned long __head __startup_64(unsigned long physaddr, ...@@ -108,7 +174,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask(); pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
if (IS_ENABLED(CONFIG_X86_5LEVEL)) { if (la57) {
p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
...@@ -154,8 +220,7 @@ unsigned long __head __startup_64(unsigned long physaddr, ...@@ -154,8 +220,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
* Fixup phys_base - remove the memory encryption mask to obtain * Fixup phys_base - remove the memory encryption mask to obtain
* the true physical address. * the true physical address.
*/ */
p = fixup_pointer(&phys_base, physaddr); *fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask();
*p += load_delta - sme_get_me_mask();
/* Encrypt the kernel and related (if SME is active) */ /* Encrypt the kernel and related (if SME is active) */
sme_encrypt_kernel(bp); sme_encrypt_kernel(bp);
...@@ -206,7 +271,7 @@ int __init __early_make_pgtable(unsigned long address, pmdval_t pmd) ...@@ -206,7 +271,7 @@ int __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
* critical -- __PAGE_OFFSET would point us back into the dynamic * critical -- __PAGE_OFFSET would point us back into the dynamic
* range and we might end up looping forever... * range and we might end up looping forever...
*/ */
if (!IS_ENABLED(CONFIG_X86_5LEVEL)) if (!pgtable_l5_enabled)
p4d_p = pgd_p; p4d_p = pgd_p;
else if (pgd) else if (pgd)
p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
...@@ -322,7 +387,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) ...@@ -322,7 +387,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0); BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0); BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
(__START_KERNEL & PGDIR_MASK))); (__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
......
...@@ -39,12 +39,12 @@ ...@@ -39,12 +39,12 @@
* *
*/ */
#define l4_index(x) (((x) >> 39) & 511)
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) L4_START_KERNEL = l4_index(__START_KERNEL_map)
PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
#endif
L3_START_KERNEL = pud_index(__START_KERNEL_map) L3_START_KERNEL = pud_index(__START_KERNEL_map)
.text .text
...@@ -125,7 +125,10 @@ ENTRY(secondary_startup_64) ...@@ -125,7 +125,10 @@ ENTRY(secondary_startup_64)
/* Enable PAE mode, PGE and LA57 */ /* Enable PAE mode, PGE and LA57 */
movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
#ifdef CONFIG_X86_5LEVEL #ifdef CONFIG_X86_5LEVEL
testl $1, __pgtable_l5_enabled(%rip)
jz 1f
orl $X86_CR4_LA57, %ecx orl $X86_CR4_LA57, %ecx
1:
#endif #endif
movq %rcx, %cr4 movq %rcx, %cr4
...@@ -374,12 +377,7 @@ GLOBAL(name) ...@@ -374,12 +377,7 @@ GLOBAL(name)
__INITDATA __INITDATA
NEXT_PGD_PAGE(early_top_pgt) NEXT_PGD_PAGE(early_top_pgt)
.fill 511,8,0 .fill 512,8,0
#ifdef CONFIG_X86_5LEVEL
.quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#else
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#endif
.fill PTI_USER_PGD_FILL,8,0 .fill PTI_USER_PGD_FILL,8,0
NEXT_PAGE(early_dynamic_pgts) NEXT_PAGE(early_dynamic_pgts)
...@@ -390,9 +388,9 @@ NEXT_PAGE(early_dynamic_pgts) ...@@ -390,9 +388,9 @@ NEXT_PAGE(early_dynamic_pgts)
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
NEXT_PGD_PAGE(init_top_pgt) NEXT_PGD_PAGE(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0 .org init_top_pgt + L4_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_START_KERNEL*8, 0 .org init_top_pgt + L4_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
.fill PTI_USER_PGD_FILL,8,0 .fill PTI_USER_PGD_FILL,8,0
......
...@@ -350,6 +350,7 @@ void arch_crash_save_vmcoreinfo(void) ...@@ -350,6 +350,7 @@ void arch_crash_save_vmcoreinfo(void)
{ {
VMCOREINFO_NUMBER(phys_base); VMCOREINFO_NUMBER(phys_base);
VMCOREINFO_SYMBOL(init_top_pgt); VMCOREINFO_SYMBOL(init_top_pgt);
VMCOREINFO_NUMBER(pgtable_l5_enabled);
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
VMCOREINFO_SYMBOL(node_data); VMCOREINFO_SYMBOL(node_data);
......
...@@ -189,9 +189,7 @@ struct ist_info ist_info; ...@@ -189,9 +189,7 @@ struct ist_info ist_info;
#endif #endif
#else #else
struct cpuinfo_x86 boot_cpu_data __read_mostly = { struct cpuinfo_x86 boot_cpu_data __read_mostly;
.x86_phys_bits = MAX_PHYSMEM_BITS,
};
EXPORT_SYMBOL(boot_cpu_data); EXPORT_SYMBOL(boot_cpu_data);
#endif #endif
...@@ -851,6 +849,7 @@ void __init setup_arch(char **cmdline_p) ...@@ -851,6 +849,7 @@ void __init setup_arch(char **cmdline_p)
__flush_tlb_all(); __flush_tlb_all();
#else #else
printk(KERN_INFO "Command line: %s\n", boot_command_line); printk(KERN_INFO "Command line: %s\n", boot_command_line);
boot_cpu_data.x86_phys_bits = MAX_PHYSMEM_BITS;
#endif #endif
/* /*
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <linux/export.h> #include <linux/export.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <asm/acpi.h>
#include <asm/bios_ebda.h> #include <asm/bios_ebda.h>
#include <asm/paravirt.h> #include <asm/paravirt.h>
#include <asm/pci_x86.h> #include <asm/pci_x86.h>
...@@ -26,10 +27,11 @@ ...@@ -26,10 +27,11 @@
void x86_init_noop(void) { } void x86_init_noop(void) { }
void __init x86_init_uint_noop(unsigned int unused) { } void __init x86_init_uint_noop(unsigned int unused) { }
int __init iommu_init_noop(void) { return 0; } static int __init iommu_init_noop(void) { return 0; }
void iommu_shutdown_noop(void) { } static void iommu_shutdown_noop(void) { }
bool __init bool_x86_init_noop(void) { return false; } static bool __init bool_x86_init_noop(void) { return false; }
void x86_op_int_noop(int cpu) { } static void x86_op_int_noop(int cpu) { }
static u64 u64_x86_init_noop(void) { return 0; }
/* /*
* The platform setup functions are preset with the default functions * The platform setup functions are preset with the default functions
...@@ -91,6 +93,11 @@ struct x86_init_ops x86_init __initdata = { ...@@ -91,6 +93,11 @@ struct x86_init_ops x86_init __initdata = {
.x2apic_available = bool_x86_init_noop, .x2apic_available = bool_x86_init_noop,
.init_mem_mapping = x86_init_noop, .init_mem_mapping = x86_init_noop,
}, },
.acpi = {
.get_root_pointer = u64_x86_init_noop,
.reduced_hw_early_init = acpi_generic_reduced_hw_init,
},
}; };
struct x86_cpuinit_ops x86_cpuinit = { struct x86_cpuinit_ops x86_cpuinit = {
......
# SPDX-License-Identifier: GPL-2.0 # SPDX-License-Identifier: GPL-2.0
# Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c # Kernel does not boot with instrumentation of tlb.c and mem_encrypt*.c
KCOV_INSTRUMENT_tlb.o := n KCOV_INSTRUMENT_tlb.o := n
KCOV_INSTRUMENT_mem_encrypt.o := n KCOV_INSTRUMENT_mem_encrypt.o := n
KCOV_INSTRUMENT_mem_encrypt_identity.o := n
KASAN_SANITIZE_mem_encrypt.o := n KASAN_SANITIZE_mem_encrypt.o := n
KASAN_SANITIZE_mem_encrypt_identity.o := n
ifdef CONFIG_FUNCTION_TRACER ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_mem_encrypt.o = -pg CFLAGS_REMOVE_mem_encrypt.o = -pg
CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
...@@ -16,6 +19,7 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ ...@@ -16,6 +19,7 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
nostackp := $(call cc-option, -fno-stack-protector) nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_physaddr.o := $(nostackp) CFLAGS_physaddr.o := $(nostackp)
CFLAGS_setup_nx.o := $(nostackp) CFLAGS_setup_nx.o := $(nostackp)
CFLAGS_mem_encrypt_identity.o := $(nostackp)
CFLAGS_fault.o := -I$(src)/../include/asm/trace CFLAGS_fault.o := -I$(src)/../include/asm/trace
...@@ -47,4 +51,5 @@ obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o ...@@ -47,4 +51,5 @@ obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
...@@ -72,6 +72,31 @@ static const struct file_operations ptdump_curusr_fops = { ...@@ -72,6 +72,31 @@ static const struct file_operations ptdump_curusr_fops = {
}; };
#endif #endif
#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
extern pgd_t *efi_pgd;
static struct dentry *pe_efi;
static int ptdump_show_efi(struct seq_file *m, void *v)
{
if (efi_pgd)
ptdump_walk_pgd_level_debugfs(m, efi_pgd, false);
return 0;
}
static int ptdump_open_efi(struct inode *inode, struct file *filp)
{
return single_open(filp, ptdump_show_efi, NULL);
}
static const struct file_operations ptdump_efi_fops = {
.owner = THIS_MODULE,
.open = ptdump_open_efi,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
#endif
static struct dentry *dir, *pe_knl, *pe_curknl; static struct dentry *dir, *pe_knl, *pe_curknl;
static int __init pt_dump_debug_init(void) static int __init pt_dump_debug_init(void)
...@@ -96,6 +121,13 @@ static int __init pt_dump_debug_init(void) ...@@ -96,6 +121,13 @@ static int __init pt_dump_debug_init(void)
if (!pe_curusr) if (!pe_curusr)
goto err; goto err;
#endif #endif
#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
pe_efi = debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops);
if (!pe_efi)
goto err;
#endif
return 0; return 0;
err: err:
debugfs_remove_recursive(dir); debugfs_remove_recursive(dir);
......
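Usage note for the new debugfs entry: assuming debugfs is mounted at /sys/kernel/debug and that the dump directory created elsewhere in this file keeps its usual page_tables name (an assumption; the directory is set up outside this hunk), the EFI page tables can be dumped with:

	cat /sys/kernel/debug/page_tables/efi

The file is created with mode 0400, so reading it requires root.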
...@@ -417,11 +417,11 @@ void vmalloc_sync_all(void) ...@@ -417,11 +417,11 @@ void vmalloc_sync_all(void)
*/ */
static noinline int vmalloc_fault(unsigned long address) static noinline int vmalloc_fault(unsigned long address)
{ {
pgd_t *pgd, *pgd_ref; pgd_t *pgd, *pgd_k;
p4d_t *p4d, *p4d_ref; p4d_t *p4d, *p4d_k;
pud_t *pud, *pud_ref; pud_t *pud;
pmd_t *pmd, *pmd_ref; pmd_t *pmd;
pte_t *pte, *pte_ref; pte_t *pte;
/* Make sure we are in vmalloc area: */ /* Make sure we are in vmalloc area: */
if (!(address >= VMALLOC_START && address < VMALLOC_END)) if (!(address >= VMALLOC_START && address < VMALLOC_END))
...@@ -435,73 +435,51 @@ static noinline int vmalloc_fault(unsigned long address) ...@@ -435,73 +435,51 @@ static noinline int vmalloc_fault(unsigned long address)
* case just flush: * case just flush:
*/ */
pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address); pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address);
pgd_ref = pgd_offset_k(address); pgd_k = pgd_offset_k(address);
if (pgd_none(*pgd_ref)) if (pgd_none(*pgd_k))
return -1; return -1;
if (CONFIG_PGTABLE_LEVELS > 4) { if (pgtable_l5_enabled) {
if (pgd_none(*pgd)) { if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_ref); set_pgd(pgd, *pgd_k);
arch_flush_lazy_mmu_mode(); arch_flush_lazy_mmu_mode();
} else { } else {
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_k));
} }
} }
/* With 4-level paging, copying happens on the p4d level. */ /* With 4-level paging, copying happens on the p4d level. */
p4d = p4d_offset(pgd, address); p4d = p4d_offset(pgd, address);
p4d_ref = p4d_offset(pgd_ref, address); p4d_k = p4d_offset(pgd_k, address);
if (p4d_none(*p4d_ref)) if (p4d_none(*p4d_k))
return -1; return -1;
if (p4d_none(*p4d) && CONFIG_PGTABLE_LEVELS == 4) { if (p4d_none(*p4d) && !pgtable_l5_enabled) {
set_p4d(p4d, *p4d_ref); set_p4d(p4d, *p4d_k);
arch_flush_lazy_mmu_mode(); arch_flush_lazy_mmu_mode();
} else { } else {
BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_ref)); BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_k));
} }
/*
* Below here mismatches are bugs because these lower tables
* are shared:
*/
BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4); BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4);
pud = pud_offset(p4d, address); pud = pud_offset(p4d, address);
pud_ref = pud_offset(p4d_ref, address); if (pud_none(*pud))
if (pud_none(*pud_ref))
return -1; return -1;
if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))
BUG();
if (pud_large(*pud)) if (pud_large(*pud))
return 0; return 0;
pmd = pmd_offset(pud, address); pmd = pmd_offset(pud, address);
pmd_ref = pmd_offset(pud_ref, address); if (pmd_none(*pmd))
if (pmd_none(*pmd_ref))
return -1; return -1;
if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))
BUG();
if (pmd_large(*pmd)) if (pmd_large(*pmd))
return 0; return 0;
pte_ref = pte_offset_kernel(pmd_ref, address);
if (!pte_present(*pte_ref))
return -1;
pte = pte_offset_kernel(pmd, address); pte = pte_offset_kernel(pmd, address);
if (!pte_present(*pte))
/* return -1;
* Don't use pte_page here, because the mappings can point
* outside mem_map, and the NUMA hash lookup cannot handle
* that:
*/
if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
BUG();
return 0; return 0;
} }
......
...@@ -120,7 +120,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, ...@@ -120,7 +120,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
result = ident_p4d_init(info, p4d, addr, next); result = ident_p4d_init(info, p4d, addr, next);
if (result) if (result)
return result; return result;
if (IS_ENABLED(CONFIG_X86_5LEVEL)) { if (pgtable_l5_enabled) {
set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
} else { } else {
/* /*
......
...@@ -88,12 +88,7 @@ static int __init nonx32_setup(char *str) ...@@ -88,12 +88,7 @@ static int __init nonx32_setup(char *str)
} }
__setup("noexec32=", nonx32_setup); __setup("noexec32=", nonx32_setup);
/* static void sync_global_pgds_l5(unsigned long start, unsigned long end)
* When memory was added make sure all the processes MM have
* suitable PGD entries in the local PGD level page.
*/
#ifdef CONFIG_X86_5LEVEL
void sync_global_pgds(unsigned long start, unsigned long end)
{ {
unsigned long addr; unsigned long addr;
...@@ -129,8 +124,8 @@ void sync_global_pgds(unsigned long start, unsigned long end) ...@@ -129,8 +124,8 @@ void sync_global_pgds(unsigned long start, unsigned long end)
spin_unlock(&pgd_lock); spin_unlock(&pgd_lock);
} }
} }
#else
void sync_global_pgds(unsigned long start, unsigned long end) static void sync_global_pgds_l4(unsigned long start, unsigned long end)
{ {
unsigned long addr; unsigned long addr;
...@@ -143,7 +138,7 @@ void sync_global_pgds(unsigned long start, unsigned long end) ...@@ -143,7 +138,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
* With folded p4d, pgd_none() is always false, we need to * With folded p4d, pgd_none() is always false, we need to
* handle synchonization on p4d level. * handle synchonization on p4d level.
*/ */
BUILD_BUG_ON(pgd_none(*pgd_ref)); MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref));
p4d_ref = p4d_offset(pgd_ref, addr); p4d_ref = p4d_offset(pgd_ref, addr);
if (p4d_none(*p4d_ref)) if (p4d_none(*p4d_ref))
...@@ -173,7 +168,18 @@ void sync_global_pgds(unsigned long start, unsigned long end) ...@@ -173,7 +168,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
spin_unlock(&pgd_lock); spin_unlock(&pgd_lock);
} }
} }
#endif
/*
* When memory was added make sure all the processes MM have
* suitable PGD entries in the local PGD level page.
*/
void sync_global_pgds(unsigned long start, unsigned long end)
{
if (pgtable_l5_enabled)
sync_global_pgds_l5(start, end);
else
sync_global_pgds_l4(start, end);
}
/* /*
* NOTE: This function is marked __ref because it calls __init function * NOTE: This function is marked __ref because it calls __init function
...@@ -632,7 +638,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, ...@@ -632,7 +638,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
unsigned long vaddr = (unsigned long)__va(paddr); unsigned long vaddr = (unsigned long)__va(paddr);
int i = p4d_index(vaddr); int i = p4d_index(vaddr);
if (!IS_ENABLED(CONFIG_X86_5LEVEL)) if (!pgtable_l5_enabled)
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask); return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
...@@ -712,7 +718,7 @@ kernel_physical_mapping_init(unsigned long paddr_start, ...@@ -712,7 +718,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
page_size_mask); page_size_mask);
spin_lock(&init_mm.page_table_lock); spin_lock(&init_mm.page_table_lock);
if (IS_ENABLED(CONFIG_X86_5LEVEL)) if (pgtable_l5_enabled)
pgd_populate(&init_mm, pgd, p4d); pgd_populate(&init_mm, pgd, p4d);
else else
p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d); p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
...@@ -1089,7 +1095,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, ...@@ -1089,7 +1095,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
* 5-level case we should free them. This code will have to change * 5-level case we should free them. This code will have to change
* to adapt for boot-time switching between 4 and 5 level page tables. * to adapt for boot-time switching between 4 and 5 level page tables.
*/ */
if (CONFIG_PGTABLE_LEVELS == 5) if (pgtable_l5_enabled)
free_pud_table(pud_base, p4d); free_pud_table(pud_base, p4d);
} }
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#define DISABLE_BRANCH_PROFILING #define DISABLE_BRANCH_PROFILING
#define pr_fmt(fmt) "kasan: " fmt #define pr_fmt(fmt) "kasan: " fmt
#ifdef CONFIG_X86_5LEVEL
/* Too early to use cpu_feature_enabled() */
#define pgtable_l5_enabled __pgtable_l5_enabled
#endif
#include <linux/bootmem.h> #include <linux/bootmem.h>
#include <linux/kasan.h> #include <linux/kasan.h>
#include <linux/kdebug.h> #include <linux/kdebug.h>
...@@ -19,7 +25,7 @@ ...@@ -19,7 +25,7 @@
extern struct range pfn_mapped[E820_MAX_ENTRIES]; extern struct range pfn_mapped[E820_MAX_ENTRIES];
static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
static __init void *early_alloc(size_t size, int nid, bool panic) static __init void *early_alloc(size_t size, int nid, bool panic)
{ {
...@@ -176,10 +182,10 @@ static void __init clear_pgds(unsigned long start, ...@@ -176,10 +182,10 @@ static void __init clear_pgds(unsigned long start,
* With folded p4d, pgd_clear() is nop, use p4d_clear() * With folded p4d, pgd_clear() is nop, use p4d_clear()
* instead. * instead.
*/ */
if (CONFIG_PGTABLE_LEVELS < 5) if (pgtable_l5_enabled)
p4d_clear(p4d_offset(pgd, start));
else
pgd_clear(pgd); pgd_clear(pgd);
else
p4d_clear(p4d_offset(pgd, start));
} }
pgd = pgd_offset_k(start); pgd = pgd_offset_k(start);
...@@ -191,7 +197,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr) ...@@ -191,7 +197,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
{ {
unsigned long p4d; unsigned long p4d;
if (!IS_ENABLED(CONFIG_X86_5LEVEL)) if (!pgtable_l5_enabled)
return (p4d_t *)pgd; return (p4d_t *)pgd;
p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK; p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
...@@ -272,7 +278,7 @@ void __init kasan_early_init(void) ...@@ -272,7 +278,7 @@ void __init kasan_early_init(void)
for (i = 0; i < PTRS_PER_PUD; i++) for (i = 0; i < PTRS_PER_PUD; i++)
kasan_zero_pud[i] = __pud(pud_val); kasan_zero_pud[i] = __pud(pud_val);
for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++) for (i = 0; pgtable_l5_enabled && i < PTRS_PER_P4D; i++)
kasan_zero_p4d[i] = __p4d(p4d_val); kasan_zero_p4d[i] = __p4d(p4d_val);
kasan_map_early_shadow(early_top_pgt); kasan_map_early_shadow(early_top_pgt);
...@@ -303,7 +309,7 @@ void __init kasan_init(void) ...@@ -303,7 +309,7 @@ void __init kasan_init(void)
* bunch of things like kernel code, modules, EFI mapping, etc. * bunch of things like kernel code, modules, EFI mapping, etc.
* We need to take extra steps to not overwrite them. * We need to take extra steps to not overwrite them.
*/ */
if (IS_ENABLED(CONFIG_X86_5LEVEL)) { if (pgtable_l5_enabled) {
void *ptr; void *ptr;
ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END)); ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
......
...@@ -34,23 +34,12 @@ ...@@ -34,23 +34,12 @@
#define TB_SHIFT 40 #define TB_SHIFT 40
/* /*
* Virtual address start and end range for randomization.
*
* The end address could depend on more configuration options to make the * The end address could depend on more configuration options to make the
* highest amount of space for randomization available, but that's too hard * highest amount of space for randomization available, but that's too hard
* to keep straight and caused issues already. * to keep straight and caused issues already.
*/ */
static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE; static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
/* Default values */
unsigned long page_offset_base = __PAGE_OFFSET_BASE;
EXPORT_SYMBOL(page_offset_base);
unsigned long vmalloc_base = __VMALLOC_BASE;
EXPORT_SYMBOL(vmalloc_base);
unsigned long vmemmap_base = __VMEMMAP_BASE;
EXPORT_SYMBOL(vmemmap_base);
/* /*
* Memory regions randomized by KASLR (except modules that use a separate logic * Memory regions randomized by KASLR (except modules that use a separate logic
* earlier during boot). The list is ordered based on virtual addresses. This * earlier during boot). The list is ordered based on virtual addresses. This
...@@ -60,8 +49,8 @@ static __initdata struct kaslr_memory_region { ...@@ -60,8 +49,8 @@ static __initdata struct kaslr_memory_region {
unsigned long *base; unsigned long *base;
unsigned long size_tb; unsigned long size_tb;
} kaslr_regions[] = { } kaslr_regions[] = {
{ &page_offset_base, 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) /* Maximum */ }, { &page_offset_base, 0 },
{ &vmalloc_base, VMALLOC_SIZE_TB }, { &vmalloc_base, 0 },
{ &vmemmap_base, 1 }, { &vmemmap_base, 1 },
}; };
...@@ -84,11 +73,14 @@ static inline bool kaslr_memory_enabled(void) ...@@ -84,11 +73,14 @@ static inline bool kaslr_memory_enabled(void)
void __init kernel_randomize_memory(void) void __init kernel_randomize_memory(void)
{ {
size_t i; size_t i;
unsigned long vaddr = vaddr_start; unsigned long vaddr_start, vaddr;
unsigned long rand, memory_tb; unsigned long rand, memory_tb;
struct rnd_state rand_state; struct rnd_state rand_state;
unsigned long remain_entropy; unsigned long remain_entropy;
vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
vaddr = vaddr_start;
/* /*
* These BUILD_BUG_ON checks ensure the memory layout is consistent * These BUILD_BUG_ON checks ensure the memory layout is consistent
* with the vaddr_start/vaddr_end variables. These checks are very * with the vaddr_start/vaddr_end variables. These checks are very
...@@ -101,6 +93,9 @@ void __init kernel_randomize_memory(void) ...@@ -101,6 +93,9 @@ void __init kernel_randomize_memory(void)
if (!kaslr_memory_enabled()) if (!kaslr_memory_enabled())
return; return;
kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT);
kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;
/* /*
* Update Physical memory mapping to available and * Update Physical memory mapping to available and
* add padding if needed (especially for memory hotplug support). * add padding if needed (especially for memory hotplug support).
...@@ -129,7 +124,7 @@ void __init kernel_randomize_memory(void) ...@@ -129,7 +124,7 @@ void __init kernel_randomize_memory(void)
*/ */
entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
prandom_bytes_state(&rand_state, &rand, sizeof(rand)); prandom_bytes_state(&rand_state, &rand, sizeof(rand));
if (IS_ENABLED(CONFIG_X86_5LEVEL)) if (pgtable_l5_enabled)
entropy = (rand % (entropy + 1)) & P4D_MASK; entropy = (rand % (entropy + 1)) & P4D_MASK;
else else
entropy = (rand % (entropy + 1)) & PUD_MASK; entropy = (rand % (entropy + 1)) & PUD_MASK;
...@@ -141,7 +136,7 @@ void __init kernel_randomize_memory(void) ...@@ -141,7 +136,7 @@ void __init kernel_randomize_memory(void)
* randomization alignment. * randomization alignment.
*/ */
vaddr += get_padding(&kaslr_regions[i]); vaddr += get_padding(&kaslr_regions[i]);
if (IS_ENABLED(CONFIG_X86_5LEVEL)) if (pgtable_l5_enabled)
vaddr = round_up(vaddr + 1, P4D_SIZE); vaddr = round_up(vaddr + 1, P4D_SIZE);
else else
vaddr = round_up(vaddr + 1, PUD_SIZE); vaddr = round_up(vaddr + 1, PUD_SIZE);
...@@ -217,7 +212,7 @@ void __meminit init_trampoline(void) ...@@ -217,7 +212,7 @@ void __meminit init_trampoline(void)
return; return;
} }
if (IS_ENABLED(CONFIG_X86_5LEVEL)) if (pgtable_l5_enabled)
init_trampoline_p4d(); init_trampoline_p4d();
else else
init_trampoline_pud(); init_trampoline_pud();
......
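The two region sizes zeroed in the static initializer above are now assigned inside kernel_randomize_memory(): with a single image that selects 4- or 5-level paging at boot, expressions such as 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) and VMALLOC_SIZE_TB can depend on pgtable_l5_enabled and are therefore no longer usable as compile-time initializers. As a rough worked example (assuming the usual 32 TB vs. 12800 TB vmalloc area for 4- and 5-level paging), kaslr_regions[1].size_tb ends up as 32 or 12800 depending on the mode detected at boot.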
...@@ -60,17 +60,6 @@ void memory_present(int nid, unsigned long start, unsigned long end) ...@@ -60,17 +60,6 @@ void memory_present(int nid, unsigned long start, unsigned long end)
} }
printk(KERN_CONT "\n"); printk(KERN_CONT "\n");
} }
unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
unsigned long end_pfn)
{
unsigned long nr_pages = end_pfn - start_pfn;
if (!nr_pages)
return 0;
return (nr_pages + 1) * sizeof(struct page);
}
#endif #endif
extern unsigned long highend_pfn, highstart_pfn; extern unsigned long highend_pfn, highstart_pfn;
......
...@@ -157,7 +157,7 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) ...@@ -157,7 +157,7 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
unsigned long sp = current_stack_pointer; unsigned long sp = current_stack_pointer;
pgd_t *pgd = pgd_offset(mm, sp); pgd_t *pgd = pgd_offset(mm, sp);
if (CONFIG_PGTABLE_LEVELS > 4) { if (pgtable_l5_enabled) {
if (unlikely(pgd_none(*pgd))) { if (unlikely(pgd_none(*pgd))) {
pgd_t *pgd_ref = pgd_offset_k(sp); pgd_t *pgd_ref = pgd_offset_k(sp);
...@@ -613,7 +613,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, ...@@ -613,7 +613,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
{ {
int cpu; int cpu;
struct flush_tlb_info info = { struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
.mm = mm, .mm = mm,
}; };
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <linux/ioport.h> #include <linux/ioport.h>
#include <linux/mc146818rtc.h> #include <linux/mc146818rtc.h>
#include <linux/efi.h> #include <linux/efi.h>
#include <linux/export.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/io.h> #include <linux/io.h>
#include <linux/reboot.h> #include <linux/reboot.h>
...@@ -190,7 +191,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) ...@@ -190,7 +191,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
early_code_mapping_set_exec(0); early_code_mapping_set_exec(0);
} }
static pgd_t *efi_pgd; pgd_t *efi_pgd;
EXPORT_SYMBOL_GPL(efi_pgd);
/* /*
* We need our own copy of the higher levels of the page tables * We need our own copy of the higher levels of the page tables
...@@ -225,7 +227,7 @@ int __init efi_alloc_page_tables(void) ...@@ -225,7 +227,7 @@ int __init efi_alloc_page_tables(void)
pud = pud_alloc(&init_mm, p4d, EFI_VA_END); pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
if (!pud) { if (!pud) {
if (CONFIG_PGTABLE_LEVELS > 4) if (pgtable_l5_enabled)
free_page((unsigned long) pgd_page_vaddr(*pgd)); free_page((unsigned long) pgd_page_vaddr(*pgd));
free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
return -ENOMEM; return -ENOMEM;
...@@ -255,8 +257,8 @@ void efi_sync_low_kernel_mappings(void) ...@@ -255,8 +257,8 @@ void efi_sync_low_kernel_mappings(void)
* only span a single PGD entry and that the entry also maps * only span a single PGD entry and that the entry also maps
* other important kernel regions. * other important kernel regions.
*/ */
BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
(EFI_VA_END & PGDIR_MASK)); (EFI_VA_END & PGDIR_MASK));
pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
......
...@@ -199,6 +199,12 @@ void __init x86_intel_mid_early_setup(void) ...@@ -199,6 +199,12 @@ void __init x86_intel_mid_early_setup(void)
legacy_pic = &null_legacy_pic; legacy_pic = &null_legacy_pic;
/*
* Do nothing for now as everything needed is done in
* x86_intel_mid_early_setup() below.
*/
x86_init.acpi.reduced_hw_early_init = x86_init_noop;
pm_power_off = intel_mid_power_off; pm_power_off = intel_mid_power_off;
machine_ops.emergency_restart = intel_mid_reboot; machine_ops.emergency_restart = intel_mid_reboot;
......
...@@ -50,7 +50,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd) ...@@ -50,7 +50,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
{ {
pmd_t *pmd; pmd_t *pmd;
pud_t *pud; pud_t *pud;
p4d_t *p4d; p4d_t *p4d = NULL;
/* /*
* The new mapping only has to cover the page containing the image * The new mapping only has to cover the page containing the image
...@@ -66,7 +66,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd) ...@@ -66,7 +66,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
* tables used by the image kernel. * tables used by the image kernel.
*/ */
if (IS_ENABLED(CONFIG_X86_5LEVEL)) { if (pgtable_l5_enabled) {
p4d = (p4d_t *)get_safe_page(GFP_ATOMIC); p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
if (!p4d) if (!p4d)
return -ENOMEM; return -ENOMEM;
...@@ -84,7 +84,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd) ...@@ -84,7 +84,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC)); __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
set_pud(pud + pud_index(restore_jump_address), set_pud(pud + pud_index(restore_jump_address),
__pud(__pa(pmd) | _KERNPG_TABLE)); __pud(__pa(pmd) | _KERNPG_TABLE));
if (IS_ENABLED(CONFIG_X86_5LEVEL)) { if (p4d) {
set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE)); set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE));
set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE)); set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE));
} else { } else {
......
...@@ -18,9 +18,6 @@ config XEN_PV ...@@ -18,9 +18,6 @@ config XEN_PV
bool "Xen PV guest support" bool "Xen PV guest support"
default y default y
depends on XEN depends on XEN
# XEN_PV is not ready to work with 5-level paging.
# Changes to hypervisor are also required.
depends on !X86_5LEVEL
select XEN_HAVE_PVMMU select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU select XEN_HAVE_VPMU
help help
...@@ -79,6 +76,4 @@ config XEN_DEBUG_FS ...@@ -79,6 +76,4 @@ config XEN_DEBUG_FS
config XEN_PVH config XEN_PVH
bool "Support for running as a PVH guest" bool "Support for running as a PVH guest"
depends on XEN && XEN_PVHVM && ACPI depends on XEN && XEN_PVHVM && ACPI
# Pre-built page tables are not ready to handle 5-level paging.
depends on !X86_5LEVEL
def_bool n def_bool n
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <asm/io_apic.h> #include <asm/io_apic.h>
#include <asm/hypervisor.h> #include <asm/hypervisor.h>
#include <asm/e820/api.h> #include <asm/e820/api.h>
#include <asm/x86_init.h>
#include <asm/xen/interface.h> #include <asm/xen/interface.h>
#include <asm/xen/hypercall.h> #include <asm/xen/hypercall.h>
...@@ -16,15 +17,20 @@ ...@@ -16,15 +17,20 @@
/* /*
* PVH variables. * PVH variables.
* *
* xen_pvh and pvh_bootparams need to live in data segment since they * xen_pvh, pvh_bootparams and pvh_start_info need to live in data segment
* are used after startup_{32|64}, which clear .bss, are invoked. * since they are used after startup_{32|64}, which clear .bss, are invoked.
*/ */
bool xen_pvh __attribute__((section(".data"))) = 0; bool xen_pvh __attribute__((section(".data"))) = 0;
struct boot_params pvh_bootparams __attribute__((section(".data"))); struct boot_params pvh_bootparams __attribute__((section(".data")));
struct hvm_start_info pvh_start_info __attribute__((section(".data")));
struct hvm_start_info pvh_start_info;
unsigned int pvh_start_info_sz = sizeof(pvh_start_info); unsigned int pvh_start_info_sz = sizeof(pvh_start_info);
static u64 pvh_get_root_pointer(void)
{
return pvh_start_info.rsdp_paddr;
}
static void __init init_pvh_bootparams(void) static void __init init_pvh_bootparams(void)
{ {
struct xen_memory_map memmap; struct xen_memory_map memmap;
...@@ -71,6 +77,8 @@ static void __init init_pvh_bootparams(void) ...@@ -71,6 +77,8 @@ static void __init init_pvh_bootparams(void)
*/ */
pvh_bootparams.hdr.version = 0x212; pvh_bootparams.hdr.version = 0x212;
pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */ pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
x86_init.acpi.get_root_pointer = pvh_get_root_pointer;
} }
/* /*
......
...@@ -538,6 +538,22 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val) ...@@ -538,6 +538,22 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)
xen_mc_issue(PARAVIRT_LAZY_MMU); xen_mc_issue(PARAVIRT_LAZY_MMU);
} }
#if CONFIG_PGTABLE_LEVELS >= 5
__visible p4dval_t xen_p4d_val(p4d_t p4d)
{
return pte_mfn_to_pfn(p4d.p4d);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_p4d_val);
__visible p4d_t xen_make_p4d(p4dval_t p4d)
{
p4d = pte_pfn_to_mfn(p4d);
return native_make_p4d(p4d);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d);
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
...@@ -2411,6 +2427,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { ...@@ -2411,6 +2427,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.alloc_pud = xen_alloc_pmd_init, .alloc_pud = xen_alloc_pmd_init,
.release_pud = xen_release_pmd_init, .release_pud = xen_release_pmd_init,
#if CONFIG_PGTABLE_LEVELS >= 5
.p4d_val = PV_CALLEE_SAVE(xen_p4d_val),
.make_p4d = PV_CALLEE_SAVE(xen_make_p4d),
#endif
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
.activate_mm = xen_activate_mm, .activate_mm = xen_activate_mm,
......
...@@ -189,12 +189,15 @@ early_param("acpi_rsdp", setup_acpi_rsdp); ...@@ -189,12 +189,15 @@ early_param("acpi_rsdp", setup_acpi_rsdp);
acpi_physical_address __init acpi_os_get_root_pointer(void) acpi_physical_address __init acpi_os_get_root_pointer(void)
{ {
acpi_physical_address pa = 0; acpi_physical_address pa;
#ifdef CONFIG_KEXEC #ifdef CONFIG_KEXEC
if (acpi_rsdp) if (acpi_rsdp)
return acpi_rsdp; return acpi_rsdp;
#endif #endif
pa = acpi_arch_get_root_pointer();
if (pa)
return pa;
if (efi_enabled(EFI_CONFIG_TABLES)) { if (efi_enabled(EFI_CONFIG_TABLES)) {
if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#define P4D_SHIFT PGDIR_SHIFT #define P4D_SHIFT PGDIR_SHIFT
#define P4D_SIZE PGDIR_SIZE #define P4D_SIZE PGDIR_SIZE
#define P4D_MASK PGDIR_MASK #define P4D_MASK PGDIR_MASK
#define MAX_PTRS_PER_P4D 1
#define PTRS_PER_P4D 1 #define PTRS_PER_P4D 1
#define p4d_t pgd_t #define p4d_t pgd_t
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
typedef struct { pgd_t pgd; } p4d_t; typedef struct { pgd_t pgd; } p4d_t;
#define P4D_SHIFT PGDIR_SHIFT #define P4D_SHIFT PGDIR_SHIFT
#define MAX_PTRS_PER_P4D 1
#define PTRS_PER_P4D 1 #define PTRS_PER_P4D 1
#define P4D_SIZE (1UL << P4D_SHIFT) #define P4D_SIZE (1UL << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE-1)) #define P4D_MASK (~(P4D_SIZE-1))
......
...@@ -623,6 +623,13 @@ bool acpi_gtdt_c3stop(int type); ...@@ -623,6 +623,13 @@ bool acpi_gtdt_c3stop(int type);
int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count); int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count);
#endif #endif
#ifndef ACPI_HAVE_ARCH_GET_ROOT_POINTER
static inline u64 acpi_arch_get_root_pointer(void)
{
return 0;
}
#endif
#else /* !CONFIG_ACPI */ #else /* !CONFIG_ACPI */
#define acpi_disabled 1 #define acpi_disabled 1
......
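For reference, the default above is only the fallback: an architecture opts in by defining ACPI_HAVE_ARCH_GET_ROOT_POINTER and supplying its own acpi_arch_get_root_pointer(). On x86 that is presumably a thin wrapper around the new x86_init.acpi.get_root_pointer hook set up earlier in this series, something along the lines of the sketch below (the exact header placement is an assumption):

/*
 * Sketch of the x86 side, assuming <asm/x86_init.h> is visible here;
 * actual placement and spelling in the tree may differ slightly.
 */
#define ACPI_HAVE_ARCH_GET_ROOT_POINTER
static inline u64 acpi_arch_get_root_pointer(void)
{
	return x86_init.acpi.get_root_pointer();
}

With that in place, a Xen PVH guest overrides the hook with pvh_get_root_pointer() (see the xen hunk above), so acpi_os_get_root_pointer() picks up the RSDP address handed over in pvh_start_info instead of scanning EFI tables or low memory.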
...@@ -18,7 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE]; ...@@ -18,7 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE];
extern pte_t kasan_zero_pte[PTRS_PER_PTE]; extern pte_t kasan_zero_pte[PTRS_PER_PTE];
extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; extern pmd_t kasan_zero_pmd[PTRS_PER_PMD];
extern pud_t kasan_zero_pud[PTRS_PER_PUD]; extern pud_t kasan_zero_pud[PTRS_PER_PUD];
extern p4d_t kasan_zero_p4d[PTRS_PER_P4D]; extern p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D];
void kasan_populate_zero_shadow(const void *shadow_start, void kasan_populate_zero_shadow(const void *shadow_start,
const void *shadow_end); const void *shadow_end);
......
...@@ -816,10 +816,6 @@ int local_memory_node(int node_id); ...@@ -816,10 +816,6 @@ int local_memory_node(int node_id);
static inline int local_memory_node(int node_id) { return node_id; }; static inline int local_memory_node(int node_id) { return node_id; };
#endif #endif
#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE
unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
#endif
/* /*
* zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
*/ */
...@@ -1289,7 +1285,6 @@ struct mminit_pfnnid_cache { ...@@ -1289,7 +1285,6 @@ struct mminit_pfnnid_cache {
#endif #endif
void memory_present(int nid, unsigned long start, unsigned long end); void memory_present(int nid, unsigned long start, unsigned long end);
unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
/* /*
* If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
#if CONFIG_PGTABLE_LEVELS > 4 #if CONFIG_PGTABLE_LEVELS > 4
p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss; p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss;
#endif #endif
#if CONFIG_PGTABLE_LEVELS > 3 #if CONFIG_PGTABLE_LEVELS > 3
pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
......
...@@ -235,28 +235,6 @@ void __init memory_present(int nid, unsigned long start, unsigned long end) ...@@ -235,28 +235,6 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
} }
} }
/*
* Only used by the i386 NUMA architectures, but relatively
* generic code.
*/
unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
unsigned long end_pfn)
{
unsigned long pfn;
unsigned long nr_pages = 0;
mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
if (nid != early_pfn_to_nid(pfn))
continue;
if (pfn_present(pfn))
nr_pages += PAGES_PER_SECTION;
}
return nr_pages * sizeof(struct page);
}
/* /*
* Subtle, we encode the real pfn into the mem_map such that * Subtle, we encode the real pfn into the mem_map such that
* the identity pfn - section_mem_map will return the actual * the identity pfn - section_mem_map will return the actual
......
...@@ -84,18 +84,19 @@ ...@@ -84,18 +84,19 @@
* This is made more complicated by various memory models and PAE. * This is made more complicated by various memory models and PAE.
*/ */
#ifndef MAX_PHYSMEM_BITS #ifndef MAX_POSSIBLE_PHYSMEM_BITS
#ifdef CONFIG_HIGHMEM64G #ifdef MAX_PHYSMEM_BITS
#define MAX_PHYSMEM_BITS 36 #define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS
#else /* !CONFIG_HIGHMEM64G */ #else
/* /*
* If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
* be PAGE_SHIFT * be PAGE_SHIFT
*/ */
#define MAX_PHYSMEM_BITS BITS_PER_LONG #define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG
#endif #endif
#endif #endif
#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
#define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
/* /*
* Memory for allocating for handle keeps object position by * Memory for allocating for handle keeps object position by
......
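Quick arithmetic for the rename above: _PFN_BITS is now MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT, so on a 64-bit build with 4 KB pages and no arch-provided MAX_PHYSMEM_BITS it works out to 64 - 12 = 52 bits, the same result as before. The reason for the separate name is that MAX_PHYSMEM_BITS on x86 may become a boot-time value (following pgtable_l5_enabled), while zsmalloc's handle encoding needs a compile-time upper bound; MAX_POSSIBLE_PHYSMEM_BITS keeps that bound constant.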