Commit 56b1069c authored by Alexander Gordeev

s390/boot: Rework deployment of the kernel image

Rework deployment of kernel image for both compressed and
uncompressed variants as defined by CONFIG_KERNEL_UNCOMPRESSED
kernel configuration variable.

In case CONFIG_KERNEL_UNCOMPRESSED is disabled avoid uncompressing
the kernel to a temporary buffer and copying it to the target
address. Instead, uncompress it directly to the target destination.

In case CONFIG_KERNEL_UNCOMPRESSED is enabled avoid moving the
kernel to default 0x100000 location when KASLR is disabled or
failed. Instead, use the uncompressed kernel image directly.

In case KASLR is disabled or failed, the .amode31 section location in
memory is not randomized and precedes the kernel image. In case
CONFIG_KERNEL_UNCOMPRESSED is disabled, that location overlaps the
area used by the decompression algorithm. That is fine, since that
area is not used after the decompression has finished and the size of
the .amode31 section is not expected to ever exceed BOOT_HEAP_SIZE.

There is no decompression in case CONFIG_KERNEL_UNCOMPRESSED is
enabled. Therefore, rename decompress_kernel() to deploy_kernel(),
which better describes both uncompressed and compressed cases.

Introduce AMODE31_SIZE macro to avoid immediate value of 0x3000
(the size of .amode31 section) in the decompressor linker script.
Modify the vmlinux linker script to force the size of the .amode31
section to AMODE31_SIZE (the value of (_eamode31 - _samode31)
could otherwise differ as a result of the compiler options used).

Introduce __START_KERNEL macro that defines the kernel ELF image
entry point and set it to the current value of 0x100000.
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
parent 54f2ecc3
...@@ -17,7 +17,6 @@ struct machine_info { ...@@ -17,7 +17,6 @@ struct machine_info {
}; };
struct vmlinux_info { struct vmlinux_info {
unsigned long default_lma;
unsigned long entry; unsigned long entry;
unsigned long image_size; /* does not include .bss */ unsigned long image_size; /* does not include .bss */
unsigned long bss_size; /* uncompressed image .bss size */ unsigned long bss_size; /* uncompressed image .bss size */
......
...@@ -63,24 +63,13 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE; ...@@ -63,24 +63,13 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE;
#include "../../../../lib/decompress_unzstd.c" #include "../../../../lib/decompress_unzstd.c"
#endif #endif
#define decompress_offset ALIGN((unsigned long)_end + BOOT_HEAP_SIZE, PAGE_SIZE)
unsigned long mem_safe_offset(void) unsigned long mem_safe_offset(void)
{ {
/* return ALIGN(free_mem_end_ptr, PAGE_SIZE);
* due to 4MB HEAD_SIZE for bzip2
* 'decompress_offset + vmlinux.image_size' could be larger than
* kernel at final position + its .bss, so take the larger of two
*/
return max(decompress_offset + vmlinux.image_size,
vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size);
} }
void *decompress_kernel(void) void deploy_kernel(void *output)
{ {
void *output = (void *)decompress_offset;
__decompress(_compressed_start, _compressed_end - _compressed_start, __decompress(_compressed_start, _compressed_end - _compressed_start,
NULL, NULL, output, vmlinux.image_size, NULL, error); NULL, NULL, output, vmlinux.image_size, NULL, error);
return output;
} }
...@@ -2,11 +2,9 @@ ...@@ -2,11 +2,9 @@
#ifndef BOOT_COMPRESSED_DECOMPRESSOR_H #ifndef BOOT_COMPRESSED_DECOMPRESSOR_H
#define BOOT_COMPRESSED_DECOMPRESSOR_H #define BOOT_COMPRESSED_DECOMPRESSOR_H
#ifdef CONFIG_KERNEL_UNCOMPRESSED #ifndef CONFIG_KERNEL_UNCOMPRESSED
static inline void *decompress_kernel(void) { return NULL; }
#else
void *decompress_kernel(void);
#endif
unsigned long mem_safe_offset(void); unsigned long mem_safe_offset(void);
void deploy_kernel(void *output);
#endif
#endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */ #endif /* BOOT_COMPRESSED_DECOMPRESSOR_H */
...@@ -109,9 +109,19 @@ static void setup_lpp(void) ...@@ -109,9 +109,19 @@ static void setup_lpp(void)
} }
#ifdef CONFIG_KERNEL_UNCOMPRESSED #ifdef CONFIG_KERNEL_UNCOMPRESSED
unsigned long mem_safe_offset(void) static unsigned long mem_safe_offset(void)
{ {
return vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size; return (unsigned long)_compressed_start;
}
static void deploy_kernel(void *output)
{
void *uncompressed_start = (void *)_compressed_start;
if (output == uncompressed_start)
return;
memmove(output, uncompressed_start, vmlinux.image_size);
memset(uncompressed_start, 0, vmlinux.image_size);
} }
#endif #endif
...@@ -154,18 +164,18 @@ static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, ...@@ -154,18 +164,18 @@ static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr,
rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end; rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end;
dynsym = (Elf64_Sym *) vmlinux.dynsym_start; dynsym = (Elf64_Sym *) vmlinux.dynsym_start;
for (rela = rela_start; rela < rela_end; rela++) { for (rela = rela_start; rela < rela_end; rela++) {
loc = rela->r_offset + phys_offset; loc = rela->r_offset + phys_offset - __START_KERNEL;
val = rela->r_addend; val = rela->r_addend;
r_sym = ELF64_R_SYM(rela->r_info); r_sym = ELF64_R_SYM(rela->r_info);
if (r_sym) { if (r_sym) {
if (dynsym[r_sym].st_shndx != SHN_UNDEF) if (dynsym[r_sym].st_shndx != SHN_UNDEF)
val += dynsym[r_sym].st_value + offset; val += dynsym[r_sym].st_value + offset - __START_KERNEL;
} else { } else {
/* /*
* 0 == undefined symbol table index (STN_UNDEF), * 0 == undefined symbol table index (STN_UNDEF),
* used for R_390_RELATIVE, only add KASLR offset * used for R_390_RELATIVE, only add KASLR offset
*/ */
val += offset; val += offset - __START_KERNEL;
} }
r_type = ELF64_R_TYPE(rela->r_info); r_type = ELF64_R_TYPE(rela->r_info);
rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0); rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0);
...@@ -206,7 +216,7 @@ static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, ...@@ -206,7 +216,7 @@ static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr,
loc = (long)*reloc + phys_offset; loc = (long)*reloc + phys_offset;
if (loc < min_addr || loc > max_addr) if (loc < min_addr || loc > max_addr)
error("64-bit relocation outside of kernel!\n"); error("64-bit relocation outside of kernel!\n");
*(u64 *)loc += offset; *(u64 *)loc += offset - __START_KERNEL;
} }
} }
...@@ -219,7 +229,7 @@ static void kaslr_adjust_got(unsigned long offset) ...@@ -219,7 +229,7 @@ static void kaslr_adjust_got(unsigned long offset)
* reason. Adjust the GOT entries. * reason. Adjust the GOT entries.
*/ */
for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++)
*entry += offset; *entry += offset - __START_KERNEL;
} }
#endif #endif
...@@ -294,6 +304,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) ...@@ -294,6 +304,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page); vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
/* choose kernel address space layout: 4 or 3 levels. */ /* choose kernel address space layout: 4 or 3 levels. */
BUILD_BUG_ON(!IS_ALIGNED(__START_KERNEL, THREAD_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE)); BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE));
BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE); BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE);
vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE); vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE);
...@@ -383,9 +394,9 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) ...@@ -383,9 +394,9 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
/* /*
* This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's. * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's.
*/ */
static void clear_bss_section(unsigned long vmlinux_lma) static void clear_bss_section(unsigned long kernel_start)
{ {
memset((void *)vmlinux_lma + vmlinux.image_size, 0, vmlinux.bss_size); memset((void *)kernel_start + vmlinux.image_size, 0, vmlinux.bss_size);
} }
/* /*
...@@ -402,7 +413,7 @@ static void setup_vmalloc_size(void) ...@@ -402,7 +413,7 @@ static void setup_vmalloc_size(void)
vmalloc_size = max(size, vmalloc_size); vmalloc_size = max(size, vmalloc_size);
} }
static void kaslr_adjust_vmlinux_info(unsigned long offset) static void kaslr_adjust_vmlinux_info(long offset)
{ {
vmlinux.bootdata_off += offset; vmlinux.bootdata_off += offset;
vmlinux.bootdata_preserved_off += offset; vmlinux.bootdata_preserved_off += offset;
...@@ -426,24 +437,30 @@ static void kaslr_adjust_vmlinux_info(unsigned long offset) ...@@ -426,24 +437,30 @@ static void kaslr_adjust_vmlinux_info(unsigned long offset)
#endif #endif
} }
static void fixup_vmlinux_info(void)
{
vmlinux.entry -= __START_KERNEL;
kaslr_adjust_vmlinux_info(-__START_KERNEL);
}
void startup_kernel(void) void startup_kernel(void)
{ {
unsigned long max_physmem_end; unsigned long kernel_size = vmlinux.image_size + vmlinux.bss_size;
unsigned long vmlinux_lma = 0; unsigned long nokaslr_offset_phys = mem_safe_offset();
unsigned long amode31_lma = 0; unsigned long amode31_lma = 0;
unsigned long kernel_size; unsigned long max_physmem_end;
unsigned long asce_limit; unsigned long asce_limit;
unsigned long safe_addr; unsigned long safe_addr;
void *img;
psw_t psw; psw_t psw;
fixup_vmlinux_info();
setup_lpp(); setup_lpp();
safe_addr = mem_safe_offset(); safe_addr = PAGE_ALIGN(nokaslr_offset_phys + kernel_size);
/* /*
* Reserve decompressor memory together with decompression heap, buffer and * Reserve decompressor memory together with decompression heap,
* memory which might be occupied by uncompressed kernel at default 1Mb * buffer and memory which might be occupied by uncompressed kernel
* position (if KASLR is off or failed). * (if KASLR is off or failed).
*/ */
physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr); physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr);
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size) if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size)
...@@ -463,7 +480,6 @@ void startup_kernel(void) ...@@ -463,7 +480,6 @@ void startup_kernel(void)
max_physmem_end = detect_max_physmem_end(); max_physmem_end = detect_max_physmem_end();
setup_ident_map_size(max_physmem_end); setup_ident_map_size(max_physmem_end);
setup_vmalloc_size(); setup_vmalloc_size();
kernel_size = vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
asce_limit = setup_kernel_memory_layout(kernel_size); asce_limit = setup_kernel_memory_layout(kernel_size);
/* got final ident_map_size, physmem allocations could be performed now */ /* got final ident_map_size, physmem allocations could be performed now */
physmem_set_usable_limit(ident_map_size); physmem_set_usable_limit(ident_map_size);
...@@ -472,32 +488,20 @@ void startup_kernel(void) ...@@ -472,32 +488,20 @@ void startup_kernel(void)
rescue_initrd(safe_addr, ident_map_size); rescue_initrd(safe_addr, ident_map_size);
rescue_relocs(); rescue_relocs();
if (kaslr_enabled()) { if (kaslr_enabled())
vmlinux_lma = randomize_within_range(vmlinux.image_size + vmlinux.bss_size, __kaslr_offset_phys = randomize_within_range(kernel_size, THREAD_SIZE, 0, ident_map_size);
THREAD_SIZE, vmlinux.default_lma, if (!__kaslr_offset_phys)
ident_map_size); __kaslr_offset_phys = nokaslr_offset_phys;
if (vmlinux_lma) {
__kaslr_offset_phys = vmlinux_lma - vmlinux.default_lma;
kaslr_adjust_vmlinux_info(__kaslr_offset_phys); kaslr_adjust_vmlinux_info(__kaslr_offset_phys);
} physmem_reserve(RR_VMLINUX, __kaslr_offset_phys, kernel_size);
} deploy_kernel((void *)__kaslr_offset_phys);
vmlinux_lma = vmlinux_lma ?: vmlinux.default_lma;
physmem_reserve(RR_VMLINUX, vmlinux_lma, vmlinux.image_size + vmlinux.bss_size);
if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
img = decompress_kernel();
memmove((void *)vmlinux_lma, img, vmlinux.image_size);
} else if (__kaslr_offset_phys) {
img = (void *)vmlinux.default_lma;
memmove((void *)vmlinux_lma, img, vmlinux.image_size);
memset(img, 0, vmlinux.image_size);
}
/* vmlinux decompression is done, shrink reserved low memory */ /* vmlinux decompression is done, shrink reserved low memory */
physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end); physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);
if (kaslr_enabled()) if (kaslr_enabled())
amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G); amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G);
amode31_lma = amode31_lma ?: vmlinux.default_lma - vmlinux.amode31_size; if (!amode31_lma)
amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size;
physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size); physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size);
/* /*
...@@ -513,8 +517,8 @@ void startup_kernel(void) ...@@ -513,8 +517,8 @@ void startup_kernel(void)
* - copy_bootdata() must follow setup_vmem() to propagate changes * - copy_bootdata() must follow setup_vmem() to propagate changes
* to bootdata made by setup_vmem() * to bootdata made by setup_vmem()
*/ */
clear_bss_section(vmlinux_lma); clear_bss_section(__kaslr_offset_phys);
kaslr_adjust_relocs(vmlinux_lma, vmlinux_lma + vmlinux.image_size, kaslr_adjust_relocs(__kaslr_offset_phys, __kaslr_offset_phys + vmlinux.image_size,
__kaslr_offset, __kaslr_offset_phys); __kaslr_offset, __kaslr_offset_phys);
kaslr_adjust_got(__kaslr_offset); kaslr_adjust_got(__kaslr_offset);
free_relocs(); free_relocs();
......
...@@ -453,9 +453,6 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l ...@@ -453,9 +453,6 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l
* To allow prefixing the lowcore must be mapped with 4KB pages. * To allow prefixing the lowcore must be mapped with 4KB pages.
* To prevent creation of a large page at address 0 first map * To prevent creation of a large page at address 0 first map
* the lowcore and create the identity mapping only afterwards. * the lowcore and create the identity mapping only afterwards.
*
* Skip 0x100000 bytes for kernel pgtables, as per the linker script:
* . = 0x100000;
*/ */
pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT); pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT);
for_each_physmem_usable_range(i, &start, &end) { for_each_physmem_usable_range(i, &start, &end) {
...@@ -463,7 +460,7 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l ...@@ -463,7 +460,7 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l
(unsigned long)__identity_va(end), (unsigned long)__identity_va(end),
POPULATE_IDENTITY); POPULATE_IDENTITY);
} }
pgtable_populate(kernel_start + 0x100000, kernel_end, POPULATE_KERNEL); pgtable_populate(kernel_start, kernel_end, POPULATE_KERNEL);
pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT); pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT);
pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore), pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
POPULATE_ABS_LOWCORE); POPULATE_ABS_LOWCORE);
...@@ -471,7 +468,7 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l ...@@ -471,7 +468,7 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l
POPULATE_NONE); POPULATE_NONE);
memcpy_real_ptep = __identity_va(__virt_to_kpte(__memcpy_real_area)); memcpy_real_ptep = __identity_va(__virt_to_kpte(__memcpy_real_area));
kasan_populate_shadow(kernel_start + 0x100000, kernel_end); kasan_populate_shadow(kernel_start, kernel_end);
S390_lowcore.kernel_asce.val = swapper_pg_dir | asce_bits; S390_lowcore.kernel_asce.val = swapper_pg_dir | asce_bits;
S390_lowcore.user_asce = s390_invalid_asce; S390_lowcore.user_asce = s390_invalid_asce;
......
...@@ -100,7 +100,8 @@ SECTIONS ...@@ -100,7 +100,8 @@ SECTIONS
_decompressor_end = .; _decompressor_end = .;
#ifdef CONFIG_KERNEL_UNCOMPRESSED #ifdef CONFIG_KERNEL_UNCOMPRESSED
. = 0x100000; . = ALIGN(PAGE_SIZE);
. += AMODE31_SIZE; /* .amode31 section */
#else #else
. = ALIGN(8); . = ALIGN(8);
#endif #endif
......
...@@ -273,7 +273,10 @@ static inline unsigned long virt_to_pfn(const void *kaddr) ...@@ -273,7 +273,10 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
#include <asm-generic/memory_model.h> #include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h> #include <asm-generic/getorder.h>
#define AMODE31_SIZE (3 * PAGE_SIZE)
#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
#define __START_KERNEL 0x100000
#define __NO_KASLR_START_KERNEL CONFIG_KERNEL_IMAGE_BASE #define __NO_KASLR_START_KERNEL CONFIG_KERNEL_IMAGE_BASE
#define __NO_KASLR_END_KERNEL (__NO_KASLR_START_KERNEL + KERNEL_IMAGE_SIZE) #define __NO_KASLR_END_KERNEL (__NO_KASLR_START_KERNEL + KERNEL_IMAGE_SIZE)
......
...@@ -39,7 +39,7 @@ PHDRS { ...@@ -39,7 +39,7 @@ PHDRS {
SECTIONS SECTIONS
{ {
. = 0x100000; . = __START_KERNEL;
.text : { .text : {
_stext = .; /* Start of text section */ _stext = .; /* Start of text section */
_text = .; /* Text and read-only data */ _text = .; /* Text and read-only data */
...@@ -183,7 +183,7 @@ SECTIONS ...@@ -183,7 +183,7 @@ SECTIONS
.amode31.data : { .amode31.data : {
*(.amode31.data) *(.amode31.data)
} }
. = ALIGN(PAGE_SIZE); . = _samode31 + AMODE31_SIZE;
_eamode31 = .; _eamode31 = .;
/* early.c uses stsi, which requires page aligned data. */ /* early.c uses stsi, which requires page aligned data. */
...@@ -230,7 +230,6 @@ SECTIONS ...@@ -230,7 +230,6 @@ SECTIONS
* it should match struct vmlinux_info * it should match struct vmlinux_info
*/ */
.vmlinux.info 0 (INFO) : { .vmlinux.info 0 (INFO) : {
QUAD(_stext) /* default_lma */
QUAD(startup_continue) /* entry */ QUAD(startup_continue) /* entry */
QUAD(__bss_start - _stext) /* image_size */ QUAD(__bss_start - _stext) /* image_size */
QUAD(__bss_stop - __bss_start) /* bss_size */ QUAD(__bss_stop - __bss_start) /* bss_size */
......
...@@ -280,7 +280,7 @@ static int do_reloc(struct section *sec, Elf_Rel *rel) ...@@ -280,7 +280,7 @@ static int do_reloc(struct section *sec, Elf_Rel *rel)
case R_390_GOTOFF64: case R_390_GOTOFF64:
break; break;
case R_390_64: case R_390_64:
add_reloc(&relocs64, offset - (ehdr.e_entry - 0x100000)); add_reloc(&relocs64, offset - ehdr.e_entry);
break; break;
default: default:
die("Unsupported relocation type: %d\n", r_type); die("Unsupported relocation type: %d\n", r_type);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment