Commit 47624248 authored by Russell King's avatar Russell King

Merge branch 'xip_zdata' of http://git.linaro.org/people/nicolas.pitre/linux into devel-testing

This contains important fixes to the XIP linker script, some more linker
script cleanups, .bss clearing and .data copying speedups related to the
above, and an opt-in config option for XIP kernels that allows for
compressing .data in ROM that depend on those other patches to work
properly.
parents 2bd6bf03 ca8b5d97
...@@ -2005,6 +2005,17 @@ config XIP_PHYS_ADDR ...@@ -2005,6 +2005,17 @@ config XIP_PHYS_ADDR
be linked for and stored to. This address is dependent on your be linked for and stored to. This address is dependent on your
own flash usage. own flash usage.
config XIP_DEFLATED_DATA
bool "Store kernel .data section compressed in ROM"
depends on XIP_KERNEL
select ZLIB_INFLATE
help
Before the kernel is actually executed, its .data section has to be
copied to RAM from ROM. This option allows for storing that data
in compressed form and decompressed to RAM rather than merely being
copied, saving some precious ROM space. A possible drawback is a
slightly longer boot delay.
config KEXEC config KEXEC
bool "Kexec system call (EXPERIMENTAL)" bool "Kexec system call (EXPERIMENTAL)"
depends on (!SMP || PM_SLEEP_SMP) depends on (!SMP || PM_SLEEP_SMP)
......
...@@ -31,8 +31,19 @@ targets := Image zImage xipImage bootpImage uImage ...@@ -31,8 +31,19 @@ targets := Image zImage xipImage bootpImage uImage
ifeq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_XIP_KERNEL),y)
cmd_deflate_xip_data = $(CONFIG_SHELL) -c \
'$(srctree)/$(src)/deflate_xip_data.sh $< $@ || { rm -f $@; false; }'
ifeq ($(CONFIG_XIP_DEFLATED_DATA),y)
quiet_cmd_mkxip = XIPZ $@
cmd_mkxip = $(cmd_objcopy) && $(cmd_deflate_xip_data)
else
quiet_cmd_mkxip = $(quiet_cmd_objcopy)
cmd_mkxip = $(cmd_objcopy)
endif
$(obj)/xipImage: vmlinux FORCE $(obj)/xipImage: vmlinux FORCE
$(call if_changed,objcopy) $(call if_changed,mkxip)
@$(kecho) ' Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)' @$(kecho) ' Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)'
$(obj)/Image $(obj)/zImage: FORCE $(obj)/Image $(obj)/zImage: FORCE
......
#!/bin/sh
# XIP kernel .data segment compressor
#
# Created by: Nicolas Pitre, August 2017
# Copyright: (C) 2017 Linaro Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
# This script locates the start of the .data section in xipImage and
# substitutes it with a compressed version. The needed offsets are obtained
# from symbol addresses in vmlinux. It is expected that .data extends to
# the end of xipImage.
set -e
VMLINUX="$1"
XIPIMAGE="$2"
DD="dd status=none"
# Use "make V=1" to debug this script.
case "$KBUILD_VERBOSE" in
*1*)
set -x
;;
esac
sym_val() {
# extract hex value for symbol in $1
local val=$($NM "$VMLINUX" | sed -n "/ $1$/{s/ .*$//p;q}")
[ "$val" ] || { echo "can't find $1 in $VMLINUX" 1>&2; exit 1; }
# convert from hex to decimal
echo $((0x$val))
}
__data_loc=$(sym_val __data_loc)
_edata_loc=$(sym_val _edata_loc)
base_offset=$(sym_val _xiprom)
# convert to file based offsets
data_start=$(($__data_loc - $base_offset))
data_end=$(($_edata_loc - $base_offset))
# Make sure data occupies the last part of the file.
file_end=$(stat -c "%s" "$XIPIMAGE")
if [ "$file_end" != "$data_end" ]; then
printf "end of xipImage doesn't match with _edata_loc (%#x vs %#x)\n" \
$(($file_end + $base_offset)) $_edata_loc 2>&1
exit 1;
fi
# be ready to clean up
trap 'rm -f "$XIPIMAGE.tmp"' 0 1 2 3
# substitute the data section by a compressed version
$DD if="$XIPIMAGE" count=$data_start iflag=count_bytes of="$XIPIMAGE.tmp"
$DD if="$XIPIMAGE" skip=$data_start iflag=skip_bytes |
gzip -9 >> "$XIPIMAGE.tmp"
# replace kernel binary
mv -f "$XIPIMAGE.tmp" "$XIPIMAGE"
...@@ -87,6 +87,11 @@ head-y := head$(MMUEXT).o ...@@ -87,6 +87,11 @@ head-y := head$(MMUEXT).o
obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_DEBUG_LL) += debug.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
# This is executed very early using a temporary stack when no memory allocator
# nor global data is available. Everything has to be allocated on the stack.
CFLAGS_head-inflate-data.o := $(call cc-option,-Wframe-larger-than=10240)
obj-$(CONFIG_XIP_DEFLATED_DATA) += head-inflate-data.o
obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o
AFLAGS_hyp-stub.o :=-Wa,-march=armv7-a AFLAGS_hyp-stub.o :=-Wa,-march=armv7-a
ifeq ($(CONFIG_ARM_PSCI),y) ifeq ($(CONFIG_ARM_PSCI),y)
......
...@@ -79,47 +79,68 @@ ENDPROC(__vet_atags) ...@@ -79,47 +79,68 @@ ENDPROC(__vet_atags)
*/ */
__INIT __INIT
__mmap_switched: __mmap_switched:
adr r3, __mmap_switched_data
mov r7, r1
ldmia r3!, {r4, r5, r6, r7} mov r8, r2
cmp r4, r5 @ Copy data segment if needed mov r10, r0
1: cmpne r5, r6
ldrne fp, [r4], #4 adr r4, __mmap_switched_data
strne fp, [r5], #4 mov fp, #0
bne 1b
#if defined(CONFIG_XIP_DEFLATED_DATA)
mov fp, #0 @ Clear BSS (and zero fp) ARM( ldr sp, [r4], #4 )
1: cmp r6, r7 THUMB( ldr sp, [r4] )
strcc fp, [r6],#4 THUMB( add r4, #4 )
bcc 1b bl __inflate_kernel_data @ decompress .data to RAM
teq r0, #0
ARM( ldmia r3, {r4, r5, r6, r7, sp}) bne __error
THUMB( ldmia r3, {r4, r5, r6, r7} ) #elif defined(CONFIG_XIP_KERNEL)
THUMB( ldr sp, [r3, #16] ) ARM( ldmia r4!, {r0, r1, r2, sp} )
str r9, [r4] @ Save processor ID THUMB( ldmia r4!, {r0, r1, r2, r3} )
str r1, [r5] @ Save machine type THUMB( mov sp, r3 )
str r2, [r6] @ Save atags pointer sub r2, r2, r1
cmp r7, #0 bl memcpy @ copy .data to RAM
strne r0, [r7] @ Save control register values #endif
ARM( ldmia r4!, {r0, r1, sp} )
THUMB( ldmia r4!, {r0, r1, r3} )
THUMB( mov sp, r3 )
sub r1, r1, r0
bl __memzero @ clear .bss
ldmia r4, {r0, r1, r2, r3}
str r9, [r0] @ Save processor ID
str r7, [r1] @ Save machine type
str r8, [r2] @ Save atags pointer
cmp r3, #0
strne r10, [r3] @ Save control register values
b start_kernel b start_kernel
ENDPROC(__mmap_switched) ENDPROC(__mmap_switched)
.align 2 .align 2
.type __mmap_switched_data, %object .type __mmap_switched_data, %object
__mmap_switched_data: __mmap_switched_data:
.long __data_loc @ r4 #ifdef CONFIG_XIP_KERNEL
.long _sdata @ r5 #ifndef CONFIG_XIP_DEFLATED_DATA
.long __bss_start @ r6 .long _sdata @ r0
.long _end @ r7 .long __data_loc @ r1
.long processor_id @ r4 .long _edata_loc @ r2
.long __machine_arch_type @ r5 #endif
.long __atags_pointer @ r6 .long __bss_stop @ sp (temporary stack in .bss)
#endif
.long __bss_start @ r0
.long __bss_stop @ r1
.long init_thread_union + THREAD_START_SP @ sp
.long processor_id @ r0
.long __machine_arch_type @ r1
.long __atags_pointer @ r2
#ifdef CONFIG_CPU_CP15 #ifdef CONFIG_CPU_CP15
.long cr_alignment @ r7 .long cr_alignment @ r3
#else #else
.long 0 @ r7 .long 0 @ r3
#endif #endif
.long init_thread_union + THREAD_START_SP @ sp
.size __mmap_switched_data, . - __mmap_switched_data .size __mmap_switched_data, . - __mmap_switched_data
/* /*
......
/*
* XIP kernel .data segment decompressor
*
* Created by: Nicolas Pitre, August 2017
* Copyright: (C) 2017 Linaro Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/init.h>
#include <linux/zutil.h>
/* for struct inflate_state */
#include "../../../lib/zlib_inflate/inftrees.h"
#include "../../../lib/zlib_inflate/inflate.h"
#include "../../../lib/zlib_inflate/infutil.h"
extern char __data_loc[];
extern char _edata_loc[];
extern char _sdata[];
/*
* This code is called very early during the boot process to decompress
* the .data segment stored compressed in ROM. Therefore none of the global
* variables are valid yet, hence no kernel services such as memory
* allocation is available. Everything must be allocated on the stack and
* we must avoid any global data access. We use a temporary stack located
* in the .bss area. The linker script makes sure the .bss is big enough
* to hold our stack frame plus some room for called functions.
*
* We mimic the code in lib/decompress_inflate.c to use the smallest work
* area possible. And because everything is statically allocated on the
* stack then there is no need to clean up before returning.
*/
int __init __inflate_kernel_data(void)
{
struct z_stream_s stream, *strm = &stream;
struct inflate_state state;
char *in = __data_loc;
int rc;
/* Check and skip gzip header (assume no filename) */
if (in[0] != 0x1f || in[1] != 0x8b || in[2] != 0x08 || in[3] & ~3)
return -1;
in += 10;
strm->workspace = &state;
strm->next_in = in;
strm->avail_in = _edata_loc - __data_loc; /* upper bound */
strm->next_out = _sdata;
strm->avail_out = _edata_loc - __data_loc;
zlib_inflateInit2(strm, -MAX_WBITS);
WS(strm)->inflate_state.wsize = 0;
WS(strm)->inflate_state.window = NULL;
rc = zlib_inflate(strm, Z_FINISH);
if (rc == Z_OK || rc == Z_STREAM_END)
rc = strm->avail_out; /* should be 0 */
return rc;
}
...@@ -77,9 +77,7 @@ SECTIONS ...@@ -77,9 +77,7 @@ SECTIONS
*(.text.fixup) *(.text.fixup)
*(__ex_table) *(__ex_table)
#endif #endif
#ifndef CONFIG_SMP_ON_UP
*(.alt.smp.init) *(.alt.smp.init)
#endif
*(.discard) *(.discard)
*(.discard.*) *(.discard.*)
} }
...@@ -181,19 +179,7 @@ SECTIONS ...@@ -181,19 +179,7 @@ SECTIONS
*(.taglist.init) *(.taglist.init)
__tagtable_end = .; __tagtable_end = .;
} }
#ifdef CONFIG_SMP_ON_UP .init.rodata : {
.init.smpalt : {
__smpalt_begin = .;
*(.alt.smp.init)
__smpalt_end = .;
}
#endif
.init.pv_table : {
__pv_table_begin = .;
*(.pv_table)
__pv_table_end = .;
}
.init.data : {
INIT_SETUP(16) INIT_SETUP(16)
INIT_CALLS INIT_CALLS
CON_INITCALL CON_INITCALL
...@@ -201,48 +187,46 @@ SECTIONS ...@@ -201,48 +187,46 @@ SECTIONS
INIT_RAM_FS INIT_RAM_FS
} }
#ifdef CONFIG_SMP
PERCPU_SECTION(L1_CACHE_BYTES)
#endif
_exiprom = .; /* End of XIP ROM area */ _exiprom = .; /* End of XIP ROM area */
__data_loc = ALIGN(4); /* location in binary */
. = PAGE_OFFSET + TEXT_OFFSET;
.data : AT(__data_loc) {
_data = .; /* address in memory */
_sdata = .;
/* /*
* first, the init task union, aligned * From this point, stuff is considered writable and will be copied to RAM
* to an 8192 byte boundary. */
*/ __data_loc = ALIGN(4); /* location in file */
INIT_TASK_DATA(THREAD_SIZE) . = PAGE_OFFSET + TEXT_OFFSET; /* location in memory */
#undef LOAD_OFFSET
#define LOAD_OFFSET (PAGE_OFFSET + TEXT_OFFSET - __data_loc)
. = ALIGN(THREAD_SIZE);
_sdata = .;
RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
.data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) {
*(.data..ro_after_init)
}
_edata = .;
. = ALIGN(PAGE_SIZE); . = ALIGN(PAGE_SIZE);
__init_begin = .; __init_begin = .;
.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
INIT_DATA INIT_DATA
}
.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
ARM_EXIT_KEEP(EXIT_DATA) ARM_EXIT_KEEP(EXIT_DATA)
. = ALIGN(PAGE_SIZE);
__init_end = .;
*(.data..ro_after_init)
NOSAVE_DATA
CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
READ_MOSTLY_DATA(L1_CACHE_BYTES)
/*
* and the usual data section
*/
DATA_DATA
CONSTRUCTORS
_edata = .;
} }
_edata_loc = __data_loc + SIZEOF(.data); #ifdef CONFIG_SMP
PERCPU_SECTION(L1_CACHE_BYTES)
#endif
/*
* End of copied data. We need a dummy section to get its LMA.
* Also located before final ALIGN() as trailing padding is not stored
* in the resulting binary file and useless to copy.
*/
.data.endmark : AT(ADDR(.data.endmark) - LOAD_OFFSET) { }
_edata_loc = LOADADDR(.data.endmark);
BUG_TABLE . = ALIGN(PAGE_SIZE);
__init_end = .;
#ifdef CONFIG_HAVE_TCM #ifdef CONFIG_HAVE_TCM
/* /*
...@@ -301,7 +285,7 @@ SECTIONS ...@@ -301,7 +285,7 @@ SECTIONS
} }
#endif #endif
BSS_SECTION(0, 0, 0) BSS_SECTION(0, 0, 8)
_end = .; _end = .;
STABS_DEBUG STABS_DEBUG
...@@ -322,3 +306,11 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") ...@@ -322,3 +306,11 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
*/ */
ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE, ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE,
"HYP init code too big or misaligned") "HYP init code too big or misaligned")
#ifdef CONFIG_XIP_DEFLATED_DATA
/*
* The .bss is used as a stack area for __inflate_kernel_data() whose stack
* frame is 9568 bytes. Make sure it has extra room left.
*/
ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA")
#endif
...@@ -214,14 +214,9 @@ SECTIONS ...@@ -214,14 +214,9 @@ SECTIONS
*(.pv_table) *(.pv_table)
__pv_table_end = .; __pv_table_end = .;
} }
.init.data : {
INIT_DATA INIT_DATA_SECTION(16)
INIT_SETUP(16)
INIT_CALLS
CON_INITCALL
SECURITY_INITCALL
INIT_RAM_FS
}
.exit.data : { .exit.data : {
ARM_EXIT_KEEP(EXIT_DATA) ARM_EXIT_KEEP(EXIT_DATA)
} }
...@@ -236,33 +231,10 @@ SECTIONS ...@@ -236,33 +231,10 @@ SECTIONS
. = ALIGN(THREAD_SIZE); . = ALIGN(THREAD_SIZE);
#endif #endif
__init_end = .; __init_end = .;
__data_loc = .;
.data : AT(__data_loc) {
_data = .; /* address in memory */
_sdata = .;
/*
* first, the init task union, aligned
* to an 8192 byte boundary.
*/
INIT_TASK_DATA(THREAD_SIZE)
NOSAVE_DATA
CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
READ_MOSTLY_DATA(L1_CACHE_BYTES)
/*
* and the usual data section
*/
DATA_DATA
CONSTRUCTORS
_edata = .;
}
_edata_loc = __data_loc + SIZEOF(.data);
BUG_TABLE _sdata = .;
RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
_edata = .;
#ifdef CONFIG_HAVE_TCM #ifdef CONFIG_HAVE_TCM
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment