Commit 79db4d22 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'clang-lto-v5.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull clang LTO updates from Kees Cook:
 "Clang Link Time Optimization.

  This is built on the work done preparing for LTO by arm64 folks,
  tracing folks, etc. This includes the core changes as well as the
  remaining pieces for arm64 (LTO has been the default build method on
  Android for about 3 years now, as it is the prerequisite for the
  Control Flow Integrity protections).

  While x86 LTO enablement is done, it depends on some pending objtool
  clean-ups. It's possible that I'll send a "part 2" pull request for
  LTO that includes x86 support.

  For merge log posterity, and as detailed in commit dc5723b0
  ("kbuild: add support for Clang LTO"), here is the lt;dr to do an LTO
  build:

        make LLVM=1 LLVM_IAS=1 defconfig
        scripts/config -e LTO_CLANG_THIN
        make LLVM=1 LLVM_IAS=1

  (To do a cross-compile of arm64, add "CROSS_COMPILE=aarch64-linux-gnu-"
  and "ARCH=arm64" to the "make" command lines.)

  Summary:

   - Clang LTO build infrastructure and arm64-specific enablement (Sami
     Tolvanen)

   - Recursive build CC_FLAGS_LTO fix (Alexander Lobakin)"

* tag 'clang-lto-v5.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  kbuild: prevent CC_FLAGS_LTO self-bloating on recursive rebuilds
  arm64: allow LTO to be selected
  arm64: disable recordmcount with DYNAMIC_FTRACE_WITH_REGS
  arm64: vdso: disable LTO
  drivers/misc/lkdtm: disable LTO for rodata.o
  efi/libstub: disable LTO
  scripts/mod: disable LTO for empty.c
  modpost: lto: strip .lto from module names
  PCI: Fix PREL32 relocations for LTO
  init: lto: fix PREL32 relocations
  init: lto: ensure initcall ordering
  kbuild: lto: add a default list of used symbols
  kbuild: lto: merge module sections
  kbuild: lto: limit inlining
  kbuild: lto: fix module versioning
  kbuild: add support for Clang LTO
  tracing: move function tracer options to Kconfig
parents 3b9cdafb 2b868952
......@@ -42,6 +42,7 @@
*.so.dbg
*.su
*.symtypes
*.symversions
*.tab.[ch]
*.tar
*.xz
......
......@@ -853,12 +853,8 @@ KBUILD_CFLAGS += $(DEBUG_CFLAGS)
export DEBUG_CFLAGS
ifdef CONFIG_FUNCTION_TRACER
ifdef CONFIG_FTRACE_MCOUNT_RECORD
# gcc 5 supports generating the mcount tables directly
ifeq ($(call cc-option-yn,-mrecord-mcount),y)
CC_FLAGS_FTRACE += -mrecord-mcount
export CC_USING_RECORD_MCOUNT := 1
endif
ifdef CONFIG_FTRACE_MCOUNT_USE_CC
CC_FLAGS_FTRACE += -mrecord-mcount
ifdef CONFIG_HAVE_NOP_MCOUNT
ifeq ($(call cc-option-yn, -mnop-mcount),y)
CC_FLAGS_FTRACE += -mnop-mcount
......@@ -866,6 +862,12 @@ ifdef CONFIG_FTRACE_MCOUNT_RECORD
endif
endif
endif
ifdef CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
ifdef CONFIG_HAVE_C_RECORDMCOUNT
BUILD_C_RECORDMCOUNT := y
export BUILD_C_RECORDMCOUNT
endif
endif
ifdef CONFIG_HAVE_FENTRY
ifeq ($(call cc-option-yn, -mfentry),y)
CC_FLAGS_FTRACE += -mfentry
......@@ -875,12 +877,6 @@ endif
export CC_FLAGS_FTRACE
KBUILD_CFLAGS += $(CC_FLAGS_FTRACE) $(CC_FLAGS_USING)
KBUILD_AFLAGS += $(CC_FLAGS_USING)
ifdef CONFIG_DYNAMIC_FTRACE
ifdef CONFIG_HAVE_C_RECORDMCOUNT
BUILD_C_RECORDMCOUNT := y
export BUILD_C_RECORDMCOUNT
endif
endif
endif
# We trigger additional mismatches with less inlining
......@@ -899,6 +895,24 @@ KBUILD_CFLAGS += $(CC_FLAGS_SCS)
export CC_FLAGS_SCS
endif
ifdef CONFIG_LTO_CLANG
ifdef CONFIG_LTO_CLANG_THIN
CC_FLAGS_LTO := -flto=thin -fsplit-lto-unit
KBUILD_LDFLAGS += --thinlto-cache-dir=$(extmod-prefix).thinlto-cache
else
CC_FLAGS_LTO := -flto
endif
CC_FLAGS_LTO += -fvisibility=hidden
# Limit inlining across translation units to reduce binary size
KBUILD_LDFLAGS += -mllvm -import-instr-limit=5
endif
ifdef CONFIG_LTO
KBUILD_CFLAGS += $(CC_FLAGS_LTO)
export CC_FLAGS_LTO
endif
ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B
KBUILD_CFLAGS += -falign-functions=32
endif
......@@ -1493,7 +1507,7 @@ MRPROPER_FILES += include/config include/generated \
*.spec
# Directories & files removed with 'make distclean'
DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS
DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS .thinlto-cache
# clean - Delete most, but leave enough to build external modules
#
......@@ -1739,7 +1753,7 @@ PHONY += compile_commands.json
clean-dirs := $(KBUILD_EXTMOD)
clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers $(KBUILD_EXTMOD)/modules.nsdeps \
$(KBUILD_EXTMOD)/compile_commands.json
$(KBUILD_EXTMOD)/compile_commands.json $(KBUILD_EXTMOD)/.thinlto-cache
PHONY += help
help:
......@@ -1836,7 +1850,8 @@ clean: $(clean-dirs)
-o -name '.tmp_*.o.*' \
-o -name '*.c.[012]*.*' \
-o -name '*.ll' \
-o -name '*.gcno' \) -type f -print | xargs rm -f
-o -name '*.gcno' \
-o -name '*.*.symversions' \) -type f -print | xargs rm -f
# Generate tags for editors
# ---------------------------------------------------------------------------
......
......@@ -603,6 +603,96 @@ config SHADOW_CALL_STACK
reading and writing arbitrary memory may be able to locate them
and hijack control flow by modifying the stacks.
config LTO
bool
help
Selected if the kernel will be built using the compiler's LTO feature.
config LTO_CLANG
bool
select LTO
help
Selected if the kernel will be built using Clang's LTO feature.
config ARCH_SUPPORTS_LTO_CLANG
bool
help
An architecture should select this option if it supports:
- compiling with Clang,
- compiling inline assembly with Clang's integrated assembler,
- and linking with LLD.
config ARCH_SUPPORTS_LTO_CLANG_THIN
bool
help
An architecture should select this option if it can support Clang's
ThinLTO mode.
config HAS_LTO_CLANG
def_bool y
# Clang >= 11: https://github.com/ClangBuiltLinux/linux/issues/510
depends on CC_IS_CLANG && CLANG_VERSION >= 110000 && LD_IS_LLD
depends on $(success,test $(LLVM) -eq 1)
depends on $(success,test $(LLVM_IAS) -eq 1)
depends on $(success,$(NM) --help | head -n 1 | grep -qi llvm)
depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm)
depends on ARCH_SUPPORTS_LTO_CLANG
depends on !FTRACE_MCOUNT_USE_RECORDMCOUNT
depends on !KASAN
depends on !GCOV_KERNEL
help
The compiler and Kconfig options support building with Clang's
LTO.
choice
prompt "Link Time Optimization (LTO)"
default LTO_NONE
help
This option enables Link Time Optimization (LTO), which allows the
compiler to optimize binaries globally.
If unsure, select LTO_NONE. Note that LTO is very resource-intensive
so it's disabled by default.
config LTO_NONE
bool "None"
help
Build the kernel normally, without Link Time Optimization (LTO).
config LTO_CLANG_FULL
bool "Clang Full LTO (EXPERIMENTAL)"
depends on HAS_LTO_CLANG
depends on !COMPILE_TEST
select LTO_CLANG
help
This option enables Clang's full Link Time Optimization (LTO), which
allows the compiler to optimize the kernel globally. If you enable
this option, the compiler generates LLVM bitcode instead of ELF
object files, and the actual compilation from bitcode happens at
the LTO link step, which may take several minutes depending on the
kernel configuration. More information can be found from LLVM's
documentation:
https://llvm.org/docs/LinkTimeOptimization.html
During link time, this option can use a large amount of RAM, and
may take much longer than the ThinLTO option.
config LTO_CLANG_THIN
bool "Clang ThinLTO (EXPERIMENTAL)"
depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN
select LTO_CLANG
help
This option enables Clang's ThinLTO, which allows for parallel
optimization and faster incremental compiles compared to the
CONFIG_LTO_CLANG_FULL option. More information can be found
from Clang's documentation:
https://clang.llvm.org/docs/ThinLTO.html
If unsure, say Y.
endchoice
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
......
......@@ -73,6 +73,8 @@ config ARM64
select ARCH_SUPPORTS_DEBUG_PAGEALLOC
select ARCH_SUPPORTS_MEMORY_FAILURE
select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_LTO_CLANG if CPU_LITTLE_ENDIAN
select ARCH_SUPPORTS_LTO_CLANG_THIN
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
......@@ -162,6 +164,8 @@ config ARM64
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS \
if $(cc-option,-fpatchable-function-entry=2)
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
if DYNAMIC_FTRACE_WITH_REGS
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
......
......@@ -29,7 +29,8 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS)
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) \
$(CC_FLAGS_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
OBJECT_FILES_NON_STANDARD := y
......
......@@ -38,6 +38,8 @@ KBUILD_CFLAGS := $(cflags-y) -Os -DDISABLE_BRANCH_PROFILING \
# remove SCS flags from all objects in this directory
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
# disable LTO
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
GCOV_PROFILE := n
# Sanitizer runtimes are unavailable and cannot be linked here.
......
......@@ -16,6 +16,7 @@ lkdtm-$(CONFIG_PPC_BOOK3S_64) += powerpc.o
KASAN_SANITIZE_rodata.o := n
KASAN_SANITIZE_stackleak.o := n
KCOV_INSTRUMENT_rodata.o := n
CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO)
OBJCOPYFLAGS :=
OBJCOPYFLAGS_rodata_objcopy.o := \
......
......@@ -90,10 +90,13 @@
* .data. We don't want to pull in .data..other sections, which Linux
* has defined. Same for text and bss.
*
* With LTO_CLANG, the linker also splits sections by default, so we need
* these macros to combine the sections during the final link.
*
* RODATA_MAIN is not used because existing code already defines .rodata.x
* sections to be brought in with rodata.
*/
#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
#define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$Lubsan_*
#define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
......
......@@ -184,19 +184,80 @@ extern bool initcall_debug;
* as KEEP() in the linker script.
*/
/* Format: <modname>__<counter>_<line>_<fn> */
#define __initcall_id(fn) \
__PASTE(__KBUILD_MODNAME, \
__PASTE(__, \
__PASTE(__COUNTER__, \
__PASTE(_, \
__PASTE(__LINE__, \
__PASTE(_, fn))))))
/* Format: __<prefix>__<iid><id> */
#define __initcall_name(prefix, __iid, id) \
__PASTE(__, \
__PASTE(prefix, \
__PASTE(__, \
__PASTE(__iid, id))))
#ifdef CONFIG_LTO_CLANG
/*
* With LTO, the compiler doesn't necessarily obey link order for
* initcalls. In order to preserve the correct order, we add each
* variable into its own section and generate a linker script (in
* scripts/link-vmlinux.sh) to specify the order of the sections.
*/
#define __initcall_section(__sec, __iid) \
#__sec ".init.." #__iid
/*
* With LTO, the compiler can rename static functions to avoid
* global naming collisions. We use a global stub function for
* initcalls to create a stable symbol name whose address can be
* taken in inline assembly when PREL32 relocations are used.
*/
#define __initcall_stub(fn, __iid, id) \
__initcall_name(initstub, __iid, id)
#define __define_initcall_stub(__stub, fn) \
int __init __stub(void); \
int __init __stub(void) \
{ \
return fn(); \
} \
__ADDRESSABLE(__stub)
#else
#define __initcall_section(__sec, __iid) \
#__sec ".init"
#define __initcall_stub(fn, __iid, id) fn
#define __define_initcall_stub(__stub, fn) \
__ADDRESSABLE(fn)
#endif
#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
#define ___define_initcall(fn, id, __sec) \
__ADDRESSABLE(fn) \
asm(".section \"" #__sec ".init\", \"a\" \n" \
"__initcall_" #fn #id ": \n" \
".long " #fn " - . \n" \
#define ____define_initcall(fn, __stub, __name, __sec) \
__define_initcall_stub(__stub, fn) \
asm(".section \"" __sec "\", \"a\" \n" \
__stringify(__name) ": \n" \
".long " __stringify(__stub) " - . \n" \
".previous \n");
#else
#define ___define_initcall(fn, id, __sec) \
static initcall_t __initcall_##fn##id __used \
__attribute__((__section__(#__sec ".init"))) = fn;
#define ____define_initcall(fn, __unused, __name, __sec) \
static initcall_t __name __used \
__attribute__((__section__(__sec))) = fn;
#endif
#define __unique_initcall(fn, id, __sec, __iid) \
____define_initcall(fn, \
__initcall_stub(fn, __iid, id), \
__initcall_name(initcall, __iid, id), \
__initcall_section(__sec, __iid))
#define ___define_initcall(fn, id, __sec) \
__unique_initcall(fn, id, __sec, __initcall_id(fn))
#define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id)
/*
......@@ -236,7 +297,7 @@ extern bool initcall_debug;
#define __exitcall(fn) \
static exitcall_t __exitcall_##fn __exit_call = fn
#define console_initcall(fn) ___define_initcall(fn,, .con_initcall)
#define console_initcall(fn) ___define_initcall(fn, con, .con_initcall)
struct obs_kernel_param {
const char *str;
......
......@@ -1926,7 +1926,7 @@ enum pci_fixup_pass {
};
#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
#define ___DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
class_shift, hook) \
__ADDRESSABLE(hook) \
asm(".section " #sec ", \"a\" \n" \
......@@ -1935,10 +1935,33 @@ enum pci_fixup_pass {
".long " #class ", " #class_shift " \n" \
".long " #hook " - . \n" \
".previous \n");
/*
* Clang's LTO may rename static functions in C, but has no way to
* handle such renamings when referenced from inline asm. To work
* around this, create global C stubs for these cases.
*/
#ifdef CONFIG_LTO_CLANG
#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
class_shift, hook, stub) \
void stub(struct pci_dev *dev); \
void stub(struct pci_dev *dev) \
{ \
hook(dev); \
} \
___DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
class_shift, stub)
#else
#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
class_shift, hook, stub) \
___DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
class_shift, hook)
#endif
#define DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
class_shift, hook) \
__DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
class_shift, hook)
class_shift, hook, __UNIQUE_ID(hook))
#else
/* Anonymous variables would be nice... */
#define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class, \
......
......@@ -2307,6 +2307,7 @@ config TRIM_UNUSED_KSYMS
config UNUSED_KSYMS_WHITELIST
string "Whitelist of symbols to keep in ksymtab"
depends on TRIM_UNUSED_KSYMS
default "scripts/lto-used-symbollist.txt" if LTO_CLANG
help
By default, all unused exported symbols will be un-exported from the
build when TRIM_UNUSED_KSYMS is selected.
......
......@@ -602,6 +602,22 @@ config FTRACE_MCOUNT_RECORD
depends on DYNAMIC_FTRACE
depends on HAVE_FTRACE_MCOUNT_RECORD
config FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
bool
depends on FTRACE_MCOUNT_RECORD
config FTRACE_MCOUNT_USE_CC
def_bool y
depends on $(cc-option,-mrecord-mcount)
depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
depends on FTRACE_MCOUNT_RECORD
config FTRACE_MCOUNT_USE_RECORDMCOUNT
def_bool y
depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
depends on !FTRACE_MCOUNT_USE_CC
depends on FTRACE_MCOUNT_RECORD
config TRACING_MAP
bool
depends on ARCH_HAVE_NMI_SAFE_CMPXCHG
......
......@@ -111,7 +111,7 @@ endif
# ---------------------------------------------------------------------------
quiet_cmd_cc_s_c = CC $(quiet_modtag) $@
cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS), $(c_flags)) -fverbose-asm -S -o $@ $<
cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS) $(CC_FLAGS_LTO), $(c_flags)) -fverbose-asm -S -o $@ $<
$(obj)/%.s: $(src)/%.c FORCE
$(call if_changed_dep,cc_s_c)
......@@ -166,6 +166,15 @@ ifdef CONFIG_MODVERSIONS
# the actual value of the checksum generated by genksyms
# o remove .tmp_<file>.o to <file>.o
ifdef CONFIG_LTO_CLANG
# Generate .o.symversions files for each .o with exported symbols, and link these
# to the kernel and/or modules at the end.
cmd_modversions_c = \
if $(NM) $@ 2>/dev/null | grep -q __ksymtab; then \
$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
> $@.symversions; \
fi;
else
cmd_modversions_c = \
if $(OBJDUMP) -h $@ | grep -q __ksymtab; then \
$(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \
......@@ -177,9 +186,9 @@ cmd_modversions_c = \
rm -f $(@D)/.tmp_$(@F:.o=.ver); \
fi
endif
endif
ifdef CONFIG_FTRACE_MCOUNT_RECORD
ifndef CC_USING_RECORD_MCOUNT
ifdef CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
# compiler will not generate __mcount_loc use recordmcount or recordmcount.pl
ifdef BUILD_C_RECORDMCOUNT
ifeq ("$(origin RECORDMCOUNT_WARN)", "command line")
......@@ -206,8 +215,7 @@ recordmcount_source := $(srctree)/scripts/recordmcount.pl
endif # BUILD_C_RECORDMCOUNT
cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)), \
$(sub_cmd_record_mcount))
endif # CC_USING_RECORD_MCOUNT
endif # CONFIG_FTRACE_MCOUNT_RECORD
endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
ifdef CONFIG_STACK_VALIDATION
ifneq ($(SKIP_STACK_VALIDATION),1)
......@@ -388,6 +396,18 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler
$(subdir-builtin): $(obj)/%/built-in.a: $(obj)/% ;
$(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ;
# combine symversions for later processing
quiet_cmd_update_lto_symversions = SYMVER $@
ifeq ($(CONFIG_LTO_CLANG) $(CONFIG_MODVERSIONS),y y)
cmd_update_lto_symversions = \
rm -f $@.symversions \
$(foreach n, $(filter-out FORCE,$^), \
$(if $(wildcard $(n).symversions), \
; cat $(n).symversions >> $@.symversions))
else
cmd_update_lto_symversions = echo >/dev/null
endif
#
# Rule to compile a set of .o files into one .a file (without symbol table)
#
......@@ -395,8 +415,11 @@ $(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ;
quiet_cmd_ar_builtin = AR $@
cmd_ar_builtin = rm -f $@; $(AR) cDPrST $@ $(real-prereqs)
quiet_cmd_ar_and_symver = AR $@
cmd_ar_and_symver = $(cmd_update_lto_symversions); $(cmd_ar_builtin)
$(obj)/built-in.a: $(real-obj-y) FORCE
$(call if_changed,ar_builtin)
$(call if_changed,ar_and_symver)
#
# Rule to create modules.order file
......@@ -416,15 +439,26 @@ $(obj)/modules.order: $(obj-m) FORCE
#
# Rule to compile a set of .o files into one .a file (with symbol table)
#
quiet_cmd_ar_lib = AR $@
cmd_ar_lib = $(cmd_update_lto_symversions); $(cmd_ar)
$(obj)/lib.a: $(lib-y) FORCE
$(call if_changed,ar)
$(call if_changed,ar_lib)
# NOTE:
# Do not replace $(filter %.o,^) with $(real-prereqs). When a single object
# module is turned into a multi object module, $^ will contain header file
# dependencies recorded in the .*.cmd file.
ifdef CONFIG_LTO_CLANG
quiet_cmd_link_multi-m = AR [M] $@
cmd_link_multi-m = \
$(cmd_update_lto_symversions); \
rm -f $@; \
$(AR) cDPrsT $@ $(filter %.o,$^)
else
quiet_cmd_link_multi-m = LD [M] $@
cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^)
endif
$(multi-used-m): FORCE
$(call if_changed,link_multi-m)
......
......@@ -119,9 +119,11 @@ target-stem = $(basename $(patsubst $(obj)/%,%,$@))
# These flags are needed for modversions and compiling, so we define them here
# $(modname_flags) defines KBUILD_MODNAME as the name of the module it will
# end up in (or would, if it gets compiled in)
name-fix = $(call stringify,$(subst $(comma),_,$(subst -,_,$1)))
name-fix-token = $(subst $(comma),_,$(subst -,_,$1))
name-fix = $(call stringify,$(call name-fix-token,$1))
basename_flags = -DKBUILD_BASENAME=$(call name-fix,$(basetarget))
modname_flags = -DKBUILD_MODNAME=$(call name-fix,$(modname))
modname_flags = -DKBUILD_MODNAME=$(call name-fix,$(modname)) \
-D__KBUILD_MODNAME=kmod_$(call name-fix-token,$(modname))
modfile_flags = -DKBUILD_MODFILE=$(call stringify,$(modfile))
_c_flags = $(filter-out $(CFLAGS_REMOVE_$(target-stem).o), \
......
......@@ -30,6 +30,12 @@ quiet_cmd_cc_o_c = CC [M] $@
ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
ifdef CONFIG_LTO_CLANG
# With CONFIG_LTO_CLANG, reuse the object file we compiled for modpost to
# avoid a second slow LTO link
prelink-ext := .lto
endif
quiet_cmd_ld_ko_o = LD [M] $@
cmd_ld_ko_o = \
$(LD) -r $(KBUILD_LDFLAGS) \
......@@ -53,8 +59,9 @@ if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check), \
$(cmd); \
printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:)
# Re-generate module BTFs if either module's .ko or vmlinux changed
$(modules): %.ko: %.o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE
$(modules): %.ko: %$(prelink-ext).o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE
+$(call if_changed_except,ld_ko_o,vmlinux)
ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+$(if $(newer-prereqs),$(call cmd,btf_ko))
......
......@@ -43,6 +43,9 @@ __modpost:
include include/config/auto.conf
include scripts/Kbuild.include
# for ld_flags
include scripts/Makefile.lib
MODPOST = scripts/mod/modpost \
$(if $(CONFIG_MODVERSIONS),-m) \
$(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \
......@@ -102,12 +105,30 @@ $(input-symdump):
@echo >&2 'WARNING: Symbol version dump "$@" is missing.'
@echo >&2 ' Modules may not have dependencies or modversions.'
ifdef CONFIG_LTO_CLANG
# With CONFIG_LTO_CLANG, .o files might be LLVM bitcode, so we need to run
# LTO to compile them into native code before running modpost
prelink-ext := .lto
quiet_cmd_cc_lto_link_modules = LTO [M] $@
cmd_cc_lto_link_modules = \
$(LD) $(ld_flags) -r -o $@ \
$(shell [ -s $(@:.lto.o=.o.symversions) ] && \
echo -T $(@:.lto.o=.o.symversions)) \
--whole-archive $^
%.lto.o: %.o
$(call if_changed,cc_lto_link_modules)
endif
modules := $(sort $(shell cat $(MODORDER)))
# Read out modules.order to pass in modpost.
# Otherwise, allmodconfig would fail with "Argument list too long".
quiet_cmd_modpost = MODPOST $@
cmd_modpost = sed 's/ko$$/o/' $< | $(MODPOST) -T -
cmd_modpost = sed 's/\.ko$$/$(prelink-ext)\.o/' $< | $(MODPOST) -T -
$(output-symdump): $(MODORDER) $(input-symdump) FORCE
$(output-symdump): $(MODORDER) $(input-symdump) $(modules:.ko=$(prelink-ext).o) FORCE
$(call if_changed,modpost)
targets += $(output-symdump)
......
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
#
# Generates a linker script that specifies the correct initcall order.
#
# Copyright (C) 2019 Google LLC
use strict;
use warnings;
use IO::Handle;
use IO::Select;
use POSIX ":sys_wait_h";
my $nm = $ENV{'NM'} || die "$0: ERROR: NM not set?";
my $objtree = $ENV{'objtree'} || '.';
## currently active child processes
my $jobs = {}; # child process pid -> file handle
## results from child processes
my $results = {}; # object index -> [ { level, secname }, ... ]
## reads _NPROCESSORS_ONLN to determine the maximum number of processes to
## start
sub get_online_processors {
open(my $fh, "getconf _NPROCESSORS_ONLN 2>/dev/null |")
or die "$0: ERROR: failed to execute getconf: $!";
my $procs = <$fh>;
close($fh);
if (!($procs =~ /^\d+$/)) {
return 1;
}
return int($procs);
}
## writes results to the parent process
## format: <file index> <initcall level> <base initcall section name>
sub write_results {
my ($index, $initcalls) = @_;
# sort by the counter value to ensure the order of initcalls within
# each object file is correct
foreach my $counter (sort { $a <=> $b } keys(%{$initcalls})) {
my $level = $initcalls->{$counter}->{'level'};
# section name for the initcall function
my $secname = $initcalls->{$counter}->{'module'} . '__' .
$counter . '_' .
$initcalls->{$counter}->{'line'} . '_' .
$initcalls->{$counter}->{'function'};
print "$index $level $secname\n";
}
}
## reads a result line from a child process and adds it to the $results array
sub read_results{
my ($fh) = @_;
# each child prints out a full line w/ autoflush and exits after the
# last line, so even if buffered I/O blocks here, it shouldn't block
# very long
my $data = <$fh>;
if (!defined($data)) {
return 0;
}
chomp($data);
my ($index, $level, $secname) = $data =~
/^(\d+)\ ([^\ ]+)\ (.*)$/;
if (!defined($index) ||
!defined($level) ||
!defined($secname)) {
die "$0: ERROR: child process returned invalid data: $data\n";
}
$index = int($index);
if (!exists($results->{$index})) {
$results->{$index} = [];
}
push (@{$results->{$index}}, {
'level' => $level,
'secname' => $secname
});
return 1;
}
## finds initcalls from an object file or all object files in an archive, and
## writes results back to the parent process
sub find_initcalls {
my ($index, $file) = @_;
die "$0: ERROR: file $file doesn't exist?" if (! -f $file);
open(my $fh, "\"$nm\" --defined-only \"$file\" 2>/dev/null |")
or die "$0: ERROR: failed to execute \"$nm\": $!";
my $initcalls = {};
while (<$fh>) {
chomp;
# check for the start of a new object file (if processing an
# archive)
my ($path)= $_ =~ /^(.+)\:$/;
if (defined($path)) {
write_results($index, $initcalls);
$initcalls = {};
next;
}
# look for an initcall
my ($module, $counter, $line, $symbol) = $_ =~
/[a-z]\s+__initcall__(\S*)__(\d+)_(\d+)_(.*)$/;
if (!defined($module)) {
$module = ''
}
if (!defined($counter) ||
!defined($line) ||
!defined($symbol)) {
next;
}
# parse initcall level
my ($function, $level) = $symbol =~
/^(.*)((early|rootfs|con|[0-9])s?)$/;
die "$0: ERROR: invalid initcall name $symbol in $file($path)"
if (!defined($function) || !defined($level));
$initcalls->{$counter} = {
'module' => $module,
'line' => $line,
'function' => $function,
'level' => $level,
};
}
close($fh);
write_results($index, $initcalls);
}
## waits for any child process to complete, reads the results, and adds them to
## the $results array for later processing
sub wait_for_results {
my ($select) = @_;
my $pid = 0;
do {
# unblock children that may have a full write buffer
foreach my $fh ($select->can_read(0)) {
read_results($fh);
}
# check for children that have exited, read the remaining data
# from them, and clean up
$pid = waitpid(-1, WNOHANG);
if ($pid > 0) {
if (!exists($jobs->{$pid})) {
next;
}
my $fh = $jobs->{$pid};
$select->remove($fh);
while (read_results($fh)) {
# until eof
}
close($fh);
delete($jobs->{$pid});
}
} while ($pid > 0);
}
## forks a child to process each file passed in the command line and collects
## the results
sub process_files {
my $index = 0;
my $njobs = $ENV{'PARALLELISM'} || get_online_processors();
my $select = IO::Select->new();
while (my $file = shift(@ARGV)) {
# fork a child process and read it's stdout
my $pid = open(my $fh, '-|');
if (!defined($pid)) {
die "$0: ERROR: failed to fork: $!";
} elsif ($pid) {
# save the child process pid and the file handle
$select->add($fh);
$jobs->{$pid} = $fh;
} else {
# in the child process
STDOUT->autoflush(1);
find_initcalls($index, "$objtree/$file");
exit;
}
$index++;
# limit the number of children to $njobs
if (scalar(keys(%{$jobs})) >= $njobs) {
wait_for_results($select);
}
}
# wait for the remaining children to complete
while (scalar(keys(%{$jobs})) > 0) {
wait_for_results($select);
}
}
sub generate_initcall_lds() {
process_files();
my $sections = {}; # level -> [ secname, ...]
# sort results to retain link order and split to sections per
# initcall level
foreach my $index (sort { $a <=> $b } keys(%{$results})) {
foreach my $result (@{$results->{$index}}) {
my $level = $result->{'level'};
if (!exists($sections->{$level})) {
$sections->{$level} = [];
}
push(@{$sections->{$level}}, $result->{'secname'});
}
}
die "$0: ERROR: no initcalls?" if (!keys(%{$sections}));
# print out a linker script that defines the order of initcalls for
# each level
print "SECTIONS {\n";
foreach my $level (sort(keys(%{$sections}))) {
my $section;
if ($level eq 'con') {
$section = '.con_initcall.init';
} else {
$section = ".initcall${level}.init";
}
print "\t${section} : {\n";
foreach my $secname (@{$sections->{$level}}) {
print "\t\t*(${section}..${secname}) ;\n";
}
print "\t}\n";
}
print "}\n";
}
generate_initcall_lds();
......@@ -43,11 +43,37 @@ info()
fi
}
# Generate a linker script to ensure correct ordering of initcalls.
gen_initcalls()
{
info GEN .tmp_initcalls.lds
${PYTHON} ${srctree}/scripts/jobserver-exec \
${PERL} ${srctree}/scripts/generate_initcall_order.pl \
${KBUILD_VMLINUX_OBJS} ${KBUILD_VMLINUX_LIBS} \
> .tmp_initcalls.lds
}
# If CONFIG_LTO_CLANG is selected, collect generated symbol versions into
# .tmp_symversions.lds
gen_symversions()
{
info GEN .tmp_symversions.lds
rm -f .tmp_symversions.lds
for o in ${KBUILD_VMLINUX_OBJS} ${KBUILD_VMLINUX_LIBS}; do
if [ -f ${o}.symversions ]; then
cat ${o}.symversions >> .tmp_symversions.lds
fi
done
}
# Link of vmlinux.o used for section mismatch analysis
# ${1} output file
modpost_link()
{
local objects
local lds=""
objects="--whole-archive \
${KBUILD_VMLINUX_OBJS} \
......@@ -56,7 +82,23 @@ modpost_link()
${KBUILD_VMLINUX_LIBS} \
--end-group"
${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects}
if [ -n "${CONFIG_LTO_CLANG}" ]; then
gen_initcalls
lds="-T .tmp_initcalls.lds"
if [ -n "${CONFIG_MODVERSIONS}" ]; then
gen_symversions
lds="${lds} -T .tmp_symversions.lds"
fi
# This might take a while, so indicate that we're doing
# an LTO link
info LTO ${1}
else
info LD ${1}
fi
${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${lds} ${objects}
}
objtool_link()
......@@ -103,13 +145,22 @@ vmlinux_link()
fi
if [ "${SRCARCH}" != "um" ]; then
objects="--whole-archive \
${KBUILD_VMLINUX_OBJS} \
--no-whole-archive \
--start-group \
${KBUILD_VMLINUX_LIBS} \
--end-group \
${@}"
if [ -n "${CONFIG_LTO_CLANG}" ]; then
# Use vmlinux.o instead of performing the slow LTO
# link again.
objects="--whole-archive \
vmlinux.o \
--no-whole-archive \
${@}"
else
objects="--whole-archive \
${KBUILD_VMLINUX_OBJS} \
--no-whole-archive \
--start-group \
${KBUILD_VMLINUX_LIBS} \
--end-group \
${@}"
fi
${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \
${strip_debug#-Wl,} \
......@@ -225,6 +276,8 @@ cleanup()
{
rm -f .btf.*
rm -f .tmp_System.map
rm -f .tmp_initcalls.lds
rm -f .tmp_symversions.lds
rm -f .tmp_vmlinux*
rm -f System.map
rm -f vmlinux
......@@ -274,7 +327,6 @@ fi;
${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init need-builtin=1
#link vmlinux.o
info LD vmlinux.o
modpost_link vmlinux.o
objtool_link vmlinux.o
......
memcpy
memmove
memset
__stack_chk_fail
__stack_chk_guard
# SPDX-License-Identifier: GPL-2.0
OBJECT_FILES_NON_STANDARD := y
CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
hostprogs-always-y += modpost mk_elfconfig
always-y += empty.o
......
......@@ -17,7 +17,6 @@
#include <ctype.h>
#include <string.h>
#include <limits.h>
#include <stdbool.h>
#include <errno.h>
#include "modpost.h"
#include "../../include/linux/license.h"
......@@ -84,14 +83,6 @@ modpost_log(enum loglevel loglevel, const char *fmt, ...)
error_occurred = true;
}
static inline bool strends(const char *str, const char *postfix)
{
if (strlen(str) < strlen(postfix))
return false;
return strcmp(str + strlen(str) - strlen(postfix), postfix) == 0;
}
void *do_nofail(void *ptr, const char *expr)
{
if (!ptr)
......@@ -1988,6 +1979,10 @@ static char *remove_dot(char *s)
size_t m = strspn(s + n + 1, "0123456789");
if (m && (s[n + m] == '.' || s[n + m] == 0))
s[n] = 0;
/* strip trailing .lto */
if (strends(s, ".lto"))
s[strlen(s) - 4] = '\0';
}
return s;
}
......@@ -2011,6 +2006,9 @@ static void read_symbols(const char *modname)
/* strip trailing .o */
tmp = NOFAIL(strdup(modname));
tmp[strlen(tmp) - 2] = '\0';
/* strip trailing .lto */
if (strends(tmp, ".lto"))
tmp[strlen(tmp) - 4] = '\0';
mod = new_module(tmp);
free(tmp);
}
......
......@@ -2,6 +2,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
......@@ -180,6 +181,14 @@ static inline unsigned int get_secindex(const struct elf_info *info,
return info->symtab_shndx_start[sym - info->symtab_start];
}
static inline bool strends(const char *str, const char *postfix)
{
if (strlen(str) < strlen(postfix))
return false;
return strcmp(str + strlen(str) - strlen(postfix), postfix) == 0;
}
/* file2alias.c */
extern unsigned int cross_build;
void handle_moddevtable(struct module *mod, struct elf_info *info,
......
......@@ -391,10 +391,14 @@ void get_src_version(const char *modname, char sum[], unsigned sumlen)
struct md4_ctx md;
char *fname;
char filelist[PATH_MAX + 1];
int postfix_len = 1;
if (strends(modname, ".lto.o"))
postfix_len = 5;
/* objects for a module are listed in the first line of *.mod file. */
snprintf(filelist, sizeof(filelist), "%.*smod",
(int)strlen(modname) - 1, modname);
(int)strlen(modname) - postfix_len, modname);
buf = read_text_file(filelist);
......
......@@ -23,6 +23,30 @@ SECTIONS {
.init_array 0 : ALIGN(8) { *(SORT(.init_array.*)) *(.init_array) }
__jump_table 0 : ALIGN(8) { KEEP(*(__jump_table)) }
__patchable_function_entries : { *(__patchable_function_entries) }
/*
* With CONFIG_LTO_CLANG, LLD always enables -fdata-sections and
* -ffunction-sections, which increases the size of the final module.
* Merge the split sections in the final binary.
*/
.bss : {
*(.bss .bss.[0-9a-zA-Z_]*)
*(.bss..L*)
}
.data : {
*(.data .data.[0-9a-zA-Z_]*)
*(.data..L*)
}
.rodata : {
*(.rodata .rodata.[0-9a-zA-Z_]*)
*(.rodata..L*)
}
.text : { *(.text .text.[0-9a-zA-Z_]*) }
}
/* bring in arch-specific sections */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment