Commit d09e356a authored by Linus Torvalds

Merge branch 'mm-readonly-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull read-only kernel memory updates from Ingo Molnar:
 "This tree adds two (security related) enhancements to the kernel's
  handling of read-only kernel memory:

   - extend read-only kernel memory to a new class of formerly writable
     kernel data: 'post-init read-only memory' via the __ro_after_init
     attribute, and mark the ARM and x86 vDSO as such read-only memory.

     This kind of attribute can be used for data that requires a once
     per bootup initialization sequence, but is otherwise never modified
     after that point.

     This feature was based on the work by PaX Team and Brad Spengler.

     (by Kees Cook, the ARM vDSO bits by David Brown.)

   - make CONFIG_DEBUG_RODATA always enabled on x86 and remove the
     Kconfig option.  This simplifies the kernel and also signals that
     read-only memory is the default model and a first-class citizen.
     (Kees Cook)"

* 'mm-readonly-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  ARM/vdso: Mark the vDSO code read-only after init
  x86/vdso: Mark the vDSO code read-only after init
  lkdtm: Verify that '__ro_after_init' works correctly
  arch: Introduce post-init read-only memory
  x86/mm: Always enable CONFIG_DEBUG_RODATA and remove the Kconfig option
  mm/init: Add 'rodata=off' boot cmdline parameter to disable read-only kernel mappings
  asm-generic: Consolidate mark_rodata_ro()
parents 5ec94246 11bf9b86
......@@ -3491,6 +3491,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
ro [KNL] Mount root device read-only on boot
rodata= [KNL]
on Mark read-only kernel memory as read-only (default).
off Leave read-only kernel memory writable for debugging.
root= [KNL] Root filesystem
See name_to_dev_t comment in init/do_mounts.c.
......
......@@ -491,7 +491,6 @@ static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
#endif
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void);
void set_kernel_text_rw(void);
void set_kernel_text_ro(void);
#else
......
......@@ -23,9 +23,8 @@
#include <linux/const.h>
#include <asm/page.h>
__PAGE_ALIGNED_DATA
.globl vdso_start, vdso_end
.section .data..ro_after_init
.balign PAGE_SIZE
vdso_start:
.incbin "arch/arm/vdso/vdso.so"
......
......@@ -156,8 +156,4 @@ int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void);
#endif
#endif
......@@ -22,6 +22,9 @@
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
/* Read-only memory is marked before mark_rodata_ro() is called. */
#define __ro_after_init __read_mostly
void parisc_cache_init(void); /* initializes cache-flushing */
void disable_sr_hashing_asm(int); /* low level support for above */
void disable_sr_hashing(void); /* turns off space register hashing */
......
......@@ -121,10 +121,6 @@ flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vma
}
}
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void);
#endif
#include <asm/kmap_types.h>
#define ARCH_HAS_KMAP
......
......@@ -303,6 +303,9 @@ config ARCH_SUPPORTS_UPROBES
config FIX_EARLYCON_MEM
def_bool y
config DEBUG_RODATA
def_bool y
config PGTABLE_LEVELS
int
default 4 if X86_64
......
......@@ -74,28 +74,16 @@ config EFI_PGT_DUMP
issues with the mapping of the EFI runtime regions into that
table.
config DEBUG_RODATA
bool "Write protect kernel read-only data structures"
default y
depends on DEBUG_KERNEL
---help---
Mark the kernel read-only data as write-protected in the pagetables,
in order to catch accidental (and incorrect) writes to such const
data. This is recommended so that we can catch kernel bugs sooner.
If in doubt, say "Y".
config DEBUG_RODATA_TEST
bool "Testcase for the DEBUG_RODATA feature"
depends on DEBUG_RODATA
bool "Testcase for marking rodata read-only"
default y
---help---
This option enables a testcase for the DEBUG_RODATA
feature as well as for the change_page_attr() infrastructure.
This option enables a testcase for setting rodata read-only
as well as for the change_page_attr() infrastructure.
If in doubt, say "N"
config DEBUG_WX
bool "Warn on W+X mappings at boot"
depends on DEBUG_RODATA
select X86_PTDUMP_CORE
---help---
Generate a warning if any W+X mappings are found at boot.
......
......@@ -140,7 +140,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
fprintf(outfile, "#include <asm/vdso.h>\n");
fprintf(outfile, "\n");
fprintf(outfile,
"static unsigned char raw_data[%lu] __page_aligned_data = {",
"static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {",
mapping_size);
for (j = 0; j < stripped_len; j++) {
if (j % 10 == 0)
......
......@@ -91,16 +91,10 @@ void clflush_cache_range(void *addr, unsigned int size);
#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void);
extern const int rodata_test_data;
extern int kernel_set_to_readonly;
void set_kernel_text_rw(void);
void set_kernel_text_ro(void);
#else
static inline void set_kernel_text_rw(void) { }
static inline void set_kernel_text_ro(void) { }
#endif
#ifdef CONFIG_DEBUG_RODATA_TEST
int rodata_test(void);
......
......@@ -17,15 +17,8 @@ static inline bool kvm_check_and_clear_guest_paused(void)
}
#endif /* CONFIG_KVM_GUEST */
#ifdef CONFIG_DEBUG_RODATA
#define KVM_HYPERCALL \
ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL)
#else
/* On AMD processors, vmcall will generate a trap that we will
* then rewrite to the appropriate instruction.
*/
#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
#endif
/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
* instruction. The hypervisor may replace it with something else but only the
......
......@@ -7,7 +7,7 @@
extern char __brk_base[], __brk_limit[];
extern struct exception_table_entry __stop___ex_table[];
#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
#if defined(CONFIG_X86_64)
extern char __end_rodata_hpage_align[];
#endif
......
......@@ -81,9 +81,9 @@ within(unsigned long addr, unsigned long start, unsigned long end)
static unsigned long text_ip_addr(unsigned long ip)
{
/*
* On x86_64, kernel text mappings are mapped read-only with
* CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead
* of the kernel text mapping to modify the kernel text.
* On x86_64, kernel text mappings are mapped read-only, so we use
* the kernel identity mapping instead of the kernel text mapping
* to modify the kernel text.
*
* For 32bit kernels, these mappings are same and we can use
* kernel identity mapping to modify code.
......
......@@ -750,9 +750,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
int err;
#ifdef CONFIG_DEBUG_RODATA
char opc[BREAK_INSTR_SIZE];
#endif /* CONFIG_DEBUG_RODATA */
bpt->type = BP_BREAKPOINT;
err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
......@@ -761,7 +759,6 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
return err;
err = probe_kernel_write((char *)bpt->bpt_addr,
arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
#ifdef CONFIG_DEBUG_RODATA
if (!err)
return err;
/*
......@@ -778,13 +775,12 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
return -EINVAL;
bpt->type = BP_POKE_BREAKPOINT;
#endif /* CONFIG_DEBUG_RODATA */
return err;
}
int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
{
#ifdef CONFIG_DEBUG_RODATA
int err;
char opc[BREAK_INSTR_SIZE];
......@@ -801,8 +797,8 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
goto knl_write;
return err;
knl_write:
#endif /* CONFIG_DEBUG_RODATA */
return probe_kernel_write((char *)bpt->bpt_addr,
(char *)bpt->saved_instr, BREAK_INSTR_SIZE);
}
......
......@@ -142,7 +142,6 @@ static int test_NX(void)
* by the error message
*/
#ifdef CONFIG_DEBUG_RODATA
/* Test 3: Check if the .rodata section is executable */
if (rodata_test_data != 0xC3) {
printk(KERN_ERR "test_nx: .rodata marker has invalid value\n");
......@@ -151,7 +150,6 @@ static int test_NX(void)
printk(KERN_ERR "test_nx: .rodata section is executable\n");
ret = -ENODEV;
}
#endif
#if 0
/* Test 4: Check if the .data section of a module is executable */
......
......@@ -76,5 +76,5 @@ int rodata_test(void)
}
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure");
MODULE_DESCRIPTION("Testcase for marking rodata as read-only");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
......@@ -41,29 +41,28 @@ ENTRY(phys_startup_64)
jiffies_64 = jiffies;
#endif
#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
#if defined(CONFIG_X86_64)
/*
* On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
* we retain large page mappings for boundaries spanning kernel text, rodata
* and data sections.
* On 64-bit, align RODATA to 2MB so we retain large page mappings for
* boundaries spanning kernel text, rodata and data sections.
*
* However, kernel identity mappings will have different RWX permissions
* to the pages mapping to text and to the pages padding (which are freed) the
* text section. Hence kernel identity mappings will be broken to smaller
* pages. For 64-bit, kernel text and kernel identity mappings are different,
* so we can enable protection checks that come with CONFIG_DEBUG_RODATA,
* as well as retain 2MB large page mappings for kernel text.
* so we can enable protection checks as well as retain 2MB large page
* mappings for kernel text.
*/
#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
#define X64_ALIGN_RODATA_BEGIN . = ALIGN(HPAGE_SIZE);
#define X64_ALIGN_DEBUG_RODATA_END \
#define X64_ALIGN_RODATA_END \
. = ALIGN(HPAGE_SIZE); \
__end_rodata_hpage_align = .;
#else
#define X64_ALIGN_DEBUG_RODATA_BEGIN
#define X64_ALIGN_DEBUG_RODATA_END
#define X64_ALIGN_RODATA_BEGIN
#define X64_ALIGN_RODATA_END
#endif
......@@ -112,13 +111,11 @@ SECTIONS
EXCEPTION_TABLE(16) :text = 0x9090
#if defined(CONFIG_DEBUG_RODATA)
/* .text should occupy whole number of pages */
. = ALIGN(PAGE_SIZE);
#endif
X64_ALIGN_DEBUG_RODATA_BEGIN
X64_ALIGN_RODATA_BEGIN
RO_DATA(PAGE_SIZE)
X64_ALIGN_DEBUG_RODATA_END
X64_ALIGN_RODATA_END
/* Data */
.data : AT(ADDR(.data) - LOAD_OFFSET) {
......
......@@ -871,7 +871,6 @@ static noinline int do_test_wp_bit(void)
return flag;
}
#ifdef CONFIG_DEBUG_RODATA
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
......@@ -960,5 +959,3 @@ void mark_rodata_ro(void)
if (__supported_pte_mask & _PAGE_NX)
debug_checkwx();
}
#endif
......@@ -1074,7 +1074,6 @@ void __init mem_init(void)
mem_init_print_info(NULL);
}
#ifdef CONFIG_DEBUG_RODATA
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
......@@ -1166,8 +1165,6 @@ void mark_rodata_ro(void)
debug_checkwx();
}
#endif
int kern_addr_valid(unsigned long addr)
{
unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
......
......@@ -283,7 +283,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
__pa_symbol(__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;
#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
#if defined(CONFIG_X86_64)
/*
* Once the kernel maps the text as RO (kernel_set_to_readonly is set),
* kernel text mappings for the large page aligned text, rodata sections
......
......@@ -103,6 +103,7 @@ enum ctype {
CT_EXEC_USERSPACE,
CT_ACCESS_USERSPACE,
CT_WRITE_RO,
CT_WRITE_RO_AFTER_INIT,
CT_WRITE_KERN,
};
......@@ -140,6 +141,7 @@ static char* cp_type[] = {
"EXEC_USERSPACE",
"ACCESS_USERSPACE",
"WRITE_RO",
"WRITE_RO_AFTER_INIT",
"WRITE_KERN",
};
......@@ -162,6 +164,7 @@ static DEFINE_SPINLOCK(lock_me_up);
static u8 data_area[EXEC_SIZE];
static const unsigned long rodata = 0xAA55AA55;
static unsigned long ro_after_init __ro_after_init = 0x55AA5500;
module_param(recur_count, int, 0644);
MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test");
......@@ -503,11 +506,28 @@ static void lkdtm_do_action(enum ctype which)
break;
}
case CT_WRITE_RO: {
unsigned long *ptr;
/* Explicitly cast away "const" for the test. */
unsigned long *ptr = (unsigned long *)&rodata;
ptr = (unsigned long *)&rodata;
pr_info("attempting bad rodata write at %p\n", ptr);
*ptr ^= 0xabcd1234;
pr_info("attempting bad write at %p\n", ptr);
break;
}
case CT_WRITE_RO_AFTER_INIT: {
unsigned long *ptr = &ro_after_init;
/*
* Verify we were written to during init. Since an Oops
* is considered a "success", a failure is to just skip the
* real test.
*/
if ((*ptr & 0xAA) != 0xAA) {
pr_info("%p was NOT written during init!?\n", ptr);
break;
}
pr_info("attempting bad ro_after_init write at %p\n", ptr);
*ptr ^= 0xabcd1234;
break;
......@@ -817,6 +837,9 @@ static int __init lkdtm_module_init(void)
int n_debugfs_entries = 1; /* Assume only the direct entry */
int i;
/* Make sure we can write to __ro_after_init values during __init */
ro_after_init |= 0xAA;
/* Register debugfs interface */
lkdtm_debugfs_root = debugfs_create_dir("provoke-crash", NULL);
if (!lkdtm_debugfs_root) {
......
......@@ -256,6 +256,7 @@
.rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__start_rodata) = .; \
*(.rodata) *(.rodata.*) \
*(.data..ro_after_init) /* Read only after init */ \
*(__vermagic) /* Kernel version magic */ \
. = ALIGN(8); \
VMLINUX_SYMBOL(__start___tracepoints_ptrs) = .; \
......
......@@ -12,10 +12,24 @@
#define SMP_CACHE_BYTES L1_CACHE_BYTES
#endif
/*
* __read_mostly is used to keep rarely changing variables out of frequently
* updated cachelines. If an architecture doesn't support it, ignore the
* hint.
*/
#ifndef __read_mostly
#define __read_mostly
#endif
/*
* __ro_after_init is used to mark things that are read-only after init (i.e.
* after mark_rodata_ro() has been called). These are effectively read-only,
* but may get written to during init, so can't live in .rodata (via "const").
*/
#ifndef __ro_after_init
#define __ro_after_init __attribute__((__section__(".data..ro_after_init")))
#endif
#ifndef ____cacheline_aligned
#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
#endif
......
......@@ -142,6 +142,10 @@ void prepare_namespace(void);
void __init load_default_modules(void);
int __init init_rootfs(void);
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void);
#endif
extern void (*late_time_init)(void);
extern bool initcall_debug;
......
......@@ -93,9 +93,6 @@ static int kernel_init(void *);
extern void init_IRQ(void);
extern void fork_init(void);
extern void radix_tree_init(void);
#ifndef CONFIG_DEBUG_RODATA
static inline void mark_rodata_ro(void) { }
#endif
/*
* Debug helper: via this flag we know that we are in 'early bootup code'
......@@ -924,6 +921,28 @@ static int try_to_run_init_process(const char *init_filename)
static noinline void __init kernel_init_freeable(void);
#ifdef CONFIG_DEBUG_RODATA
/*
 * Controls whether mark_readonly() calls mark_rodata_ro(). Defaults to
 * true; the "rodata=" boot parameter can override it.
 */
static bool rodata_enabled = true;

/*
 * Handler for the "rodata=" boot parameter: parses @str as a boolean into
 * rodata_enabled.
 *
 * A __setup() handler has to return 1 to report that it consumed the
 * option; returning 0 makes the boot code treat "rodata=..." as unknown
 * and hand it on to init. strtobool() returns 0 on success, so its result
 * must not be returned directly. An unparsable value is reported here and
 * still counted as handled.
 */
static int __init set_debug_rodata(char *str)
{
	if (strtobool(str, &rodata_enabled))
		pr_warn("Invalid option string for rodata: '%s'\n", str);
	return 1;
}
__setup("rodata=", set_debug_rodata);

/*
 * Apply read-only protection via mark_rodata_ro() unless rodata_enabled
 * has been cleared, in which case only log that protection is disabled.
 */
static void mark_readonly(void)
{
	if (rodata_enabled)
		mark_rodata_ro();
	else
		pr_info("Kernel memory protection disabled.\n");
}
#else
/*
 * Without CONFIG_DEBUG_RODATA there is no mark_rodata_ro() to call, so
 * this variant only warns.
 */
static inline void mark_readonly(void)
{
	pr_warn("This architecture does not have kernel memory protection.\n");
}
#endif
static int __ref kernel_init(void *unused)
{
int ret;
......@@ -932,7 +951,7 @@ static int __ref kernel_init(void *unused)
/* need to finish all async __init code before freeing the memory */
async_synchronize_full();
free_initmem();
mark_rodata_ro();
mark_readonly();
system_state = SYSTEM_RUNNING;
numa_default_policy();
......
......@@ -153,13 +153,11 @@ static int _kdb_bp_install(struct pt_regs *regs, kdb_bp_t *bp)
} else {
kdb_printf("%s: failed to set breakpoint at 0x%lx\n",
__func__, bp->bp_addr);
#ifdef CONFIG_DEBUG_RODATA
if (!bp->bp_type) {
kdb_printf("Software breakpoints are unavailable.\n"
" Change the kernel CONFIG_DEBUG_RODATA=n\n"
" Boot the kernel with rodata=off\n"
" OR use hw breaks: help bph\n");
}
#endif
return 1;
}
return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment