Commit d82c0a37 authored by Linus Torvalds

Merge tag 'execve-v6.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull execve updates from Kees Cook:

 - Support non-BSS ELF segments with zero filesz

   Eric Biederman and I refactored ELF segment loading to handle the
   case where a segment has a smaller filesz than memsz. Traditionally
   linkers only did this for .bss and it was always the last segment. As
   a result, the kernel only handled this case when it was the last
   segment. We've had two recent cases where linkers were trying to use
   these kinds of segments for other reasons, and they were in the middle
   of the segment list. There was no good reason for the kernel not to
   support this, and the refactor actually ends up making things more
   readable too.

 - Enable namespaced binfmt_misc

   Christian Brauner has made it possible to use binfmt_misc with mount
   namespaces. This means some traditionally root-only interfaces (for
   adding/removing formats) are now more exposed (but believed to be
   safe).

 - Remove struct tag 'dynamic' from ELF UAPI

   Alejandro Colomar noticed that the ELF UAPI has been polluting the
   struct namespace with an unused and overly generic tag named
   "dynamic" for no discernible reason for many many years. After
   double-checking various distro source repositories, it has been
   removed.

 - Clean up binfmt_elf_fdpic debug output (Greg Ungerer)

* tag 'execve-v6.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  binfmt_misc: enable sandboxed mounts
  binfmt_misc: cleanup on filesystem umount
  binfmt_elf_fdpic: clean up debug warnings
  mm: Remove unused vm_brk()
  binfmt_elf: Only report padzero() errors when PROT_WRITE
  binfmt_elf: Use elf_load() for library
  binfmt_elf: Use elf_load() for interpreter
  binfmt_elf: elf_bss no longer used by load_elf_binary()
  binfmt_elf: Support segments with 0 filesz and misaligned starts
  elf, uapi: Remove struct tag 'dynamic'
parents 5e372699 21ca59b3
......@@ -110,38 +110,19 @@ static struct linux_binfmt elf_format = {
#define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
/*
 * Establish the program break for the range given by start and end.
 *
 * Both bounds are rounded with ELF_PAGEALIGN(). A non-empty rounded range
 * is mapped through vm_brk_flags(); afterwards start_brk and brk of the
 * current mm are both set to the rounded end.
 *
 * Returns 0 on success, or the error reported by vm_brk_flags().
 */
static int set_brk(unsigned long start, unsigned long end, int prot)
{
	const unsigned long brk_lo = ELF_PAGEALIGN(start);
	const unsigned long brk_hi = ELF_PAGEALIGN(end);

	if (brk_hi > brk_lo) {
		unsigned long vm_flags = 0;
		int ret;

		/*
		 * Map the last of the bss segment. Pages are made
		 * executable only when the header asks for it
		 * (ppc32 needs this).
		 */
		if (prot & PROT_EXEC)
			vm_flags = VM_EXEC;

		ret = vm_brk_flags(brk_lo, brk_hi - brk_lo, vm_flags);
		if (ret)
			return ret;
	}

	current->mm->brk = brk_hi;
	current->mm->start_brk = brk_hi;
	return 0;
}
/* We need to explicitly zero any fractional pages
after the data section (i.e. bss). This would
contain the junk from the file that should not
be in memory
/*
* We need to explicitly zero any trailing portion of the page that follows
* p_filesz when it ends before the page ends (e.g. bss), otherwise this
* memory will contain the junk from the file that should not be present.
*/
static int padzero(unsigned long elf_bss)
static int padzero(unsigned long address)
{
unsigned long nbyte;
nbyte = ELF_PAGEOFFSET(elf_bss);
nbyte = ELF_PAGEOFFSET(address);
if (nbyte) {
nbyte = ELF_MIN_ALIGN - nbyte;
if (clear_user((void __user *) elf_bss, nbyte))
if (clear_user((void __user *)address, nbyte))
return -EFAULT;
}
return 0;
......@@ -367,6 +348,11 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
return 0;
}
/*
* Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
* into memory at "addr". (Note that p_filesz is rounded up to the
* next page, so any extra bytes from the file must be wiped.)
*/
static unsigned long elf_map(struct file *filep, unsigned long addr,
const struct elf_phdr *eppnt, int prot, int type,
unsigned long total_size)
......@@ -406,6 +392,60 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
return(map_addr);
}
/*
* Load one ELF program header segment.
*
* Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
* into memory at "addr" via elf_map(). When "p_memsz" is larger than
* "p_filesz", the tail of the last file-backed page is cleared with
* padzero() and the range from there up to "p_memsz" (both ends rounded
* up to the next page) is mapped with vm_brk_flags().
*
* A segment whose "p_filesz" is zero gets no file mapping at all: the
* whole range, starting at the page containing "addr", comes from
* vm_brk_flags().
*
* Returns the value from elf_map(), or the page start of "addr" when
* there is no file-backed part. On failure an error code is returned in
* the unsigned long; the callers visible in this file test the result
* with BAD_ADDR().
*/
static unsigned long elf_load(struct file *filep, unsigned long addr,
const struct elf_phdr *eppnt, int prot, int type,
unsigned long total_size)
{
/*
* Bounds of the part of the segment not backed by file content. They
* are only read when p_memsz > p_filesz, and every such path below
* assigns them first.
*/
unsigned long zero_start, zero_end;
unsigned long map_addr;
if (eppnt->p_filesz) {
/* File-backed part: let elf_map() place it. */
map_addr = elf_map(filep, addr, eppnt, prot, type, total_size);
if (BAD_ADDR(map_addr))
return map_addr;
if (eppnt->p_memsz > eppnt->p_filesz) {
/*
* elf_map() returned a page address, so add back the
* in-page offset of p_vaddr to find where the file data
* ends (zero_start) and where the segment ends (zero_end).
*/
zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
eppnt->p_filesz;
zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
eppnt->p_memsz;
/*
* Zero the end of the last mapped page but ignore
* any errors if the segment isn't writable.
*/
if (padzero(zero_start) && (prot & PROT_WRITE))
return -EFAULT;
}
} else {
/* No file content: the segment starts at the page containing addr. */
map_addr = zero_start = ELF_PAGESTART(addr);
zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) +
eppnt->p_memsz;
}
if (eppnt->p_memsz > eppnt->p_filesz) {
/*
* Map the last of the segment.
* If the header is requesting these pages to be
* executable, honour that (ppc32 needs this).
*/
int error;
/* Round both ends up to a page boundary before mapping. */
zero_start = ELF_PAGEALIGN(zero_start);
zero_end = ELF_PAGEALIGN(zero_end);
error = vm_brk_flags(zero_start, zero_end - zero_start,
prot & PROT_EXEC ? VM_EXEC : 0);
/* Report a vm_brk_flags() failure through the return value. */
if (error)
map_addr = error;
}
return map_addr;
}
static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
{
elf_addr_t min_addr = -1;
......@@ -596,8 +636,6 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
struct elf_phdr *eppnt;
unsigned long load_addr = 0;
int load_addr_set = 0;
unsigned long last_bss = 0, elf_bss = 0;
int bss_prot = 0;
unsigned long error = ~0UL;
unsigned long total_size;
int i;
......@@ -634,7 +672,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
else if (no_base && interp_elf_ex->e_type == ET_DYN)
load_addr = -vaddr;
map_addr = elf_map(interpreter, load_addr + vaddr,
map_addr = elf_load(interpreter, load_addr + vaddr,
eppnt, elf_prot, elf_type, total_size);
total_size = 0;
error = map_addr;
......@@ -660,51 +698,9 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
error = -ENOMEM;
goto out;
}
/*
* Find the end of the file mapping for this phdr, and
* keep track of the largest address we see for this.
*/
k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
if (k > elf_bss)
elf_bss = k;
/*
* Do the same thing for the memory mapping - between
* elf_bss and last_bss is the bss section.
*/
k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
if (k > last_bss) {
last_bss = k;
bss_prot = elf_prot;
}
}
}
/*
* Now fill out the bss section: first pad the last page from
* the file up to the page boundary, and zero it from elf_bss
* up to the end of the page.
*/
if (padzero(elf_bss)) {
error = -EFAULT;
goto out;
}
/*
* Next, align both the file and mem bss up to the page size,
* since this is where elf_bss was just zeroed up to, and where
* last_bss will end after the vm_brk_flags() below.
*/
elf_bss = ELF_PAGEALIGN(elf_bss);
last_bss = ELF_PAGEALIGN(last_bss);
/* Finally, if there is still more bss to allocate, do it. */
if (last_bss > elf_bss) {
error = vm_brk_flags(elf_bss, last_bss - elf_bss,
bss_prot & PROT_EXEC ? VM_EXEC : 0);
if (error)
goto out;
}
error = load_addr;
out:
return error;
......@@ -828,8 +824,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
unsigned long error;
struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
struct elf_phdr *elf_property_phdata = NULL;
unsigned long elf_bss, elf_brk;
int bss_prot = 0;
unsigned long elf_brk;
int retval, i;
unsigned long elf_entry;
unsigned long e_entry;
......@@ -1020,7 +1015,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
if (retval < 0)
goto out_free_dentry;
elf_bss = 0;
elf_brk = 0;
start_code = ~0UL;
......@@ -1040,33 +1034,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
if (elf_ppnt->p_type != PT_LOAD)
continue;
if (unlikely (elf_brk > elf_bss)) {
unsigned long nbyte;
/* There was a PT_LOAD segment with p_memsz > p_filesz
before this one. Map anonymous pages, if needed,
and clear the area. */
retval = set_brk(elf_bss + load_bias,
elf_brk + load_bias,
bss_prot);
if (retval)
goto out_free_dentry;
nbyte = ELF_PAGEOFFSET(elf_bss);
if (nbyte) {
nbyte = ELF_MIN_ALIGN - nbyte;
if (nbyte > elf_brk - elf_bss)
nbyte = elf_brk - elf_bss;
if (clear_user((void __user *)elf_bss +
load_bias, nbyte)) {
/*
* This bss-zeroing can fail if the ELF
* file specifies odd protections. So
* we don't check the return value
*/
}
}
}
elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
!!interpreter, false);
......@@ -1162,7 +1129,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
}
}
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
retval = IS_ERR_VALUE(error) ?
......@@ -1210,40 +1177,24 @@ static int load_elf_binary(struct linux_binprm *bprm)
k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
if (k > elf_bss)
elf_bss = k;
if ((elf_ppnt->p_flags & PF_X) && end_code < k)
end_code = k;
if (end_data < k)
end_data = k;
k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
if (k > elf_brk) {
bss_prot = elf_prot;
if (k > elf_brk)
elf_brk = k;
}
}
e_entry = elf_ex->e_entry + load_bias;
phdr_addr += load_bias;
elf_bss += load_bias;
elf_brk += load_bias;
start_code += load_bias;
end_code += load_bias;
start_data += load_bias;
end_data += load_bias;
/* Calling set_brk effectively mmaps the pages that we need
* for the bss and break sections. We must do this before
* mapping in the interpreter, to make sure it doesn't wind
* up getting placed where the bss needs to go.
*/
retval = set_brk(elf_bss, elf_brk, bss_prot);
if (retval)
goto out_free_dentry;
if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
retval = -EFAULT; /* Nobody gets to see this, but.. */
goto out_free_dentry;
}
current->mm->start_brk = current->mm->brk = ELF_PAGEALIGN(elf_brk);
if (interpreter) {
elf_entry = load_elf_interp(interp_elf_ex,
......@@ -1369,7 +1320,6 @@ static int load_elf_library(struct file *file)
{
struct elf_phdr *elf_phdata;
struct elf_phdr *eppnt;
unsigned long elf_bss, bss, len;
int retval, error, i, j;
struct elfhdr elf_ex;
......@@ -1414,30 +1364,15 @@ static int load_elf_library(struct file *file)
eppnt++;
/* Now use mmap to map the library into memory. */
error = vm_mmap(file,
ELF_PAGESTART(eppnt->p_vaddr),
(eppnt->p_filesz +
ELF_PAGEOFFSET(eppnt->p_vaddr)),
error = elf_load(file, ELF_PAGESTART(eppnt->p_vaddr),
eppnt,
PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_FIXED_NOREPLACE | MAP_PRIVATE,
(eppnt->p_offset -
ELF_PAGEOFFSET(eppnt->p_vaddr)));
0);
if (error != ELF_PAGESTART(eppnt->p_vaddr))
goto out_free_ph;
elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
if (padzero(elf_bss)) {
error = -EFAULT;
goto out_free_ph;
}
len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
if (bss > len) {
error = vm_brk(len, bss - len);
if (error)
goto out_free_ph;
}
error = 0;
out_free_ph:
......
......@@ -899,10 +899,12 @@ static int elf_fdpic_map_file(struct elf_fdpic_params *params,
kdebug("- DYNAMIC[]: %lx", params->dynamic_addr);
seg = loadmap->segs;
for (loop = 0; loop < loadmap->nsegs; loop++, seg++)
kdebug("- LOAD[%d] : %08x-%08x [va=%x ms=%x]",
kdebug("- LOAD[%d] : %08llx-%08llx [va=%llx ms=%llx]",
loop,
seg->addr, seg->addr + seg->p_memsz - 1,
seg->p_vaddr, seg->p_memsz);
(unsigned long long) seg->addr,
(unsigned long long) seg->addr + seg->p_memsz - 1,
(unsigned long long) seg->p_vaddr,
(unsigned long long) seg->p_memsz);
return 0;
......@@ -1081,9 +1083,10 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
maddr = vm_mmap(file, maddr, phdr->p_memsz + disp, prot, flags,
phdr->p_offset - disp);
kdebug("mmap[%d] <file> sz=%lx pr=%x fl=%x of=%lx --> %08lx",
loop, phdr->p_memsz + disp, prot, flags,
phdr->p_offset - disp, maddr);
kdebug("mmap[%d] <file> sz=%llx pr=%x fl=%x of=%llx --> %08lx",
loop, (unsigned long long) phdr->p_memsz + disp,
prot, flags, (unsigned long long) phdr->p_offset - disp,
maddr);
if (IS_ERR_VALUE(maddr))
return (int) maddr;
......@@ -1145,8 +1148,9 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
#else
if (excess > 0) {
kdebug("clear[%d] ad=%lx sz=%lx",
loop, maddr + phdr->p_filesz, excess);
kdebug("clear[%d] ad=%llx sz=%lx", loop,
(unsigned long long) maddr + phdr->p_filesz,
excess);
if (clear_user((void *) maddr + phdr->p_filesz, excess))
return -EFAULT;
}
......
This diff is collapsed.
......@@ -90,6 +90,16 @@ struct linux_binfmt {
#endif
} __randomize_layout;
#if IS_ENABLED(CONFIG_BINFMT_MISC)
/*
* State of one binfmt_misc instance. struct user_namespace holds a
* pointer to one of these when CONFIG_BINFMT_MISC is enabled.
*/
struct binfmt_misc {
/* List head; the element type is not visible from this declaration. */
struct list_head entries;
/* NOTE(review): presumably guards "entries" - confirm against the users. */
rwlock_t entries_lock;
/* On/off flag; init_binfmt_misc starts with it set to true. */
bool enabled;
} __randomize_layout;

/* Instance that init_user_ns points at. */
extern struct binfmt_misc init_binfmt_misc;
#endif
extern void __register_binfmt(struct linux_binfmt *fmt, int insert);
/* Registration of default binfmt handlers */
......
......@@ -3308,8 +3308,7 @@ static inline void mm_populate(unsigned long addr, unsigned long len)
static inline void mm_populate(unsigned long addr, unsigned long len) {}
#endif
/* These take the mm semaphore themselves */
extern int __must_check vm_brk(unsigned long, unsigned long);
/* This takes the mm semaphore itself */
extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
extern int vm_munmap(unsigned long, size_t);
extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
......
......@@ -65,6 +65,10 @@ enum rlimit_type {
UCOUNT_RLIMIT_COUNTS,
};
#if IS_ENABLED(CONFIG_BINFMT_MISC)
struct binfmt_misc;
#endif
struct user_namespace {
struct uid_gid_map uid_map;
struct uid_gid_map gid_map;
......@@ -102,6 +106,10 @@ struct user_namespace {
struct ucounts *ucounts;
long ucount_max[UCOUNT_COUNTS];
long rlimit_max[UCOUNT_RLIMIT_COUNTS];
#if IS_ENABLED(CONFIG_BINFMT_MISC)
struct binfmt_misc *binfmt_misc;
#endif
} __randomize_layout;
struct ucounts {
......
......@@ -140,7 +140,7 @@ typedef __s64 Elf64_Sxword;
#define ELF64_ST_BIND(x) ELF_ST_BIND(x)
#define ELF64_ST_TYPE(x) ELF_ST_TYPE(x)
typedef struct dynamic {
typedef struct {
Elf32_Sword d_tag;
union {
Elf32_Sword d_val;
......
......@@ -18,8 +18,18 @@
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
#include <linux/binfmts.h>
#include <linux/proc_ns.h>
#if IS_ENABLED(CONFIG_BINFMT_MISC)
/*
* binfmt_misc instance referenced by init_user_ns. It starts with a
* self-referencing (empty) entry list, an unlocked rwlock, and the
* enabled flag set.
*/
struct binfmt_misc init_binfmt_misc = {
.entries = LIST_HEAD_INIT(init_binfmt_misc.entries),
.enabled = true,
.entries_lock = __RW_LOCK_UNLOCKED(init_binfmt_misc.entries_lock),
};
EXPORT_SYMBOL_GPL(init_binfmt_misc);
#endif
/*
* userns count is 1 for root user, 1 for init_uts_ns,
* and 1 for... ?
......@@ -67,6 +77,9 @@ struct user_namespace init_user_ns = {
.keyring_name_list = LIST_HEAD_INIT(init_user_ns.keyring_name_list),
.keyring_sem = __RWSEM_INITIALIZER(init_user_ns.keyring_sem),
#endif
#if IS_ENABLED(CONFIG_BINFMT_MISC)
.binfmt_misc = &init_binfmt_misc,
#endif
};
EXPORT_SYMBOL_GPL(init_user_ns);
......
......@@ -213,6 +213,9 @@ static void free_user_ns(struct work_struct *work)
kfree(ns->projid_map.forward);
kfree(ns->projid_map.reverse);
}
#if IS_ENABLED(CONFIG_BINFMT_MISC)
kfree(ns->binfmt_misc);
#endif
retire_userns_sysctls(ns);
key_free_user_ns(ns);
ns_free_inum(&ns->ns);
......
......@@ -3194,12 +3194,6 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
}
EXPORT_SYMBOL(vm_brk_flags);
/*
* Convenience wrapper around vm_brk_flags() that passes no extra flags.
* Returns whatever vm_brk_flags() returns.
*/
int vm_brk(unsigned long addr, unsigned long len)
{
return vm_brk_flags(addr, len, 0);
}
EXPORT_SYMBOL(vm_brk);
/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
......
......@@ -1531,11 +1531,6 @@ void exit_mmap(struct mm_struct *mm)
mmap_write_unlock(mm);
}
/*
* Variant of vm_brk() that never maps anything: both arguments are
* ignored and the call always fails with -ENOMEM.
* NOTE(review): presumably the build without MMU support - confirm.
*/
int vm_brk(unsigned long addr, unsigned long len)
{
return -ENOMEM;
}
/*
* expand (or shrink) an existing mapping, potentially moving it at the same
* time (controlled by the MREMAP_MAYMOVE flag and available VM space)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment