Commit eee4e230 authored by Palmer Dabbelt

Merge branch 'base.set_fs' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs into for-next

This is a dependency for Christoph's removal of set_fs.

* 'base.set_fs' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  powerpc: remove address space overrides using set_fs()
  powerpc: use non-set_fs based maccess routines
  x86: remove address space overrides using set_fs()
  x86: make TASK_SIZE_MAX usable from assembly code
  x86: move PAGE_OFFSET, TASK_SIZE & friends to page_{32,64}_types.h
  lkdtm: remove set_fs-based tests
  test_bitmap: remove user bitmap tests
  uaccess: add infrastructure for kernel builds with set_fs()
  fs: don't allow splice read/write without explicit ops
  fs: don't allow kernel reads and writes without iter ops
  sysctl: Convert to iter interfaces
  proc: add a read_iter method to proc proc_ops
  proc: cleanup the compat vs no compat file ops
  proc: remove a level of indentation in proc_get_inode
parents de22d210 5ae4998b
...@@ -24,6 +24,9 @@ config KEXEC_ELF ...@@ -24,6 +24,9 @@ config KEXEC_ELF
config HAVE_IMA_KEXEC config HAVE_IMA_KEXEC
bool bool
config SET_FS
bool
config HOTPLUG_SMT config HOTPLUG_SMT
bool bool
......
...@@ -39,6 +39,7 @@ config ALPHA ...@@ -39,6 +39,7 @@ config ALPHA
select OLD_SIGSUSPEND select OLD_SIGSUSPEND
select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67 select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
select MMU_GATHER_NO_RANGE select MMU_GATHER_NO_RANGE
select SET_FS
help help
The Alpha is a 64-bit general-purpose processor designed and The Alpha is a 64-bit general-purpose processor designed and
marketed by the Digital Equipment Corporation of blessed memory, marketed by the Digital Equipment Corporation of blessed memory,
......
...@@ -48,6 +48,7 @@ config ARC ...@@ -48,6 +48,7 @@ config ARC
select PCI_SYSCALL if PCI select PCI_SYSCALL if PCI
select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32 select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
select SET_FS
config ARCH_HAS_CACHE_LINE_SIZE config ARCH_HAS_CACHE_LINE_SIZE
def_bool y def_bool y
......
...@@ -118,6 +118,7 @@ config ARM ...@@ -118,6 +118,7 @@ config ARM
select PCI_SYSCALL if PCI select PCI_SYSCALL if PCI
select PERF_USE_VMALLOC select PERF_USE_VMALLOC
select RTC_LIB select RTC_LIB
select SET_FS
select SYS_SUPPORTS_APM_EMULATION select SYS_SUPPORTS_APM_EMULATION
# Above selects are sorted alphabetically; please add new ones # Above selects are sorted alphabetically; please add new ones
# according to that. Thanks. # according to that. Thanks.
......
...@@ -192,6 +192,7 @@ config ARM64 ...@@ -192,6 +192,7 @@ config ARM64
select PCI_SYSCALL if PCI select PCI_SYSCALL if PCI
select POWER_RESET select POWER_RESET
select POWER_SUPPLY select POWER_SUPPLY
select SET_FS
select SPARSE_IRQ select SPARSE_IRQ
select SWIOTLB select SWIOTLB
select SYSCTL_EXCEPTION_TRACE select SYSCTL_EXCEPTION_TRACE
......
...@@ -22,6 +22,7 @@ config C6X ...@@ -22,6 +22,7 @@ config C6X
select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS
select MODULES_USE_ELF_RELA select MODULES_USE_ELF_RELA
select MMU_GATHER_NO_RANGE if MMU select MMU_GATHER_NO_RANGE if MMU
select SET_FS
config MMU config MMU
def_bool n def_bool n
......
...@@ -78,6 +78,7 @@ config CSKY ...@@ -78,6 +78,7 @@ config CSKY
select PCI_DOMAINS_GENERIC if PCI select PCI_DOMAINS_GENERIC if PCI
select PCI_SYSCALL if PCI select PCI_SYSCALL if PCI
select PCI_MSI if PCI select PCI_MSI if PCI
select SET_FS
config LOCKDEP_SUPPORT config LOCKDEP_SUPPORT
def_bool y def_bool y
......
...@@ -25,6 +25,7 @@ config H8300 ...@@ -25,6 +25,7 @@ config H8300
select HAVE_ARCH_KGDB select HAVE_ARCH_KGDB
select HAVE_ARCH_HASH select HAVE_ARCH_HASH
select CPU_NO_EFFICIENT_FFS select CPU_NO_EFFICIENT_FFS
select SET_FS
select UACCESS_MEMCPY select UACCESS_MEMCPY
config CPU_BIG_ENDIAN config CPU_BIG_ENDIAN
......
...@@ -31,6 +31,7 @@ config HEXAGON ...@@ -31,6 +31,7 @@ config HEXAGON
select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CLOCKEVENTS_BROADCAST
select MODULES_USE_ELF_RELA select MODULES_USE_ELF_RELA
select GENERIC_CPU_DEVICES select GENERIC_CPU_DEVICES
select SET_FS
help help
Qualcomm Hexagon is a processor architecture designed for high Qualcomm Hexagon is a processor architecture designed for high
performance and low power across a wide variety of applications. performance and low power across a wide variety of applications.
......
...@@ -56,6 +56,7 @@ config IA64 ...@@ -56,6 +56,7 @@ config IA64
select NEED_DMA_MAP_STATE select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH select NEED_SG_DMA_LENGTH
select NUMA if !FLATMEM select NUMA if !FLATMEM
select SET_FS
default y default y
help help
The Itanium Processor Family is Intel's 64-bit successor to The Itanium Processor Family is Intel's 64-bit successor to
......
...@@ -32,6 +32,7 @@ config M68K ...@@ -32,6 +32,7 @@ config M68K
select OLD_SIGSUSPEND3 select OLD_SIGSUSPEND3
select OLD_SIGACTION select OLD_SIGACTION
select MMU_GATHER_NO_RANGE if MMU select MMU_GATHER_NO_RANGE if MMU
select SET_FS
config CPU_BIG_ENDIAN config CPU_BIG_ENDIAN
def_bool y def_bool y
......
...@@ -46,6 +46,7 @@ config MICROBLAZE ...@@ -46,6 +46,7 @@ config MICROBLAZE
select CPU_NO_EFFICIENT_FFS select CPU_NO_EFFICIENT_FFS
select MMU_GATHER_NO_RANGE if MMU select MMU_GATHER_NO_RANGE if MMU
select SPARSE_IRQ select SPARSE_IRQ
select SET_FS
# Endianness selection # Endianness selection
choice choice
......
...@@ -87,6 +87,7 @@ config MIPS ...@@ -87,6 +87,7 @@ config MIPS
select MODULES_USE_ELF_RELA if MODULES && 64BIT select MODULES_USE_ELF_RELA if MODULES && 64BIT
select PERF_USE_VMALLOC select PERF_USE_VMALLOC
select RTC_LIB select RTC_LIB
select SET_FS
select SYSCTL_EXCEPTION_TRACE select SYSCTL_EXCEPTION_TRACE
select VIRT_TO_BUS select VIRT_TO_BUS
......
...@@ -48,6 +48,7 @@ config NDS32 ...@@ -48,6 +48,7 @@ config NDS32
select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE
select SET_FS
help help
Andes(nds32) Linux support. Andes(nds32) Linux support.
......
...@@ -27,6 +27,7 @@ config NIOS2 ...@@ -27,6 +27,7 @@ config NIOS2
select USB_ARCH_HAS_HCD if USB_SUPPORT select USB_ARCH_HAS_HCD if USB_SUPPORT
select CPU_NO_EFFICIENT_FFS select CPU_NO_EFFICIENT_FFS
select MMU_GATHER_NO_RANGE if MMU select MMU_GATHER_NO_RANGE if MMU
select SET_FS
config GENERIC_CSUM config GENERIC_CSUM
def_bool y def_bool y
......
...@@ -39,6 +39,7 @@ config OPENRISC ...@@ -39,6 +39,7 @@ config OPENRISC
select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_FRAME_POINTERS
select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_MULTI_HANDLER
select MMU_GATHER_NO_RANGE if MMU select MMU_GATHER_NO_RANGE if MMU
select SET_FS
config CPU_BIG_ENDIAN config CPU_BIG_ENDIAN
def_bool y def_bool y
......
...@@ -63,6 +63,7 @@ config PARISC ...@@ -63,6 +63,7 @@ config PARISC
select HAVE_FTRACE_MCOUNT_RECORD if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD if HAVE_DYNAMIC_FTRACE
select HAVE_KPROBES_ON_FTRACE select HAVE_KPROBES_ON_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_DYNAMIC_FTRACE_WITH_REGS
select SET_FS
help help
The PA-RISC microprocessor is designed by Hewlett-Packard and used The PA-RISC microprocessor is designed by Hewlett-Packard and used
......
...@@ -83,10 +83,6 @@ struct task_struct; ...@@ -83,10 +83,6 @@ struct task_struct;
void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp); void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp);
void release_thread(struct task_struct *); void release_thread(struct task_struct *);
typedef struct {
unsigned long seg;
} mm_segment_t;
#define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET] #define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET]
#define TS_CKFPR(i) ckfp_state.fpr[i][TS_FPROFFSET] #define TS_CKFPR(i) ckfp_state.fpr[i][TS_FPROFFSET]
...@@ -148,7 +144,6 @@ struct thread_struct { ...@@ -148,7 +144,6 @@ struct thread_struct {
unsigned long ksp_vsid; unsigned long ksp_vsid;
#endif #endif
struct pt_regs *regs; /* Pointer to saved register state */ struct pt_regs *regs; /* Pointer to saved register state */
mm_segment_t addr_limit; /* for get_fs() validation */
#ifdef CONFIG_BOOKE #ifdef CONFIG_BOOKE
/* BookE base exception scratch space; align on cacheline */ /* BookE base exception scratch space; align on cacheline */
unsigned long normsave[8] ____cacheline_aligned; unsigned long normsave[8] ____cacheline_aligned;
...@@ -295,7 +290,6 @@ struct thread_struct { ...@@ -295,7 +290,6 @@ struct thread_struct {
#define INIT_THREAD { \ #define INIT_THREAD { \
.ksp = INIT_SP, \ .ksp = INIT_SP, \
.ksp_limit = INIT_SP_LIMIT, \ .ksp_limit = INIT_SP_LIMIT, \
.addr_limit = KERNEL_DS, \
.pgdir = swapper_pg_dir, \ .pgdir = swapper_pg_dir, \
.fpexc_mode = MSR_FE0 | MSR_FE1, \ .fpexc_mode = MSR_FE0 | MSR_FE1, \
SPEFSCR_INIT \ SPEFSCR_INIT \
...@@ -303,7 +297,6 @@ struct thread_struct { ...@@ -303,7 +297,6 @@ struct thread_struct {
#else #else
#define INIT_THREAD { \ #define INIT_THREAD { \
.ksp = INIT_SP, \ .ksp = INIT_SP, \
.addr_limit = KERNEL_DS, \
.fpexc_mode = 0, \ .fpexc_mode = 0, \
} }
#endif #endif
......
...@@ -90,7 +90,6 @@ void arch_setup_new_exec(void); ...@@ -90,7 +90,6 @@ void arch_setup_new_exec(void);
#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */
#define TIF_SIGPENDING 1 /* signal pending */ #define TIF_SIGPENDING 1 /* signal pending */
#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */
#define TIF_FSCHECK 3 /* Check FS is USER_DS on return */
#define TIF_SYSCALL_EMU 4 /* syscall emulation active */ #define TIF_SYSCALL_EMU 4 /* syscall emulation active */
#define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */
#define TIF_PATCH_PENDING 6 /* pending live patching update */ #define TIF_PATCH_PENDING 6 /* pending live patching update */
...@@ -130,7 +129,6 @@ void arch_setup_new_exec(void); ...@@ -130,7 +129,6 @@ void arch_setup_new_exec(void);
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
#define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
#define _TIF_NOHZ (1<<TIF_NOHZ) #define _TIF_NOHZ (1<<TIF_NOHZ)
#define _TIF_FSCHECK (1<<TIF_FSCHECK)
#define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU) #define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
...@@ -138,8 +136,7 @@ void arch_setup_new_exec(void); ...@@ -138,8 +136,7 @@ void arch_setup_new_exec(void);
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
_TIF_RESTORE_TM | _TIF_PATCH_PENDING | \ _TIF_RESTORE_TM | _TIF_PATCH_PENDING)
_TIF_FSCHECK)
#define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
/* Bits in local_flags */ /* Bits in local_flags */
......
...@@ -8,62 +8,21 @@ ...@@ -8,62 +8,21 @@
#include <asm/extable.h> #include <asm/extable.h>
#include <asm/kup.h> #include <asm/kup.h>
/*
* The fs value determines whether argument validity checking should be
* performed or not. If get_fs() == USER_DS, checking is performed, with
* get_fs() == KERNEL_DS, checking is bypassed.
*
* For historical reasons, these macros are grossly misnamed.
*
* The fs/ds values are now the highest legal address in the "segment".
* This simplifies the checking in the routines below.
*/
#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
#define KERNEL_DS MAKE_MM_SEG(~0UL)
#ifdef __powerpc64__ #ifdef __powerpc64__
/* We use TASK_SIZE_USER64 as TASK_SIZE is not constant */ /* We use TASK_SIZE_USER64 as TASK_SIZE is not constant */
#define USER_DS MAKE_MM_SEG(TASK_SIZE_USER64 - 1) #define TASK_SIZE_MAX TASK_SIZE_USER64
#else #else
#define USER_DS MAKE_MM_SEG(TASK_SIZE - 1) #define TASK_SIZE_MAX TASK_SIZE
#endif #endif
#define get_fs() (current->thread.addr_limit) static inline bool __access_ok(unsigned long addr, unsigned long size)
static inline void set_fs(mm_segment_t fs)
{
current->thread.addr_limit = fs;
/* On user-mode return check addr_limit (fs) is correct */
set_thread_flag(TIF_FSCHECK);
}
#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#define user_addr_max() (get_fs().seg)
#ifdef __powerpc64__
/*
* This check is sufficient because there is a large enough
* gap between user addresses and the kernel addresses
*/
#define __access_ok(addr, size, segment) \
(((addr) <= (segment).seg) && ((size) <= (segment).seg))
#else
static inline int __access_ok(unsigned long addr, unsigned long size,
mm_segment_t seg)
{ {
if (addr > seg.seg) return addr < TASK_SIZE_MAX && size <= TASK_SIZE_MAX - addr;
return 0;
return (size == 0 || size - 1 <= seg.seg - addr);
} }
#endif
#define access_ok(addr, size) \ #define access_ok(addr, size) \
(__chk_user_ptr(addr), \ (__chk_user_ptr(addr), \
__access_ok((__force unsigned long)(addr), (size), get_fs())) __access_ok((unsigned long)(addr), (size)))
/* /*
* These are the main single-value transfer routines. They automatically * These are the main single-value transfer routines. They automatically
...@@ -623,4 +582,20 @@ do { \ ...@@ -623,4 +582,20 @@ do { \
__put_user_goto(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e);\ __put_user_goto(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e);\
} while (0) } while (0)
#define HAVE_GET_KERNEL_NOFAULT
#define __get_kernel_nofault(dst, src, type, err_label) \
do { \
int __kr_err; \
\
__get_user_size_allowed(*((type *)(dst)), (__force type __user *)(src),\
sizeof(type), __kr_err); \
if (unlikely(__kr_err)) \
goto err_label; \
} while (0)
#define __put_kernel_nofault(dst, src, type, err_label) \
__put_user_size_goto(*((type *)(src)), \
(__force type __user *)(dst), sizeof(type), err_label)
#endif /* _ARCH_POWERPC_UACCESS_H */ #endif /* _ARCH_POWERPC_UACCESS_H */
...@@ -312,9 +312,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) ...@@ -312,9 +312,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
{ {
user_exit(); user_exit();
/* Check valid addr_limit, TIF check is done there */
addr_limit_user_check();
if (thread_info_flags & _TIF_UPROBE) if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs); uprobe_notify_resume(regs);
......
...@@ -108,11 +108,11 @@ static nokprobe_inline long address_ok(struct pt_regs *regs, ...@@ -108,11 +108,11 @@ static nokprobe_inline long address_ok(struct pt_regs *regs,
{ {
if (!user_mode(regs)) if (!user_mode(regs))
return 1; return 1;
if (__access_ok(ea, nb, USER_DS)) if (__access_ok(ea, nb))
return 1; return 1;
if (__access_ok(ea, 1, USER_DS)) if (__access_ok(ea, 1))
/* Access overlaps the end of the user region */ /* Access overlaps the end of the user region */
regs->dar = USER_DS.seg; regs->dar = TASK_SIZE_MAX - 1;
else else
regs->dar = ea; regs->dar = ea;
return 0; return 0;
......
...@@ -87,6 +87,7 @@ config RISCV ...@@ -87,6 +87,7 @@ config RISCV
select SPARSE_IRQ select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK select THREAD_INFO_IN_TASK
select SET_FS
config ARCH_MMAP_RND_BITS_MIN config ARCH_MMAP_RND_BITS_MIN
default 18 if 64BIT default 18 if 64BIT
......
...@@ -185,6 +185,7 @@ config S390 ...@@ -185,6 +185,7 @@ config S390
select OLD_SIGSUSPEND3 select OLD_SIGSUSPEND3
select PCI_DOMAINS if PCI select PCI_DOMAINS if PCI
select PCI_MSI if PCI select PCI_MSI if PCI
select SET_FS
select SPARSE_IRQ select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK select THREAD_INFO_IN_TASK
......
...@@ -71,6 +71,7 @@ config SUPERH ...@@ -71,6 +71,7 @@ config SUPERH
select PERF_EVENTS select PERF_EVENTS
select PERF_USE_VMALLOC select PERF_USE_VMALLOC
select RTC_LIB select RTC_LIB
select SET_FS
select SPARSE_IRQ select SPARSE_IRQ
help help
The SuperH is a RISC processor targeted for use in embedded systems The SuperH is a RISC processor targeted for use in embedded systems
......
...@@ -49,6 +49,7 @@ config SPARC ...@@ -49,6 +49,7 @@ config SPARC
select LOCKDEP_SMALL if LOCKDEP select LOCKDEP_SMALL if LOCKDEP
select NEED_DMA_MAP_STATE select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH select NEED_SG_DMA_LENGTH
select SET_FS
config SPARC32 config SPARC32
def_bool !64BIT def_bool !64BIT
......
...@@ -19,6 +19,7 @@ config UML ...@@ -19,6 +19,7 @@ config UML
select GENERIC_CPU_DEVICES select GENERIC_CPU_DEVICES
select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS
select HAVE_GCC_PLUGINS select HAVE_GCC_PLUGINS
select SET_FS
select TTY # Needed for line.c select TTY # Needed for line.c
config MMU config MMU
......
...@@ -239,7 +239,6 @@ static int load_aout_binary(struct linux_binprm *bprm) ...@@ -239,7 +239,6 @@ static int load_aout_binary(struct linux_binprm *bprm)
(regs)->ss = __USER32_DS; (regs)->ss = __USER32_DS;
regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r8 = regs->r9 = regs->r10 = regs->r11 =
regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
set_fs(USER_DS);
return 0; return 0;
} }
......
...@@ -41,6 +41,17 @@ ...@@ -41,6 +41,17 @@
#define __VIRTUAL_MASK_SHIFT 32 #define __VIRTUAL_MASK_SHIFT 32
#endif /* CONFIG_X86_PAE */ #endif /* CONFIG_X86_PAE */
/*
* User space process size: 3GB (default).
*/
#define IA32_PAGE_OFFSET __PAGE_OFFSET
#define TASK_SIZE __PAGE_OFFSET
#define TASK_SIZE_LOW TASK_SIZE
#define TASK_SIZE_MAX TASK_SIZE
#define DEFAULT_MAP_WINDOW TASK_SIZE
#define STACK_TOP TASK_SIZE
#define STACK_TOP_MAX STACK_TOP
/* /*
* Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S) * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
*/ */
......
...@@ -58,6 +58,44 @@ ...@@ -58,6 +58,44 @@
#define __VIRTUAL_MASK_SHIFT 47 #define __VIRTUAL_MASK_SHIFT 47
#endif #endif
/*
* User space process size. This is the first address outside the user range.
* There are a few constraints that determine this:
*
* On Intel CPUs, if a SYSCALL instruction is at the highest canonical
* address, then that syscall will enter the kernel with a
* non-canonical return address, and SYSRET will explode dangerously.
* We avoid this particular problem by preventing anything executable
* from being mapped at the maximum canonical address.
*
* On AMD CPUs in the Ryzen family, there's a nasty bug in which the
* CPUs malfunction if they execute code from the highest canonical page.
* They'll speculate right off the end of the canonical space, and
* bad things happen. This is worked around in the same way as the
* Intel problem.
*
* With page table isolation enabled, we map the LDT in ... [stay tuned]
*/
#define TASK_SIZE_MAX ((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
0xc0000000 : 0xFFFFe000)
#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \
IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define STACK_TOP TASK_SIZE_LOW
#define STACK_TOP_MAX TASK_SIZE_MAX
/* /*
* Maximum kernel image size is limited to 1 GiB, due to the fixmap living * Maximum kernel image size is limited to 1 GiB, due to the fixmap living
* in the next 1 GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). * in the next 1 GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S).
......
...@@ -482,10 +482,6 @@ extern unsigned int fpu_user_xstate_size; ...@@ -482,10 +482,6 @@ extern unsigned int fpu_user_xstate_size;
struct perf_event; struct perf_event;
typedef struct {
unsigned long seg;
} mm_segment_t;
struct thread_struct { struct thread_struct {
/* Cached TLS descriptors: */ /* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
...@@ -538,8 +534,6 @@ struct thread_struct { ...@@ -538,8 +534,6 @@ struct thread_struct {
*/ */
unsigned long iopl_emul; unsigned long iopl_emul;
mm_segment_t addr_limit;
unsigned int sig_on_uaccess_err:1; unsigned int sig_on_uaccess_err:1;
/* Floating point and extended processor state */ /* Floating point and extended processor state */
...@@ -782,67 +776,15 @@ static inline void spin_lock_prefetch(const void *x) ...@@ -782,67 +776,15 @@ static inline void spin_lock_prefetch(const void *x)
}) })
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
/*
* User space process size: 3GB (default).
*/
#define IA32_PAGE_OFFSET PAGE_OFFSET
#define TASK_SIZE PAGE_OFFSET
#define TASK_SIZE_LOW TASK_SIZE
#define TASK_SIZE_MAX TASK_SIZE
#define DEFAULT_MAP_WINDOW TASK_SIZE
#define STACK_TOP TASK_SIZE
#define STACK_TOP_MAX STACK_TOP
#define INIT_THREAD { \ #define INIT_THREAD { \
.sp0 = TOP_OF_INIT_STACK, \ .sp0 = TOP_OF_INIT_STACK, \
.sysenter_cs = __KERNEL_CS, \ .sysenter_cs = __KERNEL_CS, \
.addr_limit = KERNEL_DS, \
} }
#define KSTK_ESP(task) (task_pt_regs(task)->sp) #define KSTK_ESP(task) (task_pt_regs(task)->sp)
#else #else
/* #define INIT_THREAD { }
* User space process size. This is the first address outside the user range.
* There are a few constraints that determine this:
*
* On Intel CPUs, if a SYSCALL instruction is at the highest canonical
* address, then that syscall will enter the kernel with a
* non-canonical return address, and SYSRET will explode dangerously.
* We avoid this particular problem by preventing anything executable
* from being mapped at the maximum canonical address.
*
* On AMD CPUs in the Ryzen family, there's a nasty bug in which the
* CPUs malfunction if they execute code from the highest canonical page.
* They'll speculate right off the end of the canonical space, and
* bad things happen. This is worked around in the same way as the
* Intel problem.
*
* With page table isolation enabled, we map the LDT in ... [stay tuned]
*/
#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? \
0xc0000000 : 0xFFFFe000)
#define TASK_SIZE_LOW (test_thread_flag(TIF_ADDR32) ? \
IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
#define TASK_SIZE (test_thread_flag(TIF_ADDR32) ? \
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define STACK_TOP TASK_SIZE_LOW
#define STACK_TOP_MAX TASK_SIZE_MAX
#define INIT_THREAD { \
.addr_limit = KERNEL_DS, \
}
extern unsigned long KSTK_ESP(struct task_struct *task); extern unsigned long KSTK_ESP(struct task_struct *task);
......
...@@ -102,7 +102,6 @@ struct thread_info { ...@@ -102,7 +102,6 @@ struct thread_info {
#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */
#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */ #define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
#define TIF_X32 30 /* 32-bit native x86-64 binary */ #define TIF_X32 30 /* 32-bit native x86-64 binary */
#define TIF_FSCHECK 31 /* Check FS is USER_DS on return */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
...@@ -131,7 +130,6 @@ struct thread_info { ...@@ -131,7 +130,6 @@ struct thread_info {
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_ADDR32 (1 << TIF_ADDR32) #define _TIF_ADDR32 (1 << TIF_ADDR32)
#define _TIF_X32 (1 << TIF_X32) #define _TIF_X32 (1 << TIF_X32)
#define _TIF_FSCHECK (1 << TIF_FSCHECK)
/* flags to check in __switch_to() */ /* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW_BASE \ #define _TIF_WORK_CTXSW_BASE \
......
...@@ -12,30 +12,6 @@ ...@@ -12,30 +12,6 @@
#include <asm/smap.h> #include <asm/smap.h>
#include <asm/extable.h> #include <asm/extable.h>
/*
* The fs value determines whether argument validity checking should be
* performed or not. If get_fs() == USER_DS, checking is performed, with
* get_fs() == KERNEL_DS, checking is bypassed.
*
* For historical reasons, these macros are grossly misnamed.
*/
#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
#define KERNEL_DS MAKE_MM_SEG(-1UL)
#define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX)
#define get_fs() (current->thread.addr_limit)
static inline void set_fs(mm_segment_t fs)
{
current->thread.addr_limit = fs;
/* On user-mode return, check fs is correct */
set_thread_flag(TIF_FSCHECK);
}
#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#define user_addr_max() (current->thread.addr_limit.seg)
/* /*
* Test whether a block of memory is a valid user space address. * Test whether a block of memory is a valid user space address.
* Returns 0 if the range is valid, nonzero otherwise. * Returns 0 if the range is valid, nonzero otherwise.
...@@ -93,7 +69,7 @@ static inline bool pagefault_disabled(void); ...@@ -93,7 +69,7 @@ static inline bool pagefault_disabled(void);
#define access_ok(addr, size) \ #define access_ok(addr, size) \
({ \ ({ \
WARN_ON_IN_IRQ(); \ WARN_ON_IN_IRQ(); \
likely(!__range_not_ok(addr, size, user_addr_max())); \ likely(!__range_not_ok(addr, size, TASK_SIZE_MAX)); \
}) })
/* /*
......
...@@ -37,9 +37,6 @@ static void __used common(void) ...@@ -37,9 +37,6 @@ static void __used common(void)
OFFSET(TASK_stack_canary, task_struct, stack_canary); OFFSET(TASK_stack_canary, task_struct, stack_canary);
#endif #endif
BLANK();
OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
BLANK(); BLANK();
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
......
...@@ -35,10 +35,19 @@ ...@@ -35,10 +35,19 @@
#include <asm/smap.h> #include <asm/smap.h>
#include <asm/export.h> #include <asm/export.h>
#ifdef CONFIG_X86_5LEVEL
#define LOAD_TASK_SIZE_MINUS_N(n) \
ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rdx), \
__stringify(mov $((1 << 56) - 4096 - (n)),%rdx), X86_FEATURE_LA57
#else
#define LOAD_TASK_SIZE_MINUS_N(n) \
mov $(TASK_SIZE_MAX - (n)),%_ASM_DX
#endif
.text .text
SYM_FUNC_START(__get_user_1) SYM_FUNC_START(__get_user_1)
mov PER_CPU_VAR(current_task), %_ASM_DX LOAD_TASK_SIZE_MINUS_N(0)
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX cmp %_ASM_DX,%_ASM_AX
jae bad_get_user jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX and %_ASM_DX, %_ASM_AX
...@@ -51,15 +60,13 @@ SYM_FUNC_END(__get_user_1) ...@@ -51,15 +60,13 @@ SYM_FUNC_END(__get_user_1)
EXPORT_SYMBOL(__get_user_1) EXPORT_SYMBOL(__get_user_1)
SYM_FUNC_START(__get_user_2) SYM_FUNC_START(__get_user_2)
add $1,%_ASM_AX LOAD_TASK_SIZE_MINUS_N(1)
jc bad_get_user cmp %_ASM_DX,%_ASM_AX
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX and %_ASM_DX, %_ASM_AX
ASM_STAC ASM_STAC
2: movzwl -1(%_ASM_AX),%edx 2: movzwl (%_ASM_AX),%edx
xor %eax,%eax xor %eax,%eax
ASM_CLAC ASM_CLAC
ret ret
...@@ -67,15 +74,13 @@ SYM_FUNC_END(__get_user_2) ...@@ -67,15 +74,13 @@ SYM_FUNC_END(__get_user_2)
EXPORT_SYMBOL(__get_user_2) EXPORT_SYMBOL(__get_user_2)
SYM_FUNC_START(__get_user_4) SYM_FUNC_START(__get_user_4)
add $3,%_ASM_AX LOAD_TASK_SIZE_MINUS_N(3)
jc bad_get_user cmp %_ASM_DX,%_ASM_AX
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX and %_ASM_DX, %_ASM_AX
ASM_STAC ASM_STAC
3: movl -3(%_ASM_AX),%edx 3: movl (%_ASM_AX),%edx
xor %eax,%eax xor %eax,%eax
ASM_CLAC ASM_CLAC
ret ret
...@@ -84,29 +89,25 @@ EXPORT_SYMBOL(__get_user_4) ...@@ -84,29 +89,25 @@ EXPORT_SYMBOL(__get_user_4)
SYM_FUNC_START(__get_user_8) SYM_FUNC_START(__get_user_8)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
add $7,%_ASM_AX LOAD_TASK_SIZE_MINUS_N(7)
jc bad_get_user cmp %_ASM_DX,%_ASM_AX
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX and %_ASM_DX, %_ASM_AX
ASM_STAC ASM_STAC
4: movq -7(%_ASM_AX),%rdx 4: movq (%_ASM_AX),%rdx
xor %eax,%eax xor %eax,%eax
ASM_CLAC ASM_CLAC
ret ret
#else #else
add $7,%_ASM_AX LOAD_TASK_SIZE_MINUS_N(7)
jc bad_get_user_8 cmp %_ASM_DX,%_ASM_AX
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user_8 jae bad_get_user_8
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */ sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX and %_ASM_DX, %_ASM_AX
ASM_STAC ASM_STAC
4: movl -7(%_ASM_AX),%edx 4: movl (%_ASM_AX),%edx
5: movl -3(%_ASM_AX),%ecx 5: movl 4(%_ASM_AX),%ecx
xor %eax,%eax xor %eax,%eax
ASM_CLAC ASM_CLAC
ret ret
......
...@@ -31,12 +31,19 @@ ...@@ -31,12 +31,19 @@
* as they get called from within inline assembly. * as they get called from within inline assembly.
*/ */
#define ENTER mov PER_CPU_VAR(current_task), %_ASM_BX #ifdef CONFIG_X86_5LEVEL
#define LOAD_TASK_SIZE_MINUS_N(n) \
ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rbx), \
__stringify(mov $((1 << 56) - 4096 - (n)),%rbx), X86_FEATURE_LA57
#else
#define LOAD_TASK_SIZE_MINUS_N(n) \
mov $(TASK_SIZE_MAX - (n)),%_ASM_BX
#endif
.text .text
SYM_FUNC_START(__put_user_1) SYM_FUNC_START(__put_user_1)
ENTER LOAD_TASK_SIZE_MINUS_N(0)
cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user jae .Lbad_put_user
ASM_STAC ASM_STAC
1: movb %al,(%_ASM_CX) 1: movb %al,(%_ASM_CX)
...@@ -47,9 +54,7 @@ SYM_FUNC_END(__put_user_1) ...@@ -47,9 +54,7 @@ SYM_FUNC_END(__put_user_1)
EXPORT_SYMBOL(__put_user_1) EXPORT_SYMBOL(__put_user_1)
SYM_FUNC_START(__put_user_2) SYM_FUNC_START(__put_user_2)
ENTER LOAD_TASK_SIZE_MINUS_N(1)
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $1,%_ASM_BX
cmp %_ASM_BX,%_ASM_CX cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user jae .Lbad_put_user
ASM_STAC ASM_STAC
...@@ -61,9 +66,7 @@ SYM_FUNC_END(__put_user_2) ...@@ -61,9 +66,7 @@ SYM_FUNC_END(__put_user_2)
EXPORT_SYMBOL(__put_user_2) EXPORT_SYMBOL(__put_user_2)
SYM_FUNC_START(__put_user_4) SYM_FUNC_START(__put_user_4)
ENTER LOAD_TASK_SIZE_MINUS_N(3)
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $3,%_ASM_BX
cmp %_ASM_BX,%_ASM_CX cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user jae .Lbad_put_user
ASM_STAC ASM_STAC
...@@ -75,9 +78,7 @@ SYM_FUNC_END(__put_user_4) ...@@ -75,9 +78,7 @@ SYM_FUNC_END(__put_user_4)
EXPORT_SYMBOL(__put_user_4) EXPORT_SYMBOL(__put_user_4)
SYM_FUNC_START(__put_user_8) SYM_FUNC_START(__put_user_8)
ENTER LOAD_TASK_SIZE_MINUS_N(7)
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $7,%_ASM_BX
cmp %_ASM_BX,%_ASM_CX cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user jae .Lbad_put_user
ASM_STAC ASM_STAC
......
...@@ -41,6 +41,7 @@ config XTENSA ...@@ -41,6 +41,7 @@ config XTENSA
select IRQ_DOMAIN select IRQ_DOMAIN
select MODULES_USE_ELF_RELA select MODULES_USE_ELF_RELA
select PERF_USE_VMALLOC select PERF_USE_VMALLOC
select SET_FS
select VIRT_TO_BUS select VIRT_TO_BUS
help help
Xtensa processors are 32-bit RISC machines designed by Tensilica Xtensa processors are 32-bit RISC machines designed by Tensilica
......
...@@ -312,16 +312,6 @@ void lkdtm_CORRUPT_LIST_DEL(void) ...@@ -312,16 +312,6 @@ void lkdtm_CORRUPT_LIST_DEL(void)
pr_err("list_del() corruption not detected!\n"); pr_err("list_del() corruption not detected!\n");
} }
/* Test if unbalanced set_fs(KERNEL_DS)/set_fs(USER_DS) check exists. */
void lkdtm_CORRUPT_USER_DS(void)
{
pr_info("setting bad task size limit\n");
set_fs(KERNEL_DS);
/* Make sure we do not keep running with a KERNEL_DS! */
force_sig(SIGKILL);
}
/* Test that VMAP_STACK is actually allocating with a leading guard page */ /* Test that VMAP_STACK is actually allocating with a leading guard page */
void lkdtm_STACK_GUARD_PAGE_LEADING(void) void lkdtm_STACK_GUARD_PAGE_LEADING(void)
{ {
......
...@@ -112,7 +112,6 @@ static const struct crashtype crashtypes[] = { ...@@ -112,7 +112,6 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(CORRUPT_STACK_STRONG), CRASHTYPE(CORRUPT_STACK_STRONG),
CRASHTYPE(CORRUPT_LIST_ADD), CRASHTYPE(CORRUPT_LIST_ADD),
CRASHTYPE(CORRUPT_LIST_DEL), CRASHTYPE(CORRUPT_LIST_DEL),
CRASHTYPE(CORRUPT_USER_DS),
CRASHTYPE(STACK_GUARD_PAGE_LEADING), CRASHTYPE(STACK_GUARD_PAGE_LEADING),
CRASHTYPE(STACK_GUARD_PAGE_TRAILING), CRASHTYPE(STACK_GUARD_PAGE_TRAILING),
CRASHTYPE(UNSET_SMEP), CRASHTYPE(UNSET_SMEP),
...@@ -172,7 +171,6 @@ static const struct crashtype crashtypes[] = { ...@@ -172,7 +171,6 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(USERCOPY_STACK_FRAME_FROM), CRASHTYPE(USERCOPY_STACK_FRAME_FROM),
CRASHTYPE(USERCOPY_STACK_BEYOND), CRASHTYPE(USERCOPY_STACK_BEYOND),
CRASHTYPE(USERCOPY_KERNEL), CRASHTYPE(USERCOPY_KERNEL),
CRASHTYPE(USERCOPY_KERNEL_DS),
CRASHTYPE(STACKLEAK_ERASING), CRASHTYPE(STACKLEAK_ERASING),
CRASHTYPE(CFI_FORWARD_PROTO), CRASHTYPE(CFI_FORWARD_PROTO),
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
......
...@@ -27,7 +27,6 @@ void lkdtm_OVERFLOW_UNSIGNED(void); ...@@ -27,7 +27,6 @@ void lkdtm_OVERFLOW_UNSIGNED(void);
void lkdtm_ARRAY_BOUNDS(void); void lkdtm_ARRAY_BOUNDS(void);
void lkdtm_CORRUPT_LIST_ADD(void); void lkdtm_CORRUPT_LIST_ADD(void);
void lkdtm_CORRUPT_LIST_DEL(void); void lkdtm_CORRUPT_LIST_DEL(void);
void lkdtm_CORRUPT_USER_DS(void);
void lkdtm_STACK_GUARD_PAGE_LEADING(void); void lkdtm_STACK_GUARD_PAGE_LEADING(void);
void lkdtm_STACK_GUARD_PAGE_TRAILING(void); void lkdtm_STACK_GUARD_PAGE_TRAILING(void);
void lkdtm_UNSET_SMEP(void); void lkdtm_UNSET_SMEP(void);
...@@ -96,7 +95,6 @@ void lkdtm_USERCOPY_STACK_FRAME_TO(void); ...@@ -96,7 +95,6 @@ void lkdtm_USERCOPY_STACK_FRAME_TO(void);
void lkdtm_USERCOPY_STACK_FRAME_FROM(void); void lkdtm_USERCOPY_STACK_FRAME_FROM(void);
void lkdtm_USERCOPY_STACK_BEYOND(void); void lkdtm_USERCOPY_STACK_BEYOND(void);
void lkdtm_USERCOPY_KERNEL(void); void lkdtm_USERCOPY_KERNEL(void);
void lkdtm_USERCOPY_KERNEL_DS(void);
/* lkdtm_stackleak.c */ /* lkdtm_stackleak.c */
void lkdtm_STACKLEAK_ERASING(void); void lkdtm_STACKLEAK_ERASING(void);
......
...@@ -325,21 +325,6 @@ void lkdtm_USERCOPY_KERNEL(void) ...@@ -325,21 +325,6 @@ void lkdtm_USERCOPY_KERNEL(void)
vm_munmap(user_addr, PAGE_SIZE); vm_munmap(user_addr, PAGE_SIZE);
} }
void lkdtm_USERCOPY_KERNEL_DS(void)
{
char __user *user_ptr =
(char __user *)(0xFUL << (sizeof(unsigned long) * 8 - 4));
mm_segment_t old_fs = get_fs();
char buf[10] = {0};
pr_info("attempting copy_to_user() to noncanonical address: %px\n",
user_ptr);
set_fs(KERNEL_DS);
if (copy_to_user(user_ptr, buf, sizeof(buf)) == 0)
pr_err("copy_to_user() to noncanonical address succeeded!?\n");
set_fs(old_fs);
}
void __init lkdtm_usercopy_init(void) void __init lkdtm_usercopy_init(void)
{ {
/* Prepare cache that lacks SLAB_USERCOPY flag. */ /* Prepare cache that lacks SLAB_USERCOPY flag. */
......
...@@ -297,6 +297,21 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) ...@@ -297,6 +297,21 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
return rv; return rv;
} }
/*
 * ->read_iter entry point for regular /proc files whose proc_ops provide
 * proc_read_iter.
 *
 * Entries for which pde_is_permanent() is true are called directly.  For
 * all other entries the call is bracketed by use_pde()/unuse_pde(); if
 * use_pde() fails the read is refused with -EIO.
 *
 * Returns whatever the entry's proc_read_iter handler returns, or -EIO.
 */
static ssize_t proc_reg_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct proc_dir_entry *pde = PDE(file_inode(iocb->ki_filp));
	int permanent = pde_is_permanent(pde);
	ssize_t ret;

	/* Only non-permanent entries need the use_pde()/unuse_pde() pair. */
	if (!permanent && !use_pde(pde))
		return -EIO;

	ret = pde->proc_ops->proc_read_iter(iocb, iter);

	if (!permanent)
		unuse_pde(pde);
	return ret;
}
static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos) static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos)
{ {
typeof_member(struct proc_ops, proc_read) read; typeof_member(struct proc_ops, proc_read) read;
...@@ -572,9 +587,18 @@ static const struct file_operations proc_reg_file_ops = { ...@@ -572,9 +587,18 @@ static const struct file_operations proc_reg_file_ops = {
.write = proc_reg_write, .write = proc_reg_write,
.poll = proc_reg_poll, .poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl, .unlocked_ioctl = proc_reg_unlocked_ioctl,
#ifdef CONFIG_COMPAT .mmap = proc_reg_mmap,
.compat_ioctl = proc_reg_compat_ioctl, .get_unmapped_area = proc_reg_get_unmapped_area,
#endif .open = proc_reg_open,
.release = proc_reg_release,
};
static const struct file_operations proc_iter_file_ops = {
.llseek = proc_reg_llseek,
.read_iter = proc_reg_read_iter,
.write = proc_reg_write,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
.mmap = proc_reg_mmap, .mmap = proc_reg_mmap,
.get_unmapped_area = proc_reg_get_unmapped_area, .get_unmapped_area = proc_reg_get_unmapped_area,
.open = proc_reg_open, .open = proc_reg_open,
...@@ -582,12 +606,26 @@ static const struct file_operations proc_reg_file_ops = { ...@@ -582,12 +606,26 @@ static const struct file_operations proc_reg_file_ops = {
}; };
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
static const struct file_operations proc_reg_file_ops_no_compat = { static const struct file_operations proc_reg_file_ops_compat = {
.llseek = proc_reg_llseek, .llseek = proc_reg_llseek,
.read = proc_reg_read, .read = proc_reg_read,
.write = proc_reg_write, .write = proc_reg_write,
.poll = proc_reg_poll, .poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl, .unlocked_ioctl = proc_reg_unlocked_ioctl,
.compat_ioctl = proc_reg_compat_ioctl,
.mmap = proc_reg_mmap,
.get_unmapped_area = proc_reg_get_unmapped_area,
.open = proc_reg_open,
.release = proc_reg_release,
};
static const struct file_operations proc_iter_file_ops_compat = {
.llseek = proc_reg_llseek,
.read_iter = proc_reg_read_iter,
.write = proc_reg_write,
.poll = proc_reg_poll,
.unlocked_ioctl = proc_reg_unlocked_ioctl,
.compat_ioctl = proc_reg_compat_ioctl,
.mmap = proc_reg_mmap, .mmap = proc_reg_mmap,
.get_unmapped_area = proc_reg_get_unmapped_area, .get_unmapped_area = proc_reg_get_unmapped_area,
.open = proc_reg_open, .open = proc_reg_open,
...@@ -619,15 +657,19 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) ...@@ -619,15 +657,19 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
{ {
struct inode *inode = new_inode(sb); struct inode *inode = new_inode(sb);
if (inode) { if (!inode) {
pde_put(de);
return NULL;
}
inode->i_ino = de->low_ino; inode->i_ino = de->low_ino;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
PROC_I(inode)->pde = de; PROC_I(inode)->pde = de;
if (is_empty_pde(de)) { if (is_empty_pde(de)) {
make_empty_dir_inode(inode); make_empty_dir_inode(inode);
return inode; return inode;
} }
if (de->mode) { if (de->mode) {
inode->i_mode = de->mode; inode->i_mode = de->mode;
inode->i_uid = de->uid; inode->i_uid = de->uid;
...@@ -640,10 +682,16 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) ...@@ -640,10 +682,16 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
if (S_ISREG(inode->i_mode)) { if (S_ISREG(inode->i_mode)) {
inode->i_op = de->proc_iops; inode->i_op = de->proc_iops;
if (de->proc_ops->proc_read_iter)
inode->i_fop = &proc_iter_file_ops;
else
inode->i_fop = &proc_reg_file_ops; inode->i_fop = &proc_reg_file_ops;
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
if (!de->proc_ops->proc_compat_ioctl) { if (de->proc_ops->proc_compat_ioctl) {
inode->i_fop = &proc_reg_file_ops_no_compat; if (de->proc_ops->proc_read_iter)
inode->i_fop = &proc_iter_file_ops_compat;
else
inode->i_fop = &proc_reg_file_ops_compat;
} }
#endif #endif
} else if (S_ISDIR(inode->i_mode)) { } else if (S_ISDIR(inode->i_mode)) {
...@@ -652,9 +700,8 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) ...@@ -652,9 +700,8 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
} else if (S_ISLNK(inode->i_mode)) { } else if (S_ISLNK(inode->i_mode)) {
inode->i_op = de->proc_iops; inode->i_op = de->proc_iops;
inode->i_fop = NULL; inode->i_fop = NULL;
} else } else {
BUG(); BUG();
} else }
pde_put(de);
return inode; return inode;
} }
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/cred.h> #include <linux/cred.h>
#include <linux/namei.h> #include <linux/namei.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/uio.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/bpf-cgroup.h> #include <linux/bpf-cgroup.h>
#include <linux/mount.h> #include <linux/mount.h>
...@@ -540,13 +541,14 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, ...@@ -540,13 +541,14 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
return err; return err;
} }
static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf, static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
size_t count, loff_t *ppos, int write) int write)
{ {
struct inode *inode = file_inode(filp); struct inode *inode = file_inode(iocb->ki_filp);
struct ctl_table_header *head = grab_header(inode); struct ctl_table_header *head = grab_header(inode);
struct ctl_table *table = PROC_I(inode)->sysctl_entry; struct ctl_table *table = PROC_I(inode)->sysctl_entry;
void *kbuf; size_t count = iov_iter_count(iter);
char *kbuf;
ssize_t error; ssize_t error;
if (IS_ERR(head)) if (IS_ERR(head))
...@@ -569,32 +571,30 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf, ...@@ -569,32 +571,30 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
error = -ENOMEM; error = -ENOMEM;
if (count >= KMALLOC_MAX_SIZE) if (count >= KMALLOC_MAX_SIZE)
goto out; goto out;
kbuf = kzalloc(count + 1, GFP_KERNEL);
if (write) {
kbuf = memdup_user_nul(ubuf, count);
if (IS_ERR(kbuf)) {
error = PTR_ERR(kbuf);
goto out;
}
} else {
kbuf = kzalloc(count, GFP_KERNEL);
if (!kbuf) if (!kbuf)
goto out; goto out;
if (write) {
error = -EFAULT;
if (!copy_from_iter_full(kbuf, count, iter))
goto out_free_buf;
kbuf[count] = '\0';
} }
error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count, error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count,
ppos); &iocb->ki_pos);
if (error) if (error)
goto out_free_buf; goto out_free_buf;
/* careful: calling conventions are nasty here */ /* careful: calling conventions are nasty here */
error = table->proc_handler(table, write, kbuf, &count, ppos); error = table->proc_handler(table, write, kbuf, &count, &iocb->ki_pos);
if (error) if (error)
goto out_free_buf; goto out_free_buf;
if (!write) { if (!write) {
error = -EFAULT; error = -EFAULT;
if (copy_to_user(ubuf, kbuf, count)) if (copy_to_iter(kbuf, count, iter) < count)
goto out_free_buf; goto out_free_buf;
} }
...@@ -607,16 +607,14 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf, ...@@ -607,16 +607,14 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
return error; return error;
} }
static ssize_t proc_sys_read(struct file *filp, char __user *buf, static ssize_t proc_sys_read(struct kiocb *iocb, struct iov_iter *iter)
size_t count, loff_t *ppos)
{ {
return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0); return proc_sys_call_handler(iocb, iter, 0);
} }
static ssize_t proc_sys_write(struct file *filp, const char __user *buf, static ssize_t proc_sys_write(struct kiocb *iocb, struct iov_iter *iter)
size_t count, loff_t *ppos)
{ {
return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1); return proc_sys_call_handler(iocb, iter, 1);
} }
static int proc_sys_open(struct inode *inode, struct file *filp) static int proc_sys_open(struct inode *inode, struct file *filp)
...@@ -853,8 +851,10 @@ static int proc_sys_getattr(const struct path *path, struct kstat *stat, ...@@ -853,8 +851,10 @@ static int proc_sys_getattr(const struct path *path, struct kstat *stat,
static const struct file_operations proc_sys_file_operations = { static const struct file_operations proc_sys_file_operations = {
.open = proc_sys_open, .open = proc_sys_open,
.poll = proc_sys_poll, .poll = proc_sys_poll,
.read = proc_sys_read, .read_iter = proc_sys_read,
.write = proc_sys_write, .write_iter = proc_sys_write,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.llseek = default_llseek, .llseek = default_llseek,
}; };
......
...@@ -419,27 +419,41 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo ...@@ -419,27 +419,41 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo
return ret; return ret;
} }
/*
 * Log a rate-limited warning that a kernel-initiated @op ("read" or
 * "write", as passed by the caller) is not supported on @file, naming the
 * file and the current task's pid and comm.
 *
 * Always returns -EINVAL so callers can "return warn_unsupported(...)".
 */
static int warn_unsupported(struct file *file, const char *op)
{
	int err = -EINVAL;

	pr_warn_ratelimited(
		"kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
		op, file, current->pid, current->comm);

	return err;
}
ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
{ {
mm_segment_t old_fs = get_fs(); struct kvec iov = {
.iov_base = buf,
.iov_len = min_t(size_t, count, MAX_RW_COUNT),
};
struct kiocb kiocb;
struct iov_iter iter;
ssize_t ret; ssize_t ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ)))
return -EINVAL; return -EINVAL;
if (!(file->f_mode & FMODE_CAN_READ)) if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL; return -EINVAL;
/*
* Also fail if ->read_iter and ->read are both wired up as that
* implies very convoluted semantics.
*/
if (unlikely(!file->f_op->read_iter || file->f_op->read))
return warn_unsupported(file, "read");
if (count > MAX_RW_COUNT) init_sync_kiocb(&kiocb, file);
count = MAX_RW_COUNT; kiocb.ki_pos = *pos;
set_fs(KERNEL_DS); iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len);
if (file->f_op->read) ret = file->f_op->read_iter(&kiocb, &iter);
ret = file->f_op->read(file, (void __user *)buf, count, pos);
else if (file->f_op->read_iter)
ret = new_sync_read(file, (void __user *)buf, count, pos);
else
ret = -EINVAL;
set_fs(old_fs);
if (ret > 0) { if (ret > 0) {
*pos = kiocb.ki_pos;
fsnotify_access(file); fsnotify_access(file);
add_rchar(current, ret); add_rchar(current, ret);
} }
...@@ -510,28 +524,31 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t ...@@ -510,28 +524,31 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t
/* caller is responsible for file_start_write/file_end_write */ /* caller is responsible for file_start_write/file_end_write */
ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
{ {
mm_segment_t old_fs; struct kvec iov = {
const char __user *p; .iov_base = (void *)buf,
.iov_len = min_t(size_t, count, MAX_RW_COUNT),
};
struct kiocb kiocb;
struct iov_iter iter;
ssize_t ret; ssize_t ret;
if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE))) if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE)))
return -EBADF; return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE)) if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL; return -EINVAL;
/*
* Also fail if ->write_iter and ->write are both wired up as that
* implies very convoluted semantics.
*/
if (unlikely(!file->f_op->write_iter || file->f_op->write))
return warn_unsupported(file, "write");
old_fs = get_fs(); init_sync_kiocb(&kiocb, file);
set_fs(KERNEL_DS); kiocb.ki_pos = *pos;
p = (__force const char __user *)buf; iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len);
if (count > MAX_RW_COUNT) ret = file->f_op->write_iter(&kiocb, &iter);
count = MAX_RW_COUNT;
if (file->f_op->write)
ret = file->f_op->write(file, p, count, pos);
else if (file->f_op->write_iter)
ret = new_sync_write(file, p, count, pos);
else
ret = -EINVAL;
set_fs(old_fs);
if (ret > 0) { if (ret > 0) {
*pos = kiocb.ki_pos;
fsnotify_modify(file); fsnotify_modify(file);
add_wchar(current, ret); add_wchar(current, ret);
} }
...@@ -1060,7 +1077,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, ...@@ -1060,7 +1077,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
} }
EXPORT_SYMBOL(vfs_iter_write); EXPORT_SYMBOL(vfs_iter_write);
ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
unsigned long vlen, loff_t *pos, rwf_t flags) unsigned long vlen, loff_t *pos, rwf_t flags)
{ {
struct iovec iovstack[UIO_FASTIOV]; struct iovec iovstack[UIO_FASTIOV];
......
...@@ -342,89 +342,6 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = { ...@@ -342,89 +342,6 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = {
}; };
EXPORT_SYMBOL(nosteal_pipe_buf_ops); EXPORT_SYMBOL(nosteal_pipe_buf_ops);
static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
unsigned long vlen, loff_t offset)
{
mm_segment_t old_fs;
loff_t pos = offset;
ssize_t res;
old_fs = get_fs();
set_fs(KERNEL_DS);
/* The cast to a user pointer is valid due to the set_fs() */
res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0);
set_fs(old_fs);
return res;
}
static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
struct kvec *vec, __vec[PIPE_DEF_BUFFERS];
struct iov_iter to;
struct page **pages;
unsigned int nr_pages;
unsigned int mask;
size_t offset, base, copied = 0;
ssize_t res;
int i;
if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
return -EAGAIN;
/*
* Try to keep page boundaries matching to source pagecache ones -
* it probably won't be much help, but...
*/
offset = *ppos & ~PAGE_MASK;
iov_iter_pipe(&to, READ, pipe, len + offset);
res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base);
if (res <= 0)
return -ENOMEM;
nr_pages = DIV_ROUND_UP(res + base, PAGE_SIZE);
vec = __vec;
if (nr_pages > PIPE_DEF_BUFFERS) {
vec = kmalloc_array(nr_pages, sizeof(struct kvec), GFP_KERNEL);
if (unlikely(!vec)) {
res = -ENOMEM;
goto out;
}
}
mask = pipe->ring_size - 1;
pipe->bufs[to.head & mask].offset = offset;
pipe->bufs[to.head & mask].len -= offset;
for (i = 0; i < nr_pages; i++) {
size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);
vec[i].iov_base = page_address(pages[i]) + offset;
vec[i].iov_len = this_len;
len -= this_len;
offset = 0;
}
res = kernel_readv(in, vec, nr_pages, *ppos);
if (res > 0) {
copied = res;
*ppos += res;
}
if (vec != __vec)
kfree(vec);
out:
for (i = 0; i < nr_pages; i++)
put_page(pages[i]);
kvfree(pages);
iov_iter_advance(&to, copied); /* truncates and discards */
return res;
}
/* /*
* Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
* using sendpage(). Return the number of bytes sent. * using sendpage(). Return the number of bytes sent.
...@@ -788,33 +705,6 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ...@@ -788,33 +705,6 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
EXPORT_SYMBOL(iter_file_splice_write); EXPORT_SYMBOL(iter_file_splice_write);
static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct splice_desc *sd)
{
int ret;
void *data;
loff_t tmp = sd->pos;
data = kmap(buf->page);
ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
kunmap(buf->page);
return ret;
}
static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos,
size_t len, unsigned int flags)
{
ssize_t ret;
ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
if (ret > 0)
*ppos += ret;
return ret;
}
/** /**
* generic_splice_sendpage - splice data from a pipe to a socket * generic_splice_sendpage - splice data from a pipe to a socket
* @pipe: pipe to splice from * @pipe: pipe to splice from
...@@ -836,15 +726,23 @@ ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, ...@@ -836,15 +726,23 @@ ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
EXPORT_SYMBOL(generic_splice_sendpage); EXPORT_SYMBOL(generic_splice_sendpage);
/*
 * Emit a rate-limited debug message that splice @op ("read" or "write",
 * as passed by the caller) is not supported on @file, naming the file and
 * the current task's pid and comm.
 *
 * Always returns -EINVAL so callers can "return warn_unsupported(...)".
 */
static int warn_unsupported(struct file *file, const char *op)
{
	int err = -EINVAL;

	pr_debug_ratelimited(
		"splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
		op, file, current->pid, current->comm);

	return err;
}
/* /*
* Attempt to initiate a splice from pipe to file. * Attempt to initiate a splice from pipe to file.
*/ */
static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags) loff_t *ppos, size_t len, unsigned int flags)
{ {
if (out->f_op->splice_write) if (unlikely(!out->f_op->splice_write))
return warn_unsupported(out, "write");
return out->f_op->splice_write(pipe, out, ppos, len, flags); return out->f_op->splice_write(pipe, out, ppos, len, flags);
return default_file_splice_write(pipe, out, ppos, len, flags);
} }
/* /*
...@@ -866,9 +764,9 @@ static long do_splice_to(struct file *in, loff_t *ppos, ...@@ -866,9 +764,9 @@ static long do_splice_to(struct file *in, loff_t *ppos,
if (unlikely(len > MAX_RW_COUNT)) if (unlikely(len > MAX_RW_COUNT))
len = MAX_RW_COUNT; len = MAX_RW_COUNT;
if (in->f_op->splice_read) if (unlikely(!in->f_op->splice_read))
return warn_unsupported(in, "read");
return in->f_op->splice_read(in, ppos, pipe, len, flags); return in->f_op->splice_read(in, ppos, pipe, len, flags);
return default_file_splice_read(in, ppos, pipe, len, flags);
} }
/** /**
......
...@@ -136,7 +136,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, ...@@ -136,7 +136,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
struct ctl_table *table, int write, struct ctl_table *table, int write,
void **buf, size_t *pcount, loff_t *ppos, char **buf, size_t *pcount, loff_t *ppos,
enum bpf_attach_type type); enum bpf_attach_type type);
int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
......
...@@ -1894,8 +1894,6 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, ...@@ -1894,8 +1894,6 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
unsigned long, loff_t *, rwf_t);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *, extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
loff_t, size_t, unsigned int); loff_t, size_t, unsigned int);
extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
......
...@@ -30,6 +30,7 @@ struct proc_ops { ...@@ -30,6 +30,7 @@ struct proc_ops {
unsigned int proc_flags; unsigned int proc_flags;
int (*proc_open)(struct inode *, struct file *); int (*proc_open)(struct inode *, struct file *);
ssize_t (*proc_read)(struct file *, char __user *, size_t, loff_t *); ssize_t (*proc_read)(struct file *, char __user *, size_t, loff_t *);
ssize_t (*proc_read_iter)(struct kiocb *, struct iov_iter *);
ssize_t (*proc_write)(struct file *, const char __user *, size_t, loff_t *); ssize_t (*proc_write)(struct file *, const char __user *, size_t, loff_t *);
loff_t (*proc_lseek)(struct file *, loff_t, int); loff_t (*proc_lseek)(struct file *, loff_t, int);
int (*proc_release)(struct inode *, struct file *); int (*proc_release)(struct inode *, struct file *);
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
#ifdef CONFIG_SET_FS
/* /*
* Force the uaccess routines to be wired up for actual userspace access, * Force the uaccess routines to be wired up for actual userspace access,
* overriding any possible set_fs(KERNEL_DS) still lingering around. Undone * overriding any possible set_fs(KERNEL_DS) still lingering around. Undone
...@@ -25,6 +26,23 @@ static inline void force_uaccess_end(mm_segment_t oldfs) ...@@ -25,6 +26,23 @@ static inline void force_uaccess_end(mm_segment_t oldfs)
{ {
set_fs(oldfs); set_fs(oldfs);
} }
#else /* CONFIG_SET_FS */
typedef struct {
/* empty dummy */
} mm_segment_t;
#define uaccess_kernel() (false)
#define user_addr_max() (TASK_SIZE_MAX)
static inline mm_segment_t force_uaccess_begin(void)
{
return (mm_segment_t) { };
}
/*
 * !CONFIG_SET_FS variant: nothing to restore, so @oldfs is ignored and
 * this is a no-op.
 */
static inline void force_uaccess_end(mm_segment_t oldfs)
{
}
#endif /* CONFIG_SET_FS */
/* /*
* Architectures should provide two primitives (raw_copy_{to,from}_user()) * Architectures should provide two primitives (raw_copy_{to,from}_user())
......
...@@ -1226,7 +1226,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = { ...@@ -1226,7 +1226,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
*/ */
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
struct ctl_table *table, int write, struct ctl_table *table, int write,
void **buf, size_t *pcount, loff_t *ppos, char **buf, size_t *pcount, loff_t *ppos,
enum bpf_attach_type type) enum bpf_attach_type type)
{ {
struct bpf_sysctl_kern ctx = { struct bpf_sysctl_kern ctx = {
......
...@@ -354,50 +354,37 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = { ...@@ -354,50 +354,37 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = {
}; };
static void __init __test_bitmap_parselist(int is_user) static void __init test_bitmap_parselist(void)
{ {
int i; int i;
int err; int err;
ktime_t time; ktime_t time;
DECLARE_BITMAP(bmap, 2048); DECLARE_BITMAP(bmap, 2048);
char *mode = is_user ? "_user" : "";
for (i = 0; i < ARRAY_SIZE(parselist_tests); i++) { for (i = 0; i < ARRAY_SIZE(parselist_tests); i++) {
#define ptest parselist_tests[i] #define ptest parselist_tests[i]
if (is_user) {
mm_segment_t orig_fs = get_fs();
size_t len = strlen(ptest.in);
set_fs(KERNEL_DS);
time = ktime_get();
err = bitmap_parselist_user((__force const char __user *)ptest.in, len,
bmap, ptest.nbits);
time = ktime_get() - time;
set_fs(orig_fs);
} else {
time = ktime_get(); time = ktime_get();
err = bitmap_parselist(ptest.in, bmap, ptest.nbits); err = bitmap_parselist(ptest.in, bmap, ptest.nbits);
time = ktime_get() - time; time = ktime_get() - time;
}
if (err != ptest.errno) { if (err != ptest.errno) {
pr_err("parselist%s: %d: input is %s, errno is %d, expected %d\n", pr_err("parselist: %d: input is %s, errno is %d, expected %d\n",
mode, i, ptest.in, err, ptest.errno); i, ptest.in, err, ptest.errno);
continue; continue;
} }
if (!err && ptest.expected if (!err && ptest.expected
&& !__bitmap_equal(bmap, ptest.expected, ptest.nbits)) { && !__bitmap_equal(bmap, ptest.expected, ptest.nbits)) {
pr_err("parselist%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n", pr_err("parselist: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
mode, i, ptest.in, bmap[0], i, ptest.in, bmap[0],
*ptest.expected); *ptest.expected);
continue; continue;
} }
if (ptest.flags & PARSE_TIME) if (ptest.flags & PARSE_TIME)
pr_err("parselist%s: %d: input is '%s' OK, Time: %llu\n", pr_err("parselist: %d: input is '%s' OK, Time: %llu\n",
mode, i, ptest.in, time); i, ptest.in, time);
#undef ptest #undef ptest
} }
...@@ -443,75 +430,41 @@ static const struct test_bitmap_parselist parse_tests[] __initconst = { ...@@ -443,75 +430,41 @@ static const struct test_bitmap_parselist parse_tests[] __initconst = {
#undef step #undef step
}; };
static void __init __test_bitmap_parse(int is_user) static void __init test_bitmap_parse(void)
{ {
int i; int i;
int err; int err;
ktime_t time; ktime_t time;
DECLARE_BITMAP(bmap, 2048); DECLARE_BITMAP(bmap, 2048);
char *mode = is_user ? "_user" : "";
for (i = 0; i < ARRAY_SIZE(parse_tests); i++) { for (i = 0; i < ARRAY_SIZE(parse_tests); i++) {
struct test_bitmap_parselist test = parse_tests[i]; struct test_bitmap_parselist test = parse_tests[i];
size_t len = test.flags & NO_LEN ? UINT_MAX : strlen(test.in);
if (is_user) {
size_t len = strlen(test.in);
mm_segment_t orig_fs = get_fs();
set_fs(KERNEL_DS);
time = ktime_get();
err = bitmap_parse_user((__force const char __user *)test.in, len,
bmap, test.nbits);
time = ktime_get() - time;
set_fs(orig_fs);
} else {
size_t len = test.flags & NO_LEN ?
UINT_MAX : strlen(test.in);
time = ktime_get(); time = ktime_get();
err = bitmap_parse(test.in, len, bmap, test.nbits); err = bitmap_parse(test.in, len, bmap, test.nbits);
time = ktime_get() - time; time = ktime_get() - time;
}
if (err != test.errno) { if (err != test.errno) {
pr_err("parse%s: %d: input is %s, errno is %d, expected %d\n", pr_err("parse: %d: input is %s, errno is %d, expected %d\n",
mode, i, test.in, err, test.errno); i, test.in, err, test.errno);
continue; continue;
} }
if (!err && test.expected if (!err && test.expected
&& !__bitmap_equal(bmap, test.expected, test.nbits)) { && !__bitmap_equal(bmap, test.expected, test.nbits)) {
pr_err("parse%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n", pr_err("parse: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
mode, i, test.in, bmap[0], i, test.in, bmap[0],
*test.expected); *test.expected);
continue; continue;
} }
if (test.flags & PARSE_TIME) if (test.flags & PARSE_TIME)
pr_err("parse%s: %d: input is '%s' OK, Time: %llu\n", pr_err("parse: %d: input is '%s' OK, Time: %llu\n",
mode, i, test.in, time); i, test.in, time);
} }
} }
static void __init test_bitmap_parselist(void)
{
__test_bitmap_parselist(0);
}
static void __init test_bitmap_parselist_user(void)
{
__test_bitmap_parselist(1);
}
static void __init test_bitmap_parse(void)
{
__test_bitmap_parse(0);
}
static void __init test_bitmap_parse_user(void)
{
__test_bitmap_parse(1);
}
#define EXP1_IN_BITS (sizeof(exp1) * 8) #define EXP1_IN_BITS (sizeof(exp1) * 8)
static void __init test_bitmap_arr32(void) static void __init test_bitmap_arr32(void)
...@@ -675,9 +628,7 @@ static void __init selftest(void) ...@@ -675,9 +628,7 @@ static void __init selftest(void)
test_replace(); test_replace();
test_bitmap_arr32(); test_bitmap_arr32();
test_bitmap_parse(); test_bitmap_parse();
test_bitmap_parse_user();
test_bitmap_parselist(); test_bitmap_parselist();
test_bitmap_parselist_user();
test_mem_optimisations(); test_mem_optimisations();
test_for_each_set_clump8(); test_for_each_set_clump8();
test_bitmap_cut(); test_bitmap_cut();
......
...@@ -9,7 +9,6 @@ EXCEPTION ...@@ -9,7 +9,6 @@ EXCEPTION
#CORRUPT_STACK_STRONG Crashes entire system on success #CORRUPT_STACK_STRONG Crashes entire system on success
CORRUPT_LIST_ADD list_add corruption CORRUPT_LIST_ADD list_add corruption
CORRUPT_LIST_DEL list_del corruption CORRUPT_LIST_DEL list_del corruption
CORRUPT_USER_DS Invalid address limit on user-mode return
STACK_GUARD_PAGE_LEADING STACK_GUARD_PAGE_LEADING
STACK_GUARD_PAGE_TRAILING STACK_GUARD_PAGE_TRAILING
UNSET_SMEP CR4 bits went missing UNSET_SMEP CR4 bits went missing
...@@ -67,6 +66,5 @@ USERCOPY_STACK_FRAME_TO ...@@ -67,6 +66,5 @@ USERCOPY_STACK_FRAME_TO
USERCOPY_STACK_FRAME_FROM USERCOPY_STACK_FRAME_FROM
USERCOPY_STACK_BEYOND USERCOPY_STACK_BEYOND
USERCOPY_KERNEL USERCOPY_KERNEL
USERCOPY_KERNEL_DS
STACKLEAK_ERASING OK: the rest of the thread stack is properly erased STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
CFI_FORWARD_PROTO CFI_FORWARD_PROTO
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment