Commit a86538a2 authored by Alexei Starovoitov

Merge branch 'bpf-prevent-userspace-memory-access'

Puranjay Mohan says:

====================
bpf: prevent userspace memory access

V5: https://lore.kernel.org/bpf/20240324185356.59111-1-puranjay12@gmail.com/
Changes in V6:
- Disable the verifier's instrumentation in x86-64 and update the JIT to
  take care of vsyscall page in addition to userspace addresses.
- Update bpf_testmod to test for vsyscall addresses.

V4: https://lore.kernel.org/bpf/20240321124640.8870-1-puranjay12@gmail.com/
Changes in V5:
- Use TASK_SIZE_MAX + PAGE_SIZE, VSYSCALL_ADDR as userspace boundary in
  x86-64 JIT.
- Added Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>

V3: https://lore.kernel.org/bpf/20240321120842.78983-1-puranjay12@gmail.com/
Changes in V4:
- Disable this feature on architectures that don't define
  CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE.
- By doing the above, we don't need anything explicitly for s390x.

V2: https://lore.kernel.org/bpf/20240321101058.68530-1-puranjay12@gmail.com/
Changes in V3:
- Return 0 from bpf_arch_uaddress_limit() in disabled case because it
  returns u64.
- Modify the check in the verifier to not do instrumentation when
  uaddress_limit is 0.

V1: https://lore.kernel.org/bpf/20240320105436.4781-1-puranjay12@gmail.com/
Changes in V2:
- Disable this feature on s390x.

With BPF_PROBE_MEM, BPF allows de-referencing an untrusted pointer. To
thwart invalid memory accesses, the JITs add an exception table entry for
all such accesses. But in case the src_reg + offset is a userspace address,
the BPF program might read that memory if the user has mapped it.

The x86-64 JIT already instruments the BPF_PROBE_MEM based loads with checks
to skip loads from userspace addresses, but it doesn't check for the vsyscall page
because it falls in the kernel address space but is considered a userspace
page. The second patch in this series fixes the x86-64 JIT to also skip
loads from the vsyscall page. The last patch updates the bpf_testmod so
this address can be checked as part of the selftests.

Other architectures don't have the complexity of the vsyscall address and
just need to skip loads from userspace. To make this more scalable and
robust, the verifier is updated in the first patch to instrument
BPF_PROBE_MEM to skip loads from userspace addresses.
====================

Link: https://lore.kernel.org/r/20240424100210.11982-1-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 5bcf0dcb 7cd6750d
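
To make the cover letter's scenario concrete: in BTF-enabled tracing programs the verifier marks loads through kernel pointers as BPF_PROBE_MEM, since it cannot prove the pointed-to address is valid, and the JIT relies on the exception table (and now the address checks below) to handle bad addresses. A minimal illustrative sketch of such a program, assuming the usual libbpf/vmlinux.h build setup; the hook and fields are arbitrary examples, not taken from this series:

// SPDX-License-Identifier: GPL-2.0
/* Illustrative only: each pointer dereference below (file->f_inode->i_mode)
 * is emitted as a BPF_PROBE_MEM load in the JITed code, the class of loads
 * this series hardens against userspace addresses.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

SEC("fentry/security_file_open")
int BPF_PROG(probe_mem_example, struct file *file)
{
	/* walking kernel structures; the verifier can't prove these pointers */
	unsigned int mode = file->f_inode->i_mode;

	bpf_printk("file opened, mode 0x%x", mode);
	return 0;
}

If such a pointer ever carried a userspace address that the user had mapped, the pre-series code would read user memory through it; the changes below reject those addresses and zero the destination register instead.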
arch/x86/net/bpf_jit_comp.c
@@ -1807,36 +1807,41 @@ st:		if (is_imm8(insn->off))
 		if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
 		    BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
 			/* Conservatively check that src_reg + insn->off is a kernel address:
-			 *   src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE
-			 * src_reg is used as scratch for src_reg += insn->off and restored
-			 * after emit_ldx if necessary
+			 *   src_reg + insn->off > TASK_SIZE_MAX + PAGE_SIZE
+			 *   and
+			 *   src_reg + insn->off < VSYSCALL_ADDR
 			 */
-			u64 limit = TASK_SIZE_MAX + PAGE_SIZE;
+			u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR;
 			u8 *end_of_jmp;
 
-			/* At end of these emitted checks, insn->off will have been added
-			 * to src_reg, so no need to do relative load with insn->off offset
-			 */
-			insn_off = 0;
-			/* movabsq r11, limit */
-			EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG));
-			EMIT((u32)limit, 4);
-			EMIT(limit >> 32, 4);
+			/* movabsq r10, VSYSCALL_ADDR */
+			emit_mov_imm64(&prog, BPF_REG_AX, (long)VSYSCALL_ADDR >> 32,
+				       (u32)(long)VSYSCALL_ADDR);
+
+			/* mov src_reg, r11 */
+			EMIT_mov(AUX_REG, src_reg);
 
 			if (insn->off) {
-				/* add src_reg, insn->off */
-				maybe_emit_1mod(&prog, src_reg, true);
-				EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off);
+				/* add r11, insn->off */
+				maybe_emit_1mod(&prog, AUX_REG, true);
+				EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off);
 			}
 
-			/* cmp src_reg, r11 */
-			maybe_emit_mod(&prog, src_reg, AUX_REG, true);
-			EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG));
+			/* sub r11, r10 */
+			maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+			EMIT2(0x29, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
+
+			/* movabsq r10, limit */
+			emit_mov_imm64(&prog, BPF_REG_AX, (long)limit >> 32,
+				       (u32)(long)limit);
 
-			/* if unsigned '>=', goto load */
-			EMIT2(X86_JAE, 0);
+			/* cmp r10, r11 */
+			maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
+			EMIT2(0x39, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
+
+			/* if unsigned '>', goto load */
+			EMIT2(X86_JA, 0);
 			end_of_jmp = prog;
 
 			/* xor dst_reg, dst_reg */
@@ -1862,18 +1867,6 @@ st:		if (is_imm8(insn->off))
 			/* populate jmp_offset for JMP above */
 			start_of_ldx[-1] = prog - start_of_ldx;
 
-			if (insn->off && src_reg != dst_reg) {
-				/* sub src_reg, insn->off
-				 * Restore src_reg after "add src_reg, insn->off" in prev
-				 * if statement. But if src_reg == dst_reg, emit_ldx
-				 * above already clobbered src_reg, so no need to restore.
-				 * If add src_reg, insn->off was unnecessary, no need to
-				 * restore either.
-				 */
-				maybe_emit_1mod(&prog, src_reg, true);
-				EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off);
-			}
-
 			if (!bpf_prog->aux->extable)
 				break;
@@ -3473,3 +3466,9 @@ bool bpf_jit_supports_ptr_xchg(void)
 {
 	return true;
 }
+
+/* x86-64 JIT emits its own code to filter user addresses so return 0 here */
+u64 bpf_arch_uaddress_limit(void)
+{
+	return 0;
+}
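
The sequence emitted above folds the two-sided range check from the comment into one wrapped unsigned comparison against limit. A self-contained C sketch of the equivalent logic; the constants are assumptions written out for illustration, not taken from kernel headers:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Assumed values for the sketch: TASK_SIZE_MAX with 4-level paging and the
 * fixed vsyscall page address on x86-64.
 */
#define SKETCH_TASK_SIZE_MAX	0x00007ffffffff000ULL
#define SKETCH_PAGE_SIZE	0x1000ULL
#define SKETCH_VSYSCALL_ADDR	0xffffffffff600000ULL

/* The check as stated in the JIT comment: only load when the address sits
 * strictly between the end of userspace and the vsyscall page.
 */
static bool addr_ok_two_compares(uint64_t addr)
{
	return addr > SKETCH_TASK_SIZE_MAX + SKETCH_PAGE_SIZE &&
	       addr < SKETCH_VSYSCALL_ADDR;
}

/* The form the emitted code uses: subtract VSYSCALL_ADDR (wrapping) and do a
 * single unsigned compare against the wrapped limit.
 */
static bool addr_ok_one_compare(uint64_t addr)
{
	uint64_t limit = SKETCH_TASK_SIZE_MAX + SKETCH_PAGE_SIZE - SKETCH_VSYSCALL_ADDR;

	return addr - SKETCH_VSYSCALL_ADDR > limit;
}

int main(void)
{
	/* userspace, kernel text-ish, vsyscall page, top of memory */
	uint64_t samples[] = { 0x1000, 0xffffffff81000000ULL,
			       SKETCH_VSYSCALL_ADDR, ~0ULL };

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		assert(addr_ok_two_compares(samples[i]) == addr_ok_one_compare(samples[i]));
	return 0;
}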
include/linux/filter.h
@@ -963,6 +963,7 @@ bool bpf_jit_supports_far_kfunc_call(void);
 bool bpf_jit_supports_exceptions(void);
 bool bpf_jit_supports_ptr_xchg(void);
 bool bpf_jit_supports_arena(void);
+u64 bpf_arch_uaddress_limit(void);
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
 bool bpf_helper_changes_pkt_data(void *func);
kernel/bpf/core.c
@@ -2942,6 +2942,15 @@ bool __weak bpf_jit_supports_arena(void)
 	return false;
 }
 
+u64 __weak bpf_arch_uaddress_limit(void)
+{
+#if defined(CONFIG_64BIT) && defined(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE)
+	return TASK_SIZE;
+#else
+	return 0;
+#endif
+}
+
 /* Return TRUE if the JIT backend satisfies the following two conditions:
  *	1) JIT backend supports atomic_xchg() on pointer-sized words.
  *	2) Under the specific arch, the implementation of xchg() is the same
kernel/bpf/verifier.c
@@ -19675,6 +19675,36 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			goto next_insn;
 		}
 
+		/* Make it impossible to de-reference a userspace address */
+		if (BPF_CLASS(insn->code) == BPF_LDX &&
+		    (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
+		     BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
+			struct bpf_insn *patch = &insn_buf[0];
+			u64 uaddress_limit = bpf_arch_uaddress_limit();
+
+			if (!uaddress_limit)
+				goto next_insn;
+
+			*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
+			if (insn->off)
+				*patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
+			*patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
+			*patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
+			*patch++ = *insn;
+			*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+			*patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
+
+			cnt = patch - insn_buf;
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+			goto next_insn;
+		}
+
 		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
 		if (BPF_CLASS(insn->code) == BPF_LD &&
 		    (BPF_MODE(insn->code) == BPF_ABS ||
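
Spelled out, the patched sequence turns dst_reg = *(size *)(src_reg + off) into a guarded load that only runs when the upper 32 bits of the address exceed the upper 32 bits of bpf_arch_uaddress_limit(). A standalone C sketch of that behaviour; names and types are illustrative, and in the kernel the surviving load can still fault and is handled via the exception table:

#include <stdint.h>

/* C rendering of the instruction sequence patched in above: only the high
 * halves of the address and of the limit are compared, which conservatively
 * skips anything that is not clearly a kernel address.
 */
uint64_t probe_mem_load_sketch(const char *src, int16_t off, uint64_t uaddress_limit)
{
	uint64_t ax = (uint64_t)(uintptr_t)(src + off);	/* BPF_REG_AX = src_reg + insn->off */

	if ((ax >> 32) <= (uaddress_limit >> 32))	/* the patched BPF_JLE check */
		return 0;				/* skip the load, dst_reg = 0 */

	return *(const uint64_t *)(const void *)(src + off);	/* original PROBE_MEM load */
}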
tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -205,6 +205,9 @@ __weak noinline struct file *bpf_testmod_return_ptr(int arg)
 	case 5: return (void *)~(1ull << 30);	/* trigger extable */
 	case 6: return &f;			/* valid addr */
 	case 7: return (void *)((long)&f | 1);	/* kernel tricks */
+#ifdef CONFIG_X86_64
+	case 8: return (void *)VSYSCALL_ADDR;	/* vsyscall page address */
+#endif
 	default: return NULL;
 	}
 }