Commit 358fdb45 authored by Alexei Starovoitov

Merge branch 'bpf_probe_read_user'

Daniel Borkmann says:

====================
This set adds probe_read_{user,kernel}(), probe_read_str_{user,kernel}()
helpers, fixes probe_write_user() helper and selftests. For details please
see individual patches.

Thanks!

v2 -> v3:
  - noticed two more things that are fixed in here:
   - bpf uapi helper description used 'int size' for *_str helpers, now u32
   - we need TASK_SIZE_MAX + guard page on x86-64 in patch 2 otherwise
     we'll trigger the 00c42373 warn as well, so full range covered now
v1 -> v2:
  - standardize unsafe_ptr terminology in uapi header comment (Andrii)
  - probe_read_{user,kernel}[_str] naming scheme (Andrii)
  - use global data in last test case, remove relaxed_maps (Andrii)
  - add strict non-pagefault kernel read funcs to avoid warning in
    kernel probe read helpers (Alexei)
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents e1cb7d2d fa553d9b
...@@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg ...@@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
# Make sure __phys_addr has no stackprotector # Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector) nostackp := $(call cc-option, -fno-stack-protector)
......
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/uaccess.h>
#include <linux/kernel.h>
#ifdef CONFIG_X86_64
/*
 * canonical_address() - sign-extend @vaddr from @vaddr_bits to 64 bits
 * @vaddr: virtual address to canonicalize
 * @vaddr_bits: number of significant virtual address bits
 *
 * Discards every bit above bit (@vaddr_bits - 1) and refills those positions
 * with copies of bit (@vaddr_bits - 1).
 *
 * The left shift is carried out on the unsigned value: shifting a signed
 * quantity whose high bits get pushed out is undefined in ISO C. Only the
 * right shift is done on the signed type so that the top bit is replicated
 * (this relies on the compiler implementing signed >> as an arithmetic
 * shift, exactly as the previous form of this expression did).
 *
 * The shift count is 64 - @vaddr_bits, so @vaddr_bits has to lie in 1..64
 * for the shifts to be well defined.
 *
 * Return: @vaddr sign-extended from bit (@vaddr_bits - 1).
 */
static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits)
{
	const int shift = 64 - vaddr_bits;

	return (u64)((s64)(vaddr << shift) >> shift);
}
/*
 * x86-64 check used by the strict probe helpers: reports true when @vaddr
 * lies below TASK_SIZE_MAX + PAGE_SIZE or is not in canonical form.
 */
static __always_inline bool invalid_probe_range(u64 vaddr)
{
	/*
	 * Everything up to the highest possible canonical userspace address,
	 * plus the userspace guard page that follows it, is rejected.
	 */
	if (vaddr < TASK_SIZE_MAX + PAGE_SIZE)
		return true;

	/* What is left must be canonical; the non-canonical range is rejected. */
	return vaddr != canonical_address(vaddr, boot_cpu_data.x86_virt_bits);
}
#else
/*
 * Variant built when CONFIG_X86_64 is not set: reports true when @vaddr is
 * below TASK_SIZE_MAX.
 */
static __always_inline bool invalid_probe_range(u64 vaddr)
{
	const bool below_task_size_max = vaddr < TASK_SIZE_MAX;

	return below_task_size_max;
}
#endif
/*
 * probe_kernel_read_strict() - __probe_kernel_read() with an address filter
 * @dst: buffer that receives the data
 * @src: address to read from
 * @size: number of bytes to read
 *
 * Refuses the request with -EFAULT when invalid_probe_range() flags @src;
 * otherwise hands the call through to __probe_kernel_read() and returns its
 * result unchanged.
 */
long probe_kernel_read_strict(void *dst, const void *src, size_t size)
{
	const unsigned long vaddr = (unsigned long)src;

	return unlikely(invalid_probe_range(vaddr)) ?
	       -EFAULT : __probe_kernel_read(dst, src, size);
}
/*
 * strncpy_from_unsafe_strict() - __strncpy_from_unsafe() with an address
 * filter
 * @dst: destination buffer
 * @unsafe_addr: address the string is read from
 * @count: passed through to __strncpy_from_unsafe()
 *
 * Returns -EFAULT without touching @dst when invalid_probe_range() flags
 * @unsafe_addr; otherwise returns whatever __strncpy_from_unsafe() returns.
 */
long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count)
{
	const unsigned long vaddr = (unsigned long)unsafe_addr;
	long ret = -EFAULT;

	if (unlikely(invalid_probe_range(vaddr)))
		return ret;

	ret = __strncpy_from_unsafe(dst, unsafe_addr, count);
	return ret;
}
...@@ -311,6 +311,7 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src, ...@@ -311,6 +311,7 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
* happens, handle that and return -EFAULT. * happens, handle that and return -EFAULT.
*/ */
extern long probe_kernel_read(void *dst, const void *src, size_t size); extern long probe_kernel_read(void *dst, const void *src, size_t size);
extern long probe_kernel_read_strict(void *dst, const void *src, size_t size);
extern long __probe_kernel_read(void *dst, const void *src, size_t size); extern long __probe_kernel_read(void *dst, const void *src, size_t size);
/* /*
...@@ -337,7 +338,22 @@ extern long __probe_user_read(void *dst, const void __user *src, size_t size); ...@@ -337,7 +338,22 @@ extern long __probe_user_read(void *dst, const void __user *src, size_t size);
extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
/*
* probe_user_write(): safely attempt to write to a location in user space
* @dst: address to write to
* @src: pointer to the data that shall be written
* @size: size of the data chunk
*
* Safely write to address @dst from the buffer at @src. If a kernel fault
* happens, handle that and return -EFAULT.
*/
extern long notrace probe_user_write(void __user *dst, const void *src, size_t size);
extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size);
extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
extern long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
long count);
extern long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr, extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
long count); long count);
extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count); extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
......
...@@ -563,10 +563,13 @@ union bpf_attr { ...@@ -563,10 +563,13 @@ union bpf_attr {
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
* *
* int bpf_probe_read(void *dst, u32 size, const void *src) * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
* Description * Description
* For tracing programs, safely attempt to read *size* bytes from * For tracing programs, safely attempt to read *size* bytes from
* address *src* and store the data in *dst*. * kernel space address *unsafe_ptr* and store the data in *dst*.
*
* Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
* instead.
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
* *
...@@ -1428,45 +1431,14 @@ union bpf_attr { ...@@ -1428,45 +1431,14 @@ union bpf_attr {
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
* *
* int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr) * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
* Description * Description
* Copy a NUL terminated string from an unsafe address * Copy a NUL terminated string from an unsafe kernel address
* *unsafe_ptr* to *dst*. The *size* should include the * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
* terminating NUL byte. In case the string length is smaller than * more details.
* *size*, the target is not padded with further NUL bytes. If the
* string length is larger than *size*, just *size*-1 bytes are
* copied and the last byte is set to NUL.
*
* On success, the length of the copied string is returned. This
* makes this helper useful in tracing programs for reading
* strings, and more importantly to get its length at runtime. See
* the following snippet:
* *
* :: * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
* * instead.
* SEC("kprobe/sys_open")
* void bpf_sys_open(struct pt_regs *ctx)
* {
* char buf[PATHLEN]; // PATHLEN is defined to 256
* int res = bpf_probe_read_str(buf, sizeof(buf),
* ctx->di);
*
* // Consume buf, for example push it to
* // userspace via bpf_perf_event_output(); we
* // can use res (the string length) as event
* // size, after checking its boundaries.
* }
*
* In comparison, using **bpf_probe_read()** helper here instead
* to read the string would require to estimate the length at
* compile time, and would often result in copying more memory
* than necessary.
*
* Another useful use case is when parsing individual process
* arguments or individual environment variables navigating
* *current*\ **->mm->arg_start** and *current*\
* **->mm->env_start**: using this helper and the return value,
* one can quickly iterate at the right offset of the memory area.
* Return * Return
* On success, the strictly positive length of the string, * On success, the strictly positive length of the string,
* including the trailing NUL character. On error, a negative * including the trailing NUL character. On error, a negative
...@@ -2777,6 +2749,72 @@ union bpf_attr { ...@@ -2777,6 +2749,72 @@ union bpf_attr {
* restricted to raw_tracepoint bpf programs. * restricted to raw_tracepoint bpf programs.
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Safely attempt to read *size* bytes from user space address
* *unsafe_ptr* and store the data in *dst*.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Safely attempt to read *size* bytes from kernel space address
* *unsafe_ptr* and store the data in *dst*.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe user address
* *unsafe_ptr* to *dst*. The *size* should include the
* terminating NUL byte. In case the string length is smaller than
* *size*, the target is not padded with further NUL bytes. If the
* string length is larger than *size*, just *size*-1 bytes are
* copied and the last byte is set to NUL.
*
* On success, the length of the copied string is returned. This
* makes this helper useful in tracing programs for reading
* strings, and more importantly to get its length at runtime. See
* the following snippet:
*
* ::
*
* SEC("kprobe/sys_open")
* void bpf_sys_open(struct pt_regs *ctx)
* {
* char buf[PATHLEN]; // PATHLEN is defined to 256
* int res = bpf_probe_read_user_str(buf, sizeof(buf),
* ctx->di);
*
* // Consume buf, for example push it to
* // userspace via bpf_perf_event_output(); we
* // can use res (the string length) as event
* // size, after checking its boundaries.
* }
*
* In comparison, using **bpf_probe_read_user()** helper here
* instead to read the string would require to estimate the length
* at compile time, and would often result in copying more memory
* than necessary.
*
* Another useful use case is when parsing individual process
* arguments or individual environment variables navigating
* *current*\ **->mm->arg_start** and *current*\
* **->mm->env_start**: using this helper and the return value,
* one can quickly iterate at the right offset of the memory area.
* Return
* On success, the strictly positive length of the string,
* including the trailing NUL character. On error, a negative
* value.
*
* int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
* to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
* Return
* On success, the strictly positive length of the string, including
* the trailing NUL character. On error, a negative value.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -2890,7 +2928,11 @@ union bpf_attr { ...@@ -2890,7 +2928,11 @@ union bpf_attr {
FN(sk_storage_delete), \ FN(sk_storage_delete), \
FN(send_signal), \ FN(send_signal), \
FN(tcp_gen_syncookie), \ FN(tcp_gen_syncookie), \
FN(skb_output), FN(skb_output), \
FN(probe_read_user), \
FN(probe_read_kernel), \
FN(probe_read_user_str), \
FN(probe_read_kernel_str),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
......
...@@ -1306,11 +1306,12 @@ bool bpf_opcode_in_insntable(u8 code) ...@@ -1306,11 +1306,12 @@ bool bpf_opcode_in_insntable(u8 code)
} }
#ifndef CONFIG_BPF_JIT_ALWAYS_ON #ifndef CONFIG_BPF_JIT_ALWAYS_ON
u64 __weak bpf_probe_read(void * dst, u32 size, const void * unsafe_ptr) u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
{ {
memset(dst, 0, size); memset(dst, 0, size);
return -EFAULT; return -EFAULT;
} }
/** /**
* __bpf_prog_run - run eBPF program on a given context * __bpf_prog_run - run eBPF program on a given context
* @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
...@@ -1568,7 +1569,7 @@ static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u6 ...@@ -1568,7 +1569,7 @@ static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u6
#undef LDST #undef LDST
#define LDX_PROBE(SIZEOP, SIZE) \ #define LDX_PROBE(SIZEOP, SIZE) \
LDX_PROBE_MEM_##SIZEOP: \ LDX_PROBE_MEM_##SIZEOP: \
bpf_probe_read(&DST, SIZE, (const void *)(long) SRC); \ bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) SRC); \
CONT; CONT;
LDX_PROBE(B, 1) LDX_PROBE(B, 1)
LDX_PROBE(H, 2) LDX_PROBE(H, 2)
......
...@@ -138,24 +138,125 @@ static const struct bpf_func_proto bpf_override_return_proto = { ...@@ -138,24 +138,125 @@ static const struct bpf_func_proto bpf_override_return_proto = {
}; };
#endif #endif
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
const void __user *, unsafe_ptr)
{ {
int ret; int ret = probe_user_read(dst, unsafe_ptr, size);
ret = security_locked_down(LOCKDOWN_BPF_READ); if (unlikely(ret < 0))
if (ret < 0) memset(dst, 0, size);
goto out;
return ret;
}
/*
 * Prototype descriptor for the bpf_probe_read_user helper: flagged gpl_only,
 * integer return (RET_INTEGER), arg1 typed ARG_PTR_TO_UNINIT_MEM, arg2 typed
 * ARG_CONST_SIZE_OR_ZERO, arg3 typed ARG_ANYTHING.
 */
static const struct bpf_func_proto bpf_probe_read_user_proto = {
.func = bpf_probe_read_user,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
};
/*
 * bpf_probe_read_user_str helper body.
 *
 * Calls strncpy_from_unsafe_user() to copy from the user pointer @unsafe_ptr
 * into @dst, bounded by @size. When that call reports a negative value, all
 * @size bytes of @dst are zeroed before returning. The value obtained from
 * strncpy_from_unsafe_user() is returned in either case.
 */
BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
	   const void __user *, unsafe_ptr)
{
	int copied;

	copied = strncpy_from_unsafe_user(dst, unsafe_ptr, size);
	if (unlikely(copied < 0)) {
		/* Failure: hand back a fully cleared buffer. */
		memset(dst, 0, size);
	}

	return copied;
}
/*
 * Prototype descriptor for the bpf_probe_read_user_str helper: flagged
 * gpl_only, integer return (RET_INTEGER), arg1 typed ARG_PTR_TO_UNINIT_MEM,
 * arg2 typed ARG_CONST_SIZE_OR_ZERO, arg3 typed ARG_ANYTHING.
 */
static const struct bpf_func_proto bpf_probe_read_user_str_proto = {
.func = bpf_probe_read_user_str,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
};
ret = probe_kernel_read(dst, unsafe_ptr, size); static __always_inline int
bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr,
const bool compat)
{
int ret = security_locked_down(LOCKDOWN_BPF_READ);
if (unlikely(ret < 0))
goto out;
ret = compat ? probe_kernel_read(dst, unsafe_ptr, size) :
probe_kernel_read_strict(dst, unsafe_ptr, size);
if (unlikely(ret < 0)) if (unlikely(ret < 0))
out: out:
memset(dst, 0, size); memset(dst, 0, size);
return ret;
}
/*
 * bpf_probe_read_kernel helper body: forwards to
 * bpf_probe_read_kernel_common() with compat == false, which makes the
 * common routine read through probe_kernel_read_strict().
 */
BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
const void *, unsafe_ptr)
{
return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, false);
}
/*
 * Prototype descriptor for the bpf_probe_read_kernel helper: flagged
 * gpl_only, integer return (RET_INTEGER), arg1 typed ARG_PTR_TO_UNINIT_MEM,
 * arg2 typed ARG_CONST_SIZE_OR_ZERO, arg3 typed ARG_ANYTHING.
 */
static const struct bpf_func_proto bpf_probe_read_kernel_proto = {
.func = bpf_probe_read_kernel,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
};
/*
 * Compat helper body backing the pre-existing BPF_FUNC_probe_read id:
 * forwards to bpf_probe_read_kernel_common() with compat == true, which
 * makes the common routine read through probe_kernel_read() instead of the
 * strict variant.
 */
BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
const void *, unsafe_ptr)
{
return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, true);
}
/*
 * Prototype descriptor for bpf_probe_read_compat (returned by
 * tracing_func_proto() for BPF_FUNC_probe_read): flagged gpl_only, integer
 * return (RET_INTEGER), arg1 typed ARG_PTR_TO_UNINIT_MEM, arg2 typed
 * ARG_CONST_SIZE_OR_ZERO, arg3 typed ARG_ANYTHING.
 */
static const struct bpf_func_proto bpf_probe_read_compat_proto = {
.func = bpf_probe_read_compat,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
};
static __always_inline int
bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr,
const bool compat)
{
int ret = security_locked_down(LOCKDOWN_BPF_READ);
if (unlikely(ret < 0))
goto out;
/*
* The strncpy_from_unsafe_*() call will likely not fill the entire
* buffer, but that's okay in this circumstance as we're probing
* arbitrary memory anyway similar to bpf_probe_read_*() and might
* as well probe the stack. Thus, memory is explicitly cleared
* only in error case, so that improper users ignoring return
* code altogether don't copy garbage; otherwise length of string
* is returned that can be used for bpf_perf_event_output() et al.
*/
ret = compat ? strncpy_from_unsafe(dst, unsafe_ptr, size) :
strncpy_from_unsafe_strict(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
out:
memset(dst, 0, size);
return ret; return ret;
} }
static const struct bpf_func_proto bpf_probe_read_proto = { BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
.func = bpf_probe_read, const void *, unsafe_ptr)
{
return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, false);
}
static const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
.func = bpf_probe_read_kernel_str,
.gpl_only = true, .gpl_only = true,
.ret_type = RET_INTEGER, .ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg1_type = ARG_PTR_TO_UNINIT_MEM,
...@@ -163,7 +264,22 @@ static const struct bpf_func_proto bpf_probe_read_proto = { ...@@ -163,7 +264,22 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
.arg3_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING,
}; };
BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src, BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
const void *, unsafe_ptr)
{
return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, true);
}
/*
 * Prototype descriptor for bpf_probe_read_compat_str (returned by
 * tracing_func_proto() for BPF_FUNC_probe_read_str): flagged gpl_only,
 * integer return (RET_INTEGER), arg1 typed ARG_PTR_TO_UNINIT_MEM, arg2 typed
 * ARG_CONST_SIZE_OR_ZERO, arg3 typed ARG_ANYTHING.
 */
static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
.func = bpf_probe_read_compat_str,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
};
BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
u32, size) u32, size)
{ {
/* /*
...@@ -186,10 +302,8 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src, ...@@ -186,10 +302,8 @@ BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
return -EPERM; return -EPERM;
if (unlikely(!nmi_uaccess_okay())) if (unlikely(!nmi_uaccess_okay()))
return -EPERM; return -EPERM;
if (!access_ok(unsafe_ptr, size))
return -EPERM;
return probe_kernel_write(unsafe_ptr, src, size); return probe_user_write(unsafe_ptr, src, size);
} }
static const struct bpf_func_proto bpf_probe_write_user_proto = { static const struct bpf_func_proto bpf_probe_write_user_proto = {
...@@ -585,41 +699,6 @@ static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = { ...@@ -585,41 +699,6 @@ static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
.arg2_type = ARG_ANYTHING, .arg2_type = ARG_ANYTHING,
}; };
BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
const void *, unsafe_ptr)
{
int ret;
ret = security_locked_down(LOCKDOWN_BPF_READ);
if (ret < 0)
goto out;
/*
* The strncpy_from_unsafe() call will likely not fill the entire
* buffer, but that's okay in this circumstance as we're probing
* arbitrary memory anyway similar to bpf_probe_read() and might
* as well probe the stack. Thus, memory is explicitly cleared
* only in error case, so that improper users ignoring return
* code altogether don't copy garbage; otherwise length of string
* is returned that can be used for bpf_perf_event_output() et al.
*/
ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
out:
memset(dst, 0, size);
return ret;
}
static const struct bpf_func_proto bpf_probe_read_str_proto = {
.func = bpf_probe_read_str,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_ANYTHING,
};
struct send_signal_irq_work { struct send_signal_irq_work {
struct irq_work irq_work; struct irq_work irq_work;
struct task_struct *task; struct task_struct *task;
...@@ -699,8 +778,6 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -699,8 +778,6 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_map_pop_elem_proto; return &bpf_map_pop_elem_proto;
case BPF_FUNC_map_peek_elem: case BPF_FUNC_map_peek_elem:
return &bpf_map_peek_elem_proto; return &bpf_map_peek_elem_proto;
case BPF_FUNC_probe_read:
return &bpf_probe_read_proto;
case BPF_FUNC_ktime_get_ns: case BPF_FUNC_ktime_get_ns:
return &bpf_ktime_get_ns_proto; return &bpf_ktime_get_ns_proto;
case BPF_FUNC_tail_call: case BPF_FUNC_tail_call:
...@@ -727,8 +804,18 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ...@@ -727,8 +804,18 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_current_task_under_cgroup_proto; return &bpf_current_task_under_cgroup_proto;
case BPF_FUNC_get_prandom_u32: case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto; return &bpf_get_prandom_u32_proto;
case BPF_FUNC_probe_read_user:
return &bpf_probe_read_user_proto;
case BPF_FUNC_probe_read_kernel:
return &bpf_probe_read_kernel_proto;
case BPF_FUNC_probe_read:
return &bpf_probe_read_compat_proto;
case BPF_FUNC_probe_read_user_str:
return &bpf_probe_read_user_str_proto;
case BPF_FUNC_probe_read_kernel_str:
return &bpf_probe_read_kernel_str_proto;
case BPF_FUNC_probe_read_str: case BPF_FUNC_probe_read_str:
return &bpf_probe_read_str_proto; return &bpf_probe_read_compat_str_proto;
#ifdef CONFIG_CGROUPS #ifdef CONFIG_CGROUPS
case BPF_FUNC_get_current_cgroup_id: case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto; return &bpf_get_current_cgroup_id_proto;
......
...@@ -18,6 +18,18 @@ probe_read_common(void *dst, const void __user *src, size_t size) ...@@ -18,6 +18,18 @@ probe_read_common(void *dst, const void __user *src, size_t size)
return ret ? -EFAULT : 0; return ret ? -EFAULT : 0;
} }
/*
 * Shared write path: copies @size bytes from @src to @dst through
 * __copy_to_user_inatomic() while page faults are disabled.
 *
 * Returns 0 when __copy_to_user_inatomic() reports 0, -EFAULT for any other
 * result.
 */
static __always_inline long
probe_write_common(void __user *dst, const void *src, size_t size)
{
	long status;

	pagefault_disable();
	status = __copy_to_user_inatomic(dst, src, size);
	pagefault_enable();

	if (status)
		return -EFAULT;

	return 0;
}
/** /**
* probe_kernel_read(): safely attempt to read from a kernel-space location * probe_kernel_read(): safely attempt to read from a kernel-space location
* @dst: pointer to the buffer that shall take the data * @dst: pointer to the buffer that shall take the data
...@@ -31,11 +43,20 @@ probe_read_common(void *dst, const void __user *src, size_t size) ...@@ -31,11 +43,20 @@ probe_read_common(void *dst, const void __user *src, size_t size)
* do_page_fault() doesn't attempt to take mmap_sem. This makes * do_page_fault() doesn't attempt to take mmap_sem. This makes
* probe_kernel_read() suitable for use within regions where the caller * probe_kernel_read() suitable for use within regions where the caller
* already holds mmap_sem, or other locks which nest inside mmap_sem. * already holds mmap_sem, or other locks which nest inside mmap_sem.
*
* probe_kernel_read_strict() is the same as probe_kernel_read() except for
* the case where architectures have non-overlapping user and kernel address
* ranges: probe_kernel_read_strict() will additionally return -EFAULT for
* probing memory on a user address range where probe_user_read() is supposed
* to be used instead.
*/ */
long __weak probe_kernel_read(void *dst, const void *src, size_t size) long __weak probe_kernel_read(void *dst, const void *src, size_t size)
__attribute__((alias("__probe_kernel_read"))); __attribute__((alias("__probe_kernel_read")));
long __weak probe_kernel_read_strict(void *dst, const void *src, size_t size)
__attribute__((alias("__probe_kernel_read")));
long __probe_kernel_read(void *dst, const void *src, size_t size) long __probe_kernel_read(void *dst, const void *src, size_t size)
{ {
long ret; long ret;
...@@ -85,6 +106,7 @@ EXPORT_SYMBOL_GPL(probe_user_read); ...@@ -85,6 +106,7 @@ EXPORT_SYMBOL_GPL(probe_user_read);
* Safely write to address @dst from the buffer at @src. If a kernel fault * Safely write to address @dst from the buffer at @src. If a kernel fault
* happens, handle that and return -EFAULT. * happens, handle that and return -EFAULT.
*/ */
long __weak probe_kernel_write(void *dst, const void *src, size_t size) long __weak probe_kernel_write(void *dst, const void *src, size_t size)
__attribute__((alias("__probe_kernel_write"))); __attribute__((alias("__probe_kernel_write")));
...@@ -94,15 +116,39 @@ long __probe_kernel_write(void *dst, const void *src, size_t size) ...@@ -94,15 +116,39 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
mm_segment_t old_fs = get_fs(); mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS); set_fs(KERNEL_DS);
pagefault_disable(); ret = probe_write_common((__force void __user *)dst, src, size);
ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
pagefault_enable();
set_fs(old_fs); set_fs(old_fs);
return ret ? -EFAULT : 0; return ret;
} }
EXPORT_SYMBOL_GPL(probe_kernel_write); EXPORT_SYMBOL_GPL(probe_kernel_write);
/**
* probe_user_write(): safely attempt to write to a user-space location
* @dst: address to write to
* @src: pointer to the data that shall be written
* @size: size of the data chunk
*
* Safely write to address @dst from the buffer at @src. If a kernel fault
* happens, handle that and return -EFAULT.
*/
long __weak probe_user_write(void __user *dst, const void *src, size_t size)
__attribute__((alias("__probe_user_write")));
/*
 * Default implementation behind probe_user_write().
 *
 * Switches the address limit to USER_DS, and only if access_ok() accepts
 * @dst/@size performs the copy via probe_write_common(); otherwise the
 * result is -EFAULT. The previous address limit is restored before
 * returning.
 */
long __probe_user_write(void __user *dst, const void *src, size_t size)
{
	mm_segment_t saved_fs = get_fs();
	long ret;

	set_fs(USER_DS);
	if (access_ok(dst, size))
		ret = probe_write_common(dst, src, size);
	else
		ret = -EFAULT;
	set_fs(saved_fs);

	return ret;
}
EXPORT_SYMBOL_GPL(probe_user_write);
/** /**
* strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address. * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address.
...@@ -120,8 +166,22 @@ EXPORT_SYMBOL_GPL(probe_kernel_write); ...@@ -120,8 +166,22 @@ EXPORT_SYMBOL_GPL(probe_kernel_write);
* *
* If @count is smaller than the length of the string, copies @count-1 bytes, * If @count is smaller than the length of the string, copies @count-1 bytes,
* sets the last byte of @dst buffer to NUL and returns @count. * sets the last byte of @dst buffer to NUL and returns @count.
*
* strncpy_from_unsafe_strict() is the same as strncpy_from_unsafe() except
* for the case where architectures have non-overlapping user and kernel address
* ranges: strncpy_from_unsafe_strict() will additionally return -EFAULT for
* probing memory on a user address range where strncpy_from_unsafe_user() is
* supposed to be used instead.
*/ */
long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
long __weak strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
__attribute__((alias("__strncpy_from_unsafe")));
long __weak strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
long count)
__attribute__((alias("__strncpy_from_unsafe")));
long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
{ {
mm_segment_t old_fs = get_fs(); mm_segment_t old_fs = get_fs();
const void *src = unsafe_addr; const void *src = unsafe_addr;
......
...@@ -181,7 +181,7 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx) ...@@ -181,7 +181,7 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
if (addrlen != sizeof(*in6)) if (addrlen != sizeof(*in6))
return 0; return 0;
ret = bpf_probe_read(test_params.dst6, sizeof(test_params.dst6), ret = bpf_probe_read_user(test_params.dst6, sizeof(test_params.dst6),
&in6->sin6_addr); &in6->sin6_addr);
if (ret) if (ret)
goto done; goto done;
......
...@@ -118,7 +118,7 @@ int trace_sys_connect(struct pt_regs *ctx) ...@@ -118,7 +118,7 @@ int trace_sys_connect(struct pt_regs *ctx)
if (addrlen != sizeof(*in6)) if (addrlen != sizeof(*in6))
return 0; return 0;
ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr); ret = bpf_probe_read_user(dst6, sizeof(dst6), &in6->sin6_addr);
if (ret) { if (ret) {
inline_ret = ret; inline_ret = ret;
goto done; goto done;
...@@ -129,7 +129,7 @@ int trace_sys_connect(struct pt_regs *ctx) ...@@ -129,7 +129,7 @@ int trace_sys_connect(struct pt_regs *ctx)
test_case = dst6[7]; test_case = dst6[7];
ret = bpf_probe_read(&port, sizeof(port), &in6->sin6_port); ret = bpf_probe_read_user(&port, sizeof(port), &in6->sin6_port);
if (ret) { if (ret) {
inline_ret = ret; inline_ret = ret;
goto done; goto done;
......
...@@ -37,7 +37,7 @@ int bpf_prog1(struct pt_regs *ctx) ...@@ -37,7 +37,7 @@ int bpf_prog1(struct pt_regs *ctx)
if (sockaddr_len > sizeof(orig_addr)) if (sockaddr_len > sizeof(orig_addr))
return 0; return 0;
if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0) if (bpf_probe_read_user(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
return 0; return 0;
mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr); mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr);
......
...@@ -563,10 +563,13 @@ union bpf_attr { ...@@ -563,10 +563,13 @@ union bpf_attr {
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
* *
* int bpf_probe_read(void *dst, u32 size, const void *src) * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
* Description * Description
* For tracing programs, safely attempt to read *size* bytes from * For tracing programs, safely attempt to read *size* bytes from
* address *src* and store the data in *dst*. * kernel space address *unsafe_ptr* and store the data in *dst*.
*
* Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
* instead.
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
* *
...@@ -1428,45 +1431,14 @@ union bpf_attr { ...@@ -1428,45 +1431,14 @@ union bpf_attr {
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
* *
* int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr) * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
* Description * Description
* Copy a NUL terminated string from an unsafe address * Copy a NUL terminated string from an unsafe kernel address
* *unsafe_ptr* to *dst*. The *size* should include the * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
* terminating NUL byte. In case the string length is smaller than * more details.
* *size*, the target is not padded with further NUL bytes. If the
* string length is larger than *size*, just *size*-1 bytes are
* copied and the last byte is set to NUL.
*
* On success, the length of the copied string is returned. This
* makes this helper useful in tracing programs for reading
* strings, and more importantly to get its length at runtime. See
* the following snippet:
* *
* :: * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
* * instead.
* SEC("kprobe/sys_open")
* void bpf_sys_open(struct pt_regs *ctx)
* {
* char buf[PATHLEN]; // PATHLEN is defined to 256
* int res = bpf_probe_read_str(buf, sizeof(buf),
* ctx->di);
*
* // Consume buf, for example push it to
* // userspace via bpf_perf_event_output(); we
* // can use res (the string length) as event
* // size, after checking its boundaries.
* }
*
* In comparison, using **bpf_probe_read()** helper here instead
* to read the string would require to estimate the length at
* compile time, and would often result in copying more memory
* than necessary.
*
* Another useful use case is when parsing individual process
* arguments or individual environment variables navigating
* *current*\ **->mm->arg_start** and *current*\
* **->mm->env_start**: using this helper and the return value,
* one can quickly iterate at the right offset of the memory area.
* Return * Return
* On success, the strictly positive length of the string, * On success, the strictly positive length of the string,
* including the trailing NUL character. On error, a negative * including the trailing NUL character. On error, a negative
...@@ -2777,6 +2749,72 @@ union bpf_attr { ...@@ -2777,6 +2749,72 @@ union bpf_attr {
* restricted to raw_tracepoint bpf programs. * restricted to raw_tracepoint bpf programs.
* Return * Return
* 0 on success, or a negative error in case of failure. * 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Safely attempt to read *size* bytes from user space address
* *unsafe_ptr* and store the data in *dst*.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Safely attempt to read *size* bytes from kernel space address
* *unsafe_ptr* and store the data in *dst*.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe user address
* *unsafe_ptr* to *dst*. The *size* should include the
* terminating NUL byte. In case the string length is smaller than
* *size*, the target is not padded with further NUL bytes. If the
* string length is larger than *size*, just *size*-1 bytes are
* copied and the last byte is set to NUL.
*
* On success, the length of the copied string is returned. This
* makes this helper useful in tracing programs for reading
* strings, and more importantly to get its length at runtime. See
* the following snippet:
*
* ::
*
* SEC("kprobe/sys_open")
* void bpf_sys_open(struct pt_regs *ctx)
* {
* char buf[PATHLEN]; // PATHLEN is defined to 256
* int res = bpf_probe_read_user_str(buf, sizeof(buf),
* ctx->di);
*
* // Consume buf, for example push it to
* // userspace via bpf_perf_event_output(); we
* // can use res (the string length) as event
* // size, after checking its boundaries.
* }
*
* In comparison, using **bpf_probe_read_user()** helper here
* instead to read the string would require to estimate the length
* at compile time, and would often result in copying more memory
* than necessary.
*
* Another useful use case is when parsing individual process
* arguments or individual environment variables navigating
* *current*\ **->mm->arg_start** and *current*\
* **->mm->env_start**: using this helper and the return value,
* one can quickly iterate at the right offset of the memory area.
* Return
* On success, the strictly positive length of the string,
* including the trailing NUL character. On error, a negative
* value.
*
* int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
* to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
* Return
* On success, the strictly positive length of the string, including
* the trailing NUL character. On error, a negative value.
*/ */
#define __BPF_FUNC_MAPPER(FN) \ #define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \ FN(unspec), \
...@@ -2890,7 +2928,11 @@ union bpf_attr { ...@@ -2890,7 +2928,11 @@ union bpf_attr {
FN(sk_storage_delete), \ FN(sk_storage_delete), \
FN(send_signal), \ FN(send_signal), \
FN(tcp_gen_syncookie), \ FN(tcp_gen_syncookie), \
FN(skb_output), FN(skb_output), \
FN(probe_read_user), \
FN(probe_read_kernel), \
FN(probe_read_user_str), \
FN(probe_read_kernel_str),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper /* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call * function eBPF program intends to call
......
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
/*
 * Selftest for the user-memory probe helpers.
 *
 * Loads ./test_probe_user.o, attaches its kprobe program to __sys_connect,
 * issues one connect() call and then verifies two things:
 *   1. the value stored in element 0 of the object's .bss map equals the
 *      sockaddr that was handed to connect() (read path), and
 *   2. the caller's sockaddr buffer has been overwritten with 0xab bytes
 *      (write path).
 */
void test_probe_user(void)
{
#define kprobe_name "__sys_connect"
	static const int key = 0;
	const char *obj_file = "./test_probe_user.o";
	const char *prog_name = "kprobe/" kprobe_name;
	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
	struct sockaddr addr, pristine, scratch;
	struct sockaddr_in *addr_in = (struct sockaddr_in *)&addr;
	struct sockaddr_in *seen_in = (struct sockaddr_in *)&scratch;
	struct bpf_link *link = NULL;
	struct bpf_program *prog;
	struct bpf_object *bpf_obj;
	/* NOTE(review): not referenced directly below; presumably consumed
	 * by the CHECK() macro - confirm in test_progs.h before renaming.
	 */
	int duration = 0;
	int bss_fd;
	int fd;
	int rc;

	/* Open the object, locate the kprobe program and load everything. */
	bpf_obj = bpf_object__open_file(obj_file, &opts);
	if (CHECK(IS_ERR(bpf_obj), "obj_open_file", "err %ld\n",
		  PTR_ERR(bpf_obj)))
		return;

	prog = bpf_object__find_program_by_title(bpf_obj, prog_name);
	if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
		goto cleanup;

	rc = bpf_object__load(bpf_obj);
	if (CHECK(rc, "obj_load", "err %d\n", rc))
		goto cleanup;

	bss_fd = bpf_find_map(__func__, bpf_obj, "test_pro.bss");
	if (CHECK(bss_fd < 0, "find_bss_map", "err %d\n", bss_fd))
		goto cleanup;

	link = bpf_program__attach_kprobe(prog, false, kprobe_name);
	if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n",
		  PTR_ERR(link))) {
		/* Do not hand an error pointer to bpf_link__destroy(). */
		link = NULL;
		goto cleanup;
	}

	/* Build the address passed to connect() and keep a pristine copy. */
	memset(&addr, 0, sizeof(addr));
	addr_in->sin_family = AF_INET;
	addr_in->sin_port = htons(5555);
	addr_in->sin_addr.s_addr = inet_addr("255.255.255.255");
	memcpy(&pristine, &addr, sizeof(addr));

	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (CHECK(fd < 0, "create_sock_fd", "err %d\n", fd))
		goto cleanup;

	/* The result of connect() is not inspected; presumably the call only
	 * has to reach the probed kernel function - TODO confirm.
	 */
	connect(fd, &addr, sizeof(addr));
	close(fd);

	/* Read path: the map value must match what we passed to connect(). */
	rc = bpf_map_lookup_elem(bss_fd, &key, &scratch);
	if (CHECK(rc, "get_kprobe_res",
		  "failed to get kprobe res: %d\n", rc))
		goto cleanup;

	if (CHECK(memcmp(&scratch, &pristine, sizeof(pristine)),
		  "check_kprobe_res",
		  "wrong kprobe res from probe read: %s:%u\n",
		  inet_ntoa(seen_in->sin_addr), ntohs(seen_in->sin_port)))
		goto cleanup;

	/* Write path: our own buffer must now consist of 0xab bytes only. */
	memset(&scratch, 0xab, sizeof(scratch));
	if (CHECK(memcmp(&addr, &scratch, sizeof(scratch)),
		  "check_kprobe_res",
		  "wrong kprobe res from probe write: %s:%u\n",
		  inet_ntoa(addr_in->sin_addr), ntohs(addr_in->sin_port)))
		goto cleanup;

cleanup:
	bpf_link__destroy(link);
	bpf_object__close(bpf_obj);
}
...@@ -79,11 +79,11 @@ int trace_kfree_skb(struct trace_kfree_skb *ctx) ...@@ -79,11 +79,11 @@ int trace_kfree_skb(struct trace_kfree_skb *ctx)
func = ptr->func; func = ptr->func;
})); }));
bpf_probe_read(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset)); bpf_probe_read_kernel(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset));
pkt_type &= 7; pkt_type &= 7;
/* read eth proto */ /* read eth proto */
bpf_probe_read(&pkt_data, sizeof(pkt_data), data + 12); bpf_probe_read_kernel(&pkt_data, sizeof(pkt_data), data + 12);
bpf_printk("rcuhead.next %llx func %llx\n", ptr, func); bpf_printk("rcuhead.next %llx func %llx\n", ptr, func);
bpf_printk("skb->len %d users %d pkt_type %x\n", bpf_printk("skb->len %d users %d pkt_type %x\n",
......
...@@ -72,8 +72,8 @@ static __always_inline void *get_thread_state(void *tls_base, PidData *pidData) ...@@ -72,8 +72,8 @@ static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
void* thread_state; void* thread_state;
int key; int key;
bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr); bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
bpf_probe_read(&thread_state, sizeof(thread_state), bpf_probe_read_user(&thread_state, sizeof(thread_state),
tls_base + 0x310 + key * 0x10 + 0x08); tls_base + 0x310 + key * 0x10 + 0x08);
return thread_state; return thread_state;
} }
...@@ -82,31 +82,33 @@ static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData, ...@@ -82,31 +82,33 @@ static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
FrameData *frame, Symbol *symbol) FrameData *frame, Symbol *symbol)
{ {
// read data from PyFrameObject // read data from PyFrameObject
bpf_probe_read(&frame->f_back, bpf_probe_read_user(&frame->f_back,
sizeof(frame->f_back), sizeof(frame->f_back),
frame_ptr + pidData->offsets.PyFrameObject_back); frame_ptr + pidData->offsets.PyFrameObject_back);
bpf_probe_read(&frame->f_code, bpf_probe_read_user(&frame->f_code,
sizeof(frame->f_code), sizeof(frame->f_code),
frame_ptr + pidData->offsets.PyFrameObject_code); frame_ptr + pidData->offsets.PyFrameObject_code);
// read data from PyCodeObject // read data from PyCodeObject
if (!frame->f_code) if (!frame->f_code)
return false; return false;
bpf_probe_read(&frame->co_filename, bpf_probe_read_user(&frame->co_filename,
sizeof(frame->co_filename), sizeof(frame->co_filename),
frame->f_code + pidData->offsets.PyCodeObject_filename); frame->f_code + pidData->offsets.PyCodeObject_filename);
bpf_probe_read(&frame->co_name, bpf_probe_read_user(&frame->co_name,
sizeof(frame->co_name), sizeof(frame->co_name),
frame->f_code + pidData->offsets.PyCodeObject_name); frame->f_code + pidData->offsets.PyCodeObject_name);
// read actual names into symbol // read actual names into symbol
if (frame->co_filename) if (frame->co_filename)
bpf_probe_read_str(&symbol->file, bpf_probe_read_user_str(&symbol->file,
sizeof(symbol->file), sizeof(symbol->file),
frame->co_filename + pidData->offsets.String_data); frame->co_filename +
pidData->offsets.String_data);
if (frame->co_name) if (frame->co_name)
bpf_probe_read_str(&symbol->name, bpf_probe_read_user_str(&symbol->name,
sizeof(symbol->name), sizeof(symbol->name),
frame->co_name + pidData->offsets.String_data); frame->co_name +
pidData->offsets.String_data);
return true; return true;
} }
...@@ -174,7 +176,7 @@ static __always_inline int __on_event(struct pt_regs *ctx) ...@@ -174,7 +176,7 @@ static __always_inline int __on_event(struct pt_regs *ctx)
event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0); event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
void* thread_state_current = (void*)0; void* thread_state_current = (void*)0;
bpf_probe_read(&thread_state_current, bpf_probe_read_user(&thread_state_current,
sizeof(thread_state_current), sizeof(thread_state_current),
(void*)(long)pidData->current_state_addr); (void*)(long)pidData->current_state_addr);
...@@ -188,11 +190,13 @@ static __always_inline int __on_event(struct pt_regs *ctx) ...@@ -188,11 +190,13 @@ static __always_inline int __on_event(struct pt_regs *ctx)
if (pidData->use_tls) { if (pidData->use_tls) {
uint64_t pthread_created; uint64_t pthread_created;
uint64_t pthread_self; uint64_t pthread_self;
bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10); bpf_probe_read_user(&pthread_self, sizeof(pthread_self),
tls_base + 0x10);
bpf_probe_read(&pthread_created, bpf_probe_read_user(&pthread_created,
sizeof(pthread_created), sizeof(pthread_created),
thread_state + pidData->offsets.PyThreadState_thread); thread_state +
pidData->offsets.PyThreadState_thread);
event->pthread_match = pthread_created == pthread_self; event->pthread_match = pthread_created == pthread_self;
} else { } else {
event->pthread_match = 1; event->pthread_match = 1;
...@@ -204,9 +208,10 @@ static __always_inline int __on_event(struct pt_regs *ctx) ...@@ -204,9 +208,10 @@ static __always_inline int __on_event(struct pt_regs *ctx)
Symbol sym = {}; Symbol sym = {};
int cur_cpu = bpf_get_smp_processor_id(); int cur_cpu = bpf_get_smp_processor_id();
bpf_probe_read(&frame_ptr, bpf_probe_read_user(&frame_ptr,
sizeof(frame_ptr), sizeof(frame_ptr),
thread_state + pidData->offsets.PyThreadState_frame); thread_state +
pidData->offsets.PyThreadState_frame);
int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
if (symbol_counter == NULL) if (symbol_counter == NULL)
......
...@@ -98,7 +98,7 @@ struct strobe_map_raw { ...@@ -98,7 +98,7 @@ struct strobe_map_raw {
/* /*
* having volatile doesn't change anything on BPF side, but clang * having volatile doesn't change anything on BPF side, but clang
* emits warnings for passing `volatile const char *` into * emits warnings for passing `volatile const char *` into
* bpf_probe_read_str that expects just `const char *` * bpf_probe_read_user_str that expects just `const char *`
*/ */
const char* tag; const char* tag;
/* /*
...@@ -309,18 +309,18 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc, ...@@ -309,18 +309,18 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc,
dtv_t *dtv; dtv_t *dtv;
void *tls_ptr; void *tls_ptr;
bpf_probe_read(&tls_index, sizeof(struct tls_index), bpf_probe_read_user(&tls_index, sizeof(struct tls_index),
(void *)loc->offset); (void *)loc->offset);
/* valid module index is always positive */ /* valid module index is always positive */
if (tls_index.module > 0) { if (tls_index.module > 0) {
/* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */ /* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */
bpf_probe_read(&dtv, sizeof(dtv), bpf_probe_read_user(&dtv, sizeof(dtv),
&((struct tcbhead *)tls_base)->dtv); &((struct tcbhead *)tls_base)->dtv);
dtv += tls_index.module; dtv += tls_index.module;
} else { } else {
dtv = NULL; dtv = NULL;
} }
bpf_probe_read(&tls_ptr, sizeof(void *), dtv); bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv);
/* if pointer has (void *)-1 value, then TLS wasn't initialized yet */ /* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
return tls_ptr && tls_ptr != (void *)-1 return tls_ptr && tls_ptr != (void *)-1
? tls_ptr + tls_index.offset ? tls_ptr + tls_index.offset
...@@ -336,7 +336,7 @@ static __always_inline void read_int_var(struct strobemeta_cfg *cfg, ...@@ -336,7 +336,7 @@ static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
if (!location) if (!location)
return; return;
bpf_probe_read(value, sizeof(struct strobe_value_generic), location); bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
data->int_vals[idx] = value->val; data->int_vals[idx] = value->val;
if (value->header.len) if (value->header.len)
data->int_vals_set_mask |= (1 << idx); data->int_vals_set_mask |= (1 << idx);
...@@ -356,13 +356,13 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, ...@@ -356,13 +356,13 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
if (!location) if (!location)
return 0; return 0;
bpf_probe_read(value, sizeof(struct strobe_value_generic), location); bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, value->ptr); len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
/* /*
* if bpf_probe_read_str returns error (<0), due to casting to * if bpf_probe_read_user_str returns error (<0), due to casting to
* unsigned int, it will become big number, so next check is * unsigned int, it will become big number, so next check is
* sufficient to check for errors AND prove to BPF verifier, that * sufficient to check for errors AND prove to BPF verifier, that
* bpf_probe_read_str won't return anything bigger than * bpf_probe_read_user_str won't return anything bigger than
* STROBE_MAX_STR_LEN * STROBE_MAX_STR_LEN
*/ */
if (len > STROBE_MAX_STR_LEN) if (len > STROBE_MAX_STR_LEN)
...@@ -391,8 +391,8 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, ...@@ -391,8 +391,8 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
if (!location) if (!location)
return payload; return payload;
bpf_probe_read(value, sizeof(struct strobe_value_generic), location); bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
if (bpf_probe_read(&map, sizeof(struct strobe_map_raw), value->ptr)) if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
return payload; return payload;
descr->id = map.id; descr->id = map.id;
...@@ -402,7 +402,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, ...@@ -402,7 +402,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
data->req_meta_valid = 1; data->req_meta_valid = 1;
} }
len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, map.tag); len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
if (len <= STROBE_MAX_STR_LEN) { if (len <= STROBE_MAX_STR_LEN) {
descr->tag_len = len; descr->tag_len = len;
payload += len; payload += len;
...@@ -418,14 +418,14 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, ...@@ -418,14 +418,14 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
break; break;
descr->key_lens[i] = 0; descr->key_lens[i] = 0;
len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
map.entries[i].key); map.entries[i].key);
if (len <= STROBE_MAX_STR_LEN) { if (len <= STROBE_MAX_STR_LEN) {
descr->key_lens[i] = len; descr->key_lens[i] = len;
payload += len; payload += len;
} }
descr->val_lens[i] = 0; descr->val_lens[i] = 0;
len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
map.entries[i].val); map.entries[i].val);
if (len <= STROBE_MAX_STR_LEN) { if (len <= STROBE_MAX_STR_LEN) {
descr->val_lens[i] = len; descr->val_lens[i] = len;
......
// SPDX-License-Identifier: GPL-2.0
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <netinet/in.h>
#include "bpf_helpers.h"
#include "bpf_tracing.h"
/* Copy of the sockaddr observed by handle_sys_connect() before it is
 * overwritten.
 */
static struct sockaddr_in old;

/*
 * kprobe on __sys_connect: saves the sockaddr found at the pointer taken
 * from PT_REGS_PARM2() into 'old', then overwrites that memory with 0xab
 * bytes. Always returns 0; helper return values are not checked.
 */
SEC("kprobe/__sys_connect")
int handle_sys_connect(struct pt_regs *ctx)
{
	struct sockaddr_in poison;
	void *uaddr;

	uaddr = (void *)PT_REGS_PARM2(ctx);

	/* Snapshot first, so 'old' holds the caller's original bytes. */
	bpf_probe_read_user(&old, sizeof(old), uaddr);

	/* Then clobber the caller's buffer with a recognizable pattern. */
	__builtin_memset(&poison, 0xab, sizeof(poison));
	bpf_probe_write_user(uaddr, &poison, sizeof(poison));

	return 0;
}
/* License string emitted into the "license" section of the object. */
char _license[] SEC("license") = "GPL";
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
#include <sys/socket.h> #include <sys/socket.h>
#include "bpf_helpers.h" #include "bpf_helpers.h"
#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) #define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &P); val;})
#define TCP_ESTATS_MAGIC 0xBAADBEEF #define TCP_ESTATS_MAGIC 0xBAADBEEF
/* This test case needs "sock" and "pt_regs" data structure. /* This test case needs "sock" and "pt_regs" data structure.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment