Commit eefc06bd authored by David S. Miller

Merge branch 'bpf-probe-write-user'

Sargun Dhillon says:

====================
bpf: add bpf_probe_write_user helper & example

This patch series contains two patches that add support for a probe_write
helper to BPF programs. This allows them to manipulate user memory during
the course of tracing. The second patch in the series has an example that
uses it, in one of the intended ways: to divert execution.

Thanks to Alexei Starovoitov and Daniel Borkmann for being patient, reviewing,
and helping me get familiar with the code base. I've made changes based on their
recommendations.

This helper should be considered for experimental usage and debugging, so we
print a warning to dmesg, along with the command and pid, when someone
tries to install a proglet that uses it. A follow-up patchset will contain a
mechanism to verify the safety of the probe beyond what was done by hand.
----
v1->v2: restrict writing to user space, as opposed to globally
v2->v3: Fixed formatting issues
v3->v4: Rename copy_to_user -> bpf_probe_write
        Simplify checking of whether or not it's safe to write
        Add warnings to dmesg
v4->v5: Raise warning level
        Cleanup location of warning code
        Make test fail when helper is broken
v5->v6: General formatting cleanup
        Rename bpf_probe_write -> bpf_probe_write_user
v6->v7: More formatting cleanup.
        Clarifying a few comments
	Clarified log message
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 9b022a6e cf9b1199
...@@ -365,6 +365,16 @@ enum bpf_func_id { ...@@ -365,6 +365,16 @@ enum bpf_func_id {
*/ */
BPF_FUNC_get_current_task, BPF_FUNC_get_current_task,
/**
* bpf_probe_write_user(void *dst, void *src, int len)
* safely attempt to write to a location
* @dst: destination address in userspace
* @src: source address on stack
* @len: number of bytes to copy
* Return: 0 on success or negative error
*/
BPF_FUNC_probe_write_user,
__BPF_FUNC_MAX_ID, __BPF_FUNC_MAX_ID,
}; };
......
...@@ -81,6 +81,49 @@ static const struct bpf_func_proto bpf_probe_read_proto = { ...@@ -81,6 +81,49 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
.arg3_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING,
}; };
/*
 * bpf_probe_write_user - BPF helper that copies bytes to a user-space address.
 * @r1: destination address; not trusted, validated below
 * @r2: source pointer
 * @r3: number of bytes to copy (truncated to int)
 * @r4: unused
 * @r5: unused
 *
 * Return: -EPERM when the calling context or the destination range fails
 * one of the checks below, otherwise the result of probe_kernel_write().
 */
static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	void *dst = (void *) (long) r1;
	void *from = (void *) (long) r2;
	int len = (int) r3;

	/*
	 * The helper is only safe to run in user context, so refuse when
	 * called from interrupt context, from a kthread, or from a task
	 * that is exiting.
	 */
	if (unlikely(in_interrupt()))
		return -EPERM;
	if (unlikely(current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;

	/*
	 * access_ok() should prevent writing to non-user memory, but in
	 * some situations (nommu, temporary switch, etc) it does not
	 * provide enough validation, hence the explicit KERNEL_DS check
	 * ahead of it.
	 */
	if (unlikely(segment_eq(get_fs(), KERNEL_DS)))
		return -EPERM;
	if (!access_ok(VERIFY_WRITE, dst, len))
		return -EPERM;

	return probe_kernel_write(dst, from, len);
}
/*
 * Prototype descriptor for the bpf_probe_write_user() helper.
 *
 * The helper returns an integer. Argument 1 (the destination) is declared
 * ARG_ANYTHING; the helper itself validates it at run time. Arguments 2 and
 * 3 are declared as a stack pointer and a constant stack size, i.e. the
 * source buffer and its length.
 */
static const struct bpf_func_proto bpf_probe_write_user_proto = {
.func = bpf_probe_write_user,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_ANYTHING,
.arg2_type = ARG_PTR_TO_STACK,
.arg3_type = ARG_CONST_STACK_SIZE,
};
/*
 * bpf_get_probe_write_proto - hand out the bpf_probe_write_user prototype.
 *
 * Logs a rate-limited warning naming the current task (comm and pid) every
 * time a program asks for this helper, because the helper can modify user
 * memory.
 *
 * Return: pointer to bpf_probe_write_user_proto (never NULL).
 */
static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
{
	/* Terminate the record with '\n' so it is emitted as a complete line. */
	pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!\n",
			    current->comm, task_pid_nr(current));

	return &bpf_probe_write_user_proto;
}
/* /*
* limited trace_printk() * limited trace_printk()
* only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
...@@ -362,6 +405,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) ...@@ -362,6 +405,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
return &bpf_get_smp_processor_id_proto; return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_perf_event_read: case BPF_FUNC_perf_event_read:
return &bpf_perf_event_read_proto; return &bpf_perf_event_read_proto;
case BPF_FUNC_probe_write_user:
return bpf_get_probe_write_proto();
default: default:
return NULL; return NULL;
} }
......
...@@ -14,6 +14,7 @@ hostprogs-y += tracex3 ...@@ -14,6 +14,7 @@ hostprogs-y += tracex3
hostprogs-y += tracex4 hostprogs-y += tracex4
hostprogs-y += tracex5 hostprogs-y += tracex5
hostprogs-y += tracex6 hostprogs-y += tracex6
hostprogs-y += test_probe_write_user
hostprogs-y += trace_output hostprogs-y += trace_output
hostprogs-y += lathist hostprogs-y += lathist
hostprogs-y += offwaketime hostprogs-y += offwaketime
...@@ -37,6 +38,7 @@ tracex3-objs := bpf_load.o libbpf.o tracex3_user.o ...@@ -37,6 +38,7 @@ tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
tracex4-objs := bpf_load.o libbpf.o tracex4_user.o tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
tracex5-objs := bpf_load.o libbpf.o tracex5_user.o tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
tracex6-objs := bpf_load.o libbpf.o tracex6_user.o tracex6-objs := bpf_load.o libbpf.o tracex6_user.o
test_probe_write_user-objs := bpf_load.o libbpf.o test_probe_write_user_user.o
trace_output-objs := bpf_load.o libbpf.o trace_output_user.o trace_output-objs := bpf_load.o libbpf.o trace_output_user.o
lathist-objs := bpf_load.o libbpf.o lathist_user.o lathist-objs := bpf_load.o libbpf.o lathist_user.o
offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o
...@@ -59,6 +61,7 @@ always += tracex3_kern.o ...@@ -59,6 +61,7 @@ always += tracex3_kern.o
always += tracex4_kern.o always += tracex4_kern.o
always += tracex5_kern.o always += tracex5_kern.o
always += tracex6_kern.o always += tracex6_kern.o
always += test_probe_write_user_kern.o
always += trace_output_kern.o always += trace_output_kern.o
always += tcbpf1_kern.o always += tcbpf1_kern.o
always += lathist_kern.o always += lathist_kern.o
...@@ -85,6 +88,7 @@ HOSTLOADLIBES_tracex3 += -lelf ...@@ -85,6 +88,7 @@ HOSTLOADLIBES_tracex3 += -lelf
HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex4 += -lelf -lrt
HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex5 += -lelf
HOSTLOADLIBES_tracex6 += -lelf HOSTLOADLIBES_tracex6 += -lelf
HOSTLOADLIBES_test_probe_write_user += -lelf
HOSTLOADLIBES_trace_output += -lelf -lrt HOSTLOADLIBES_trace_output += -lelf -lrt
HOSTLOADLIBES_lathist += -lelf HOSTLOADLIBES_lathist += -lelf
HOSTLOADLIBES_offwaketime += -lelf HOSTLOADLIBES_offwaketime += -lelf
......
...@@ -41,6 +41,8 @@ static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, ...@@ -41,6 +41,8 @@ static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data,
(void *) BPF_FUNC_perf_event_output; (void *) BPF_FUNC_perf_event_output;
static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
(void *) BPF_FUNC_get_stackid; (void *) BPF_FUNC_get_stackid;
static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
(void *) BPF_FUNC_probe_write_user;
/* llvm builtin functions that eBPF C program may use to /* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions * emit BPF_LD_ABS and BPF_LD_IND instructions
......
/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <uapi/linux/bpf.h>
#include <linux/version.h>
#include "bpf_helpers.h"
/*
 * Hash map consulted by the connect() probe in this file: the key is the
 * destination address the caller asked for and the value is the address to
 * substitute. Both are raw struct sockaddr_in images; at most 256 entries.
 */
struct bpf_map_def SEC("maps") dnat_map = {
.type = BPF_MAP_TYPE_HASH,
.key_size = sizeof(struct sockaddr_in),
.value_size = sizeof(struct sockaddr_in),
.max_entries = 256,
};
/* kprobe is NOT a stable ABI
 * kernel functions can be removed, renamed or completely change semantics.
 * Number of arguments and their positions can change, etc.
 * In such case this bpf+kprobe example will no longer be meaningful
 *
 * This example sits on a syscall, and the syscall ABI is relatively stable.
 * Of course, across platforms and over time, that ABI may still change.
 *
 * bpf_prog1 - redirect connect() to the address stored in dnat_map.
 * @ctx: probe registers; argument 2 is the user sockaddr pointer and
 *       argument 3 is its length.
 *
 * Reads the caller's sockaddr, looks it up in dnat_map and, on a hit,
 * overwrites the caller's buffer with the mapped address. The result of the
 * write is not checked; the rewrite is best effort.
 *
 * Return: always 0.
 */
SEC("kprobe/sys_connect")
int bpf_prog1(struct pt_regs *ctx)
{
	struct sockaddr_in new_addr, orig_addr = {};
	struct sockaddr_in *mapped_addr;
	void *sockaddr_arg = (void *)PT_REGS_PARM2(ctx);
	int sockaddr_len = (int)PT_REGS_PARM3(ctx);

	/*
	 * Only touch buffers that are exactly one sockaddr_in. A full
	 * sockaddr_in is read from and written back to the caller's buffer
	 * below, so a shorter buffer would be overrun.
	 */
	if (sockaddr_len != sizeof(orig_addr))
		return 0;

	if (bpf_probe_read(&orig_addr, sizeof(orig_addr), sockaddr_arg) != 0)
		return 0;

	mapped_addr = bpf_map_lookup_elem(&dnat_map, &orig_addr);
	if (mapped_addr != NULL) {
		/* Copy to the stack first: the helper's source must be on the stack. */
		memcpy(&new_addr, mapped_addr, sizeof(new_addr));
		bpf_probe_write_user(sockaddr_arg, &new_addr,
				     sizeof(new_addr));
	}
	return 0;
}
/* Program metadata tagged via SEC() as "license" and "version": the license
 * string is "GPL" and the version is the LINUX_VERSION_CODE this object was
 * built against.
 */
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
#include <stdio.h>
#include <assert.h>
#include <linux/bpf.h>
#include <unistd.h>
#include "libbpf.h"
#include "bpf_load.h"
#include <sys/socket.h>
#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>
/*
 * End-to-end check of the bpf_probe_write_user sample.
 *
 * Loads "<argv[0]>_kern.o", binds a TCP server to an ephemeral loopback
 * port, stores a mapping from a bogus destination (255.255.255.255:5555) to
 * the server's real address in the first loaded map, then connects a client
 * to the bogus destination. The run succeeds only when the client's peer
 * address equals the server's bound address, i.e. the connect() destination
 * was rewritten.
 *
 * Return: 0 on success, 1 when the BPF object cannot be loaded. Any other
 * failure trips an assert().
 */
int main(int ac, char **argv)
{
	int serverfd, serverconnfd, clientfd;
	int err;
	socklen_t sockaddr_len;
	struct sockaddr serv_addr, mapped_addr, tmp_addr;
	struct sockaddr_in *serv_addr_in, *mapped_addr_in, *tmp_addr_in;
	char filename[256];
	char *ip;

	serv_addr_in = (struct sockaddr_in *)&serv_addr;
	mapped_addr_in = (struct sockaddr_in *)&mapped_addr;
	tmp_addr_in = (struct sockaddr_in *)&tmp_addr;

	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);

	if (load_bpf_file(filename)) {
		printf("%s", bpf_log_buf);
		return 1;
	}

	/*
	 * Calls with side effects are kept outside assert() so that building
	 * with NDEBUG cannot compile them away. Descriptor 0 is a valid
	 * result, hence the >= 0 checks.
	 */
	serverfd = socket(AF_INET, SOCK_STREAM, 0);
	assert(serverfd >= 0);
	clientfd = socket(AF_INET, SOCK_STREAM, 0);
	assert(clientfd >= 0);

	/* Bind server to ephemeral port on lo */
	memset(&serv_addr, 0, sizeof(serv_addr));
	serv_addr_in->sin_family = AF_INET;
	serv_addr_in->sin_port = 0;
	serv_addr_in->sin_addr.s_addr = htonl(INADDR_LOOPBACK);

	err = bind(serverfd, &serv_addr, sizeof(serv_addr));
	assert(err == 0);

	/* Learn which port the kernel picked. */
	sockaddr_len = sizeof(serv_addr);
	err = getsockname(serverfd, &serv_addr, &sockaddr_len);
	assert(err == 0);
	ip = inet_ntoa(serv_addr_in->sin_addr);
	printf("Server bound to: %s:%d\n", ip, ntohs(serv_addr_in->sin_port));

	/* Bogus destination that the probe should redirect to the server. */
	memset(&mapped_addr, 0, sizeof(mapped_addr));
	mapped_addr_in->sin_family = AF_INET;
	mapped_addr_in->sin_port = htons(5555);
	mapped_addr_in->sin_addr.s_addr = inet_addr("255.255.255.255");

	err = bpf_update_elem(map_fd[0], &mapped_addr, &serv_addr, BPF_ANY);
	assert(!err);

	err = listen(serverfd, 5);
	assert(err == 0);

	ip = inet_ntoa(mapped_addr_in->sin_addr);
	printf("Client connecting to: %s:%d\n",
	       ip, ntohs(mapped_addr_in->sin_port));

	err = connect(clientfd, &mapped_addr, sizeof(mapped_addr));
	assert(err == 0);

	/* Format the peer address only after accept() has filled it in. */
	sockaddr_len = sizeof(tmp_addr);
	serverconnfd = accept(serverfd, &tmp_addr, &sockaddr_len);
	assert(serverconnfd >= 0);
	ip = inet_ntoa(tmp_addr_in->sin_addr);
	printf("Server received connection from: %s:%d\n",
	       ip, ntohs(tmp_addr_in->sin_port));

	sockaddr_len = sizeof(tmp_addr);
	err = getpeername(clientfd, &tmp_addr, &sockaddr_len);
	assert(err == 0);
	ip = inet_ntoa(tmp_addr_in->sin_addr);
	printf("Client's peer address: %s:%d\n",
	       ip, ntohs(tmp_addr_in->sin_port));

	/* Is the server's getsockname = the socket getpeername */
	assert(memcmp(&serv_addr, &tmp_addr, sizeof(struct sockaddr_in)) == 0);

	/* Silences set-but-unused warnings when assert() is compiled out. */
	(void)err;

	close(serverconnfd);
	close(clientfd);
	close(serverfd);

	return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment