Commit 2da34267 authored by yonghong-song's avatar yonghong-song Committed by GitHub

generate indirect parameter assignment if arch uses syscall wrapper (#1816)

Fix issue #1802.

On x64, the following commit (in 4.17) changed the raw parameter passed to
the syscall entry function from the list of parameters supplied by user space
to a single `pt_regs *` parameter. Also in 4.17, the x64 syscall entry function
was renamed from `sys_<name>` to `__x64_sys_<name>`.

```
commit fa697140f9a20119a9ec8fd7460cc4314fbdaff3
Author: Dominik Brodowski <linux@dominikbrodowski.net>
Date:   Thu Apr 5 11:53:02 2018 +0200

    syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls

    Let's make use of ARCH_HAS_SYSCALL_WRAPPER=y on pure 64-bit x86-64 systems:

    Each syscall defines a stub which takes struct pt_regs as its only
    argument. It decodes just those parameters it needs, e.g:

            asmlinkage long sys_xyzzy(const struct pt_regs *regs)
            {
                    return SyS_xyzzy(regs->di, regs->si, regs->dx);
            }

    This approach avoids leaking random user-provided register content down
    the call chain.

    ...
```

In bcc, we support kprobe function signatures in the bpf program.
The rewriter will automatically generate proper assignment to
these parameters. With the above function signature change, the
original method does not work any more.

This patch enhances the rewriter to generate two versions of the code, guarded
by CONFIG_ARCH_HAS_SYSCALL_WRAPPER. But we need to identify
whether a function will be attached to a syscall entry function
at prog load time, and at that point the program has not yet been
attached to any event.

The prefix `kprobe__` is used for kprobe autoload, so we can use
`kprobe____x64_sys_` as the prefix to identify x64 syscall entry
functions. To support other architectures and non-autoloading programs,
the prefix `syscall__` is introduced to signal that a function is a syscall
entry function.

trace.py and other tools which use kprobe syscall entry functions
are also modified to use the new interface so that they
work properly with 4.17.
Signed-off-by: Yonghong Song <yhs@fb.com>
parent eebd4856
......@@ -429,9 +429,83 @@ DiagnosticBuilder ProbeVisitor::error(SourceLocation loc, const char (&fmt)[N])
BTypeVisitor::BTypeVisitor(ASTContext &C, BFrontendAction &fe)
: C(C), diag_(C.getDiagnostics()), fe_(fe), rewriter_(fe.rewriter()), out_(llvm::errs()) {}
bool BTypeVisitor::VisitFunctionDecl(FunctionDecl *D) {
// Appends to `preamble` one local declaration for every entry of fn_args_
// after the first. Each declaration reuses the parameter's rewritten source
// text and is initialized from a field of the first argument (the ctx):
//     <param text> = <ctx name>-><calling_conv_regs[k - 1]>;
// where k is the parameter's position in fn_args_. Every such parameter is
// also tagged with an implicit UnavailableAttr carrying the message "ptregs".
//
// D is not consulted here; the parameters are taken from fn_args_.
// TODO: this initialization should be emitted only when the program asks
// for it.
void BTypeVisitor::genParamDirectAssign(FunctionDecl *D, string& preamble,
                                        const char **calling_conv_regs) {
  // Position 0 is the ctx itself, so start from the first real argument.
  for (size_t pos = 1; pos < fn_args_.size(); ++pos) {
    ParmVarDecl *param = fn_args_[pos];

    // Capture the declaration text before marking the parameter.
    string decl_text =
        rewriter_.getRewrittenText(expansionRange(param->getSourceRange()));
    param->addAttr(UnavailableAttr::CreateImplicit(C, "ptregs"));

    // Parameter at position k is backed by register entry k - 1.
    const char *reg_field = calling_conv_regs[pos - 1];
    preamble += " " + decl_text + " = " + fn_args_[0]->getName().str() +
                "->" + string(reg_field) + ";";
  }
}
// Appends to `preamble` the "indirect" form of parameter initialization.
//
// First a secondary context pointer is declared, named "__" + <ctx name>,
// and loaded from the field calling_conv_regs[0] of the first argument:
//     struct pt_regs * __ctx = ctx-><calling_conv_regs[0]>;
// Then, for each remaining entry of fn_args_ at position k, the parameter is
// declared uninitialized and filled with bpf_probe_read() from the field
// calling_conv_regs[k - 1] of that secondary context.
//
// D is not consulted here; the parameters are taken from fn_args_.
// TODO: this initialization should be emitted only when the program asks
// for it.
void BTypeVisitor::genParamIndirectAssign(FunctionDecl *D, string& preamble,
                                          const char **calling_conv_regs) {
  // Nothing to emit when no arguments were recorded.
  if (fn_args_.size() == 0)
    return;

  // The first recorded argument is the ctx; derive the inner context from it.
  const string outer_ctx = fn_args_[0]->getName().str();
  const string inner_ctx = "__" + outer_ctx;
  preamble += " struct pt_regs * " + inner_ctx + " = " + outer_ctx + "->" +
              string(calling_conv_regs[0]) + ";";

  for (size_t pos = 1; pos < fn_args_.size(); ++pos) {
    ParmVarDecl *param = fn_args_[pos];
    const string decl_text =
        rewriter_.getRewrittenText(expansionRange(param->getSourceRange()));
    const string param_name = param->getName().str();

    // Parameter at position k is backed by register entry k - 1.
    const char *reg_field = calling_conv_regs[pos - 1];

    // Declare the variable, then copy its value out of the inner context.
    preamble += "\n " + decl_text + ";";
    preamble += " bpf_probe_read(&" + param_name + ", sizeof(" + param_name +
                "), &" + inner_ctx + "->" + string(reg_field) + ");";
  }
}
// Rewrites the parameter list and opening brace of function D.
//
// When D has more than one parameter, a preamble initializing the extra
// parameters is generated and the source range running from the end location
// of the first parameter to the end location of the last parameter is replaced
// with the name of the first recorded argument (the ctx).
//
// Functions whose name starts with "syscall__" or "kprobe____x64_sys_" are
// treated as attaching to a kprobe syscall entry function: both the indirect
// and the direct assignment forms are emitted, selected at compile time by
// CONFIG_ARCH_HAS_SYSCALL_WRAPPER. For __x64_sys_* syscalls the wrapper is
// always in effect; the guard exists for "syscall__" on other architectures.
//
// Finally, if the body is a compound statement, its opening brace is replaced
// by the preamble (which itself starts with "{\n").
void BTypeVisitor::rewriteFuncParam(FunctionDecl *D) {
  const char **regs = get_call_conv();
  string prologue = "{\n";

  if (D->param_size() > 1) {
    const string fn_name = D->getName().str();
    const bool is_syscall_entry =
        fn_name.compare(0, 9, "syscall__") == 0 ||
        fn_name.compare(0, 18, "kprobe____x64_sys_") == 0;

    if (!is_syscall_entry) {
      genParamDirectAssign(D, prologue, regs);
    } else {
      // Guard parameter assignment with CONFIG_ARCH_HAS_SYSCALL_WRAPPER.
      prologue += "#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER\n";
      genParamIndirectAssign(D, prologue, regs);
      prologue += "\n#else\n";
      genParamDirectAssign(D, prologue, regs);
      prologue += "\n#endif\n";
    }

    // Collapse the parameter list down to just the ctx name.
    const SourceRange params_span(
        D->getParamDecl(0)->getLocEnd(),
        D->getParamDecl(D->getNumParams() - 1)->getLocEnd());
    rewriter_.ReplaceText(expansionRange(params_span),
                          fn_args_[0]->getName());
  }

  // For each trace argument, convert the variable from ptregs to something
  // on the stack by swapping the opening brace for the generated preamble.
  CompoundStmt *body = dyn_cast<CompoundStmt>(D->getBody());
  if (body)
    rewriter_.ReplaceText(body->getLBracLoc(), 1, prologue);
}
bool BTypeVisitor::VisitFunctionDecl(FunctionDecl *D) {
// put each non-static non-inline function decl in its own section, to be
// extracted by the MemoryManager
auto real_start_loc = rewriter_.getSourceMgr().getFileLoc(D->getLocStart());
......@@ -447,37 +521,17 @@ bool BTypeVisitor::VisitFunctionDecl(FunctionDecl *D) {
"too many arguments, bcc only supports in-register parameters");
return false;
}
// remember the arg names of the current function...first one is the ctx
fn_args_.clear();
string preamble = "{";
for (auto arg_it = D->param_begin(); arg_it != D->param_end(); arg_it++) {
auto arg = *arg_it;
auto *arg = *arg_it;
if (arg->getName() == "") {
error(arg->getLocEnd(), "arguments to BPF program definition must be named");
return false;
}
fn_args_.push_back(arg);
if (fn_args_.size() > 1) {
// Move the args into a preamble section where the same params are
// declared and initialized from pt_regs.
// Todo: this init should be done only when the program requests it.
string text = rewriter_.getRewrittenText(expansionRange(arg->getSourceRange()));
arg->addAttr(UnavailableAttr::CreateImplicit(C, "ptregs"));
size_t d = fn_args_.size() - 2;
const char *reg = calling_conv_regs[d];
preamble += " " + text + " = " + fn_args_[0]->getName().str() + "->" +
string(reg) + ";";
}
}
if (D->param_size() > 1) {
rewriter_.ReplaceText(
expansionRange(SourceRange(D->getParamDecl(0)->getLocEnd(),
D->getParamDecl(D->getNumParams() - 1)->getLocEnd())),
fn_args_[0]->getName());
}
// for each trace argument, convert the variable from ptregs to something on stack
if (CompoundStmt *S = dyn_cast<CompoundStmt>(D->getBody()))
rewriter_.ReplaceText(S->getLBracLoc(), 1, preamble);
rewriteFuncParam(D);
} else if (D->hasBody() &&
rewriter_.getSourceMgr().getFileID(real_start_loc)
== rewriter_.getSourceMgr().getMainFileID()) {
......
......@@ -70,6 +70,11 @@ class BTypeVisitor : public clang::RecursiveASTVisitor<BTypeVisitor> {
private:
clang::SourceRange expansionRange(clang::SourceRange range);
bool checkFormatSpecifiers(const std::string& fmt, clang::SourceLocation loc);
void genParamDirectAssign(clang::FunctionDecl *D, std::string& preamble,
const char **calling_conv_regs);
void genParamIndirectAssign(clang::FunctionDecl *D, std::string& preamble,
const char **calling_conv_regs);
void rewriteFuncParam(clang::FunctionDecl *D);
template <unsigned N>
clang::DiagnosticBuilder error(clang::SourceLocation loc, const char (&fmt)[N]);
template <unsigned N>
......
......@@ -98,7 +98,7 @@ static int submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data)
return 0;
}
int do_sys_execve(struct pt_regs *ctx,
int syscall__execve(struct pt_regs *ctx,
const char __user *filename,
const char __user *const __user *__argv,
const char __user *const __user *__envp)
......@@ -146,7 +146,7 @@ if args.ebpf:
# initialize BPF
b = BPF(text=bpf_text)
execve_fnname = b.get_syscall_fnname("execve")
b.attach_kprobe(event=execve_fnname, fn_name="do_sys_execve")
b.attach_kprobe(event=execve_fnname, fn_name="syscall__execve")
b.attach_kretprobe(event=execve_fnname, fn_name="do_ret_sys_execve")
# header
......
......@@ -60,7 +60,7 @@ struct data_t {
BPF_HASH(infotmp, u32, struct val_t);
BPF_PERF_OUTPUT(events);
int do_sys_kill(struct pt_regs *ctx, int tpid, int sig)
int syscall__kill(struct pt_regs *ctx, int tpid, int sig)
{
u32 pid = bpf_get_current_pid_tgid();
FILTER
......@@ -112,7 +112,7 @@ if debug or args.ebpf:
# initialize BPF
b = BPF(text=bpf_text)
kill_fnname = b.get_syscall_fnname("kill")
b.attach_kprobe(event=kill_fnname, fn_name="do_sys_kill")
b.attach_kprobe(event=kill_fnname, fn_name="syscall__kill")
b.attach_kretprobe(event=kill_fnname, fn_name="do_ret_sys_kill")
......
......@@ -86,7 +86,7 @@ struct data_t {
BPF_PERF_OUTPUT(events);
int do_sys_mount(struct pt_regs *ctx, char __user *source,
int syscall__mount(struct pt_regs *ctx, char __user *source,
char __user *target, char __user *type,
unsigned long flags)
{
......@@ -145,7 +145,7 @@ int do_ret_sys_mount(struct pt_regs *ctx)
return 0;
}
int do_sys_umount(struct pt_regs *ctx, char __user *target, int flags)
int syscall__umount(struct pt_regs *ctx, char __user *target, int flags)
{
struct data_t event = {};
struct task_struct *task;
......@@ -404,10 +404,10 @@ def main():
exit()
b = bcc.BPF(text=bpf_text)
mount_fnname = b.get_syscall_fnname("mount")
b.attach_kprobe(event=mount_fnname, fn_name="do_sys_mount")
b.attach_kprobe(event=mount_fnname, fn_name="syscall__mount")
b.attach_kretprobe(event=mount_fnname, fn_name="do_ret_sys_mount")
umount_fnname = b.get_syscall_fnname("umount")
b.attach_kprobe(event=umount_fnname, fn_name="do_sys_umount")
b.attach_kprobe(event=umount_fnname, fn_name="syscall__umount")
b.attach_kretprobe(event=umount_fnname, fn_name="do_ret_sys_umount")
b['events'].open_perf_buffer(
functools.partial(print_event, mounts, umounts))
......
......@@ -61,7 +61,7 @@ BPF_HASH(args_filename, u32, const char *);
BPF_HASH(infotmp, u32, struct val_t);
BPF_PERF_OUTPUT(events);
int trace_entry(struct pt_regs *ctx, const char __user *filename)
int syscall__entry(struct pt_regs *ctx, const char __user *filename)
{
struct val_t val = {};
u32 pid = bpf_get_current_pid_tgid();
......@@ -116,17 +116,17 @@ b = BPF(text=bpf_text)
# actually exist before attaching the probes
syscall_fnname = b.get_syscall_fnname("stat")
if BPF.ksymname(syscall_fnname) != -1:
b.attach_kprobe(event=syscall_fnname, fn_name="trace_entry")
b.attach_kprobe(event=syscall_fnname, fn_name="syscall__entry")
b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return")
syscall_fnname = b.get_syscall_fnname("statfs")
if BPF.ksymname(syscall_fnname) != -1:
b.attach_kprobe(event=syscall_fnname, fn_name="trace_entry")
b.attach_kprobe(event=syscall_fnname, fn_name="syscall__entry")
b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return")
syscall_fnname = b.get_syscall_fnname("newstat")
if BPF.ksymname(syscall_fnname) != -1:
b.attach_kprobe(event=syscall_fnname, fn_name="trace_entry")
b.attach_kprobe(event=syscall_fnname, fn_name="syscall__entry")
b.attach_kretprobe(event=syscall_fnname, fn_name="trace_return")
TASK_COMM_LEN = 16 # linux/sched.h
......
......@@ -25,14 +25,14 @@ struct data_t {
BPF_PERF_OUTPUT(events);
void do_sys_sync(void *ctx) {
void syscall__sync(void *ctx) {
struct data_t data = {};
data.ts = bpf_ktime_get_ns() / 1000;
events.perf_submit(ctx, &data, sizeof(data));
};
""")
b.attach_kprobe(event=b.get_syscall_fnname("sync"),
fn_name="do_sys_sync")
fn_name="syscall__sync")
class Data(ct.Structure):
_fields_ = [
......
......@@ -64,6 +64,11 @@ class Probe(object):
self.probe_name = re.sub(r'[^A-Za-z0-9_]', '_',
self.probe_name)
# compiler can generate proper codes for function
# signatures with "syscall__" prefix
if self.is_syscall_kprobe:
self.probe_name = "syscall__" + self.probe_name[6:]
def __str__(self):
return "%s:%s:%s FLT=%s ACT=%s/%s" % (self.probe_type,
self.library, self._display_function(), self.filter,
......@@ -154,6 +159,12 @@ class Probe(object):
self.library = ':'.join(parts[1:-1])
self.function = parts[-1]
# only x64 syscalls needs checking, no other syscall wrapper yet.
self.is_syscall_kprobe = False
if self.probe_type == "p" and len(self.library) == 0 and \
self.function[:10] == "__x64_sys_":
self.is_syscall_kprobe = True
def _find_usdt_probe(self):
target = Probe.pid if Probe.pid and Probe.pid != -1 \
else Probe.tgid
......@@ -194,14 +205,32 @@ class Probe(object):
if len(part) > 0:
self.values.append(part)
aliases = {
"retval": "PT_REGS_RC(ctx)",
aliases_arg = {
"arg1": "PT_REGS_PARM1(ctx)",
"arg2": "PT_REGS_PARM2(ctx)",
"arg3": "PT_REGS_PARM3(ctx)",
"arg4": "PT_REGS_PARM4(ctx)",
"arg5": "PT_REGS_PARM5(ctx)",
"arg6": "PT_REGS_PARM6(ctx)",
}
aliases_indarg = {
"arg1": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM1(ctx);"
" bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM1(_ctx))); _val;})",
"arg2": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM2(ctx);"
" bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM2(_ctx))); _val;})",
"arg3": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM3(ctx);"
" bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM3(_ctx))); _val;})",
"arg4": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM4(ctx);"
" bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM4(_ctx))); _val;})",
"arg5": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM5(ctx);"
" bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM5(_ctx))); _val;})",
"arg6": "({u64 _val; struct pt_regs *_ctx = PT_REGS_PARM6(ctx);"
" bpf_probe_read(&_val, sizeof(_val), &(PT_REGS_PARM6(_ctx))); _val;})",
}
aliases_common = {
"retval": "PT_REGS_RC(ctx)",
"$uid": "(unsigned)(bpf_get_current_uid_gid() & 0xffffffff)",
"$gid": "(unsigned)(bpf_get_current_uid_gid() >> 32)",
"$pid": "(unsigned)(bpf_get_current_pid_tgid() & 0xffffffff)",
......@@ -229,13 +258,19 @@ static inline bool %s(char const *ignored, uintptr_t str) {
return fname
def _rewrite_expr(self, expr):
for alias, replacement in Probe.aliases.items():
if self.is_syscall_kprobe:
for alias, replacement in Probe.aliases_indarg.items():
expr = expr.replace(alias, replacement)
else:
for alias, replacement in Probe.aliases_arg.items():
# For USDT probes, we replace argN values with the
# actual arguments for that probe obtained using
# bpf_readarg_N macros emitted at BPF construction.
if alias.startswith("arg") and self.probe_type == "u":
if self.probe_type == "u":
continue
expr = expr.replace(alias, replacement)
for alias, replacement in Probe.aliases_common.items():
expr = expr.replace(alias, replacement)
matches = re.finditer('STRCMP\\(("[^"]+\\")', expr)
for match in matches:
string = match.group(1)
......@@ -362,9 +397,8 @@ BPF_PERF_OUTPUT(%s);
text = ""
if self.probe_type != "u":
return text
for arg, _ in Probe.aliases.items():
if not (arg.startswith("arg") and
(arg in self.filter)):
for arg, _ in Probe.aliases_arg.items():
if not (arg in self.filter):
continue
arg_index = int(arg.replace("arg", ""))
arg_ctype = self.usdt.get_probe_arg_ctype(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment