Commit accd4cf5 authored by Sasha Goldshtein's avatar Sasha Goldshtein Committed by 4ast

trace: Add %K and %U format specifiers (#742)

The %K and %U format specifiers can be used in a trace
format string to resolve kernel and user symbols,
respectively. For example, the pthread_create USDT probe
has an argument pointing to the new thread's function.
To trace pthread_create and print the symbolic name of
the new thread's function, use:

```
trace 'u:pthread:pthread_create "%U", arg3'
```

The %U specifier resolves addresses in the event's process,
while the %K specifier resolves kernel addresses.
parent 99a3bc86
...@@ -92,6 +92,12 @@ number of arguments as there are placeholders in the format string. The ...@@ -92,6 +92,12 @@ number of arguments as there are placeholders in the format string. The
format specifier replacements may be any C expressions, and may refer to the format specifier replacements may be any C expressions, and may refer to the
same special keywords as in the predicate (arg1, arg2, etc.). same special keywords as in the predicate (arg1, arg2, etc.).
In addition to the above format specifiers, you can also use %K and %U when
the expression is an address that potentially points to executable code (i.e.,
a symbol). trace will resolve %K specifiers to a kernel symbol, such as
vfs_read, and will resolve %U specifiers to a user-space symbol in that
process, such as sprintf.
In tracepoints, both the predicate and the arguments may refer to the tracepoint In tracepoints, both the predicate and the arguments may refer to the tracepoint
format structure, which is stored in the special "args" variable. For example, the format structure, which is stored in the special "args" variable. For example, the
block:block_rq_complete tracepoint can print or filter by args->nr_sector. To block:block_rq_complete tracepoint can print or filter by args->nr_sector. To
......
...@@ -163,10 +163,11 @@ class Probe(object): ...@@ -163,10 +163,11 @@ class Probe(object):
def _parse_types(self, fmt): def _parse_types(self, fmt):
for match in re.finditer( for match in re.finditer(
r'[^%]%(s|u|d|llu|lld|hu|hd|x|llx|c)', fmt): r'[^%]%(s|u|d|llu|lld|hu|hd|x|llx|c|K|U)', fmt):
self.types.append(match.group(1)) self.types.append(match.group(1))
fmt = re.sub(r'([^%]%)(u|d|llu|lld|hu|hd)', r'\1d', fmt) fmt = re.sub(r'([^%]%)(u|d|llu|lld|hu|hd)', r'\1d', fmt)
fmt = re.sub(r'([^%]%)(x|llx)', r'\1x', fmt) fmt = re.sub(r'([^%]%)(x|llx)', r'\1x', fmt)
fmt = re.sub('%K|%U', '%s', fmt)
self.python_format = fmt.strip('"') self.python_format = fmt.strip('"')
def _parse_action(self, action): def _parse_action(self, action):
...@@ -216,8 +217,8 @@ class Probe(object): ...@@ -216,8 +217,8 @@ class Probe(object):
p_type = { "u": ct.c_uint, "d": ct.c_int, p_type = { "u": ct.c_uint, "d": ct.c_int,
"llu": ct.c_ulonglong, "lld": ct.c_longlong, "llu": ct.c_ulonglong, "lld": ct.c_longlong,
"hu": ct.c_ushort, "hd": ct.c_short, "hu": ct.c_ushort, "hd": ct.c_short,
"x": ct.c_uint, "llx": ct.c_ulonglong, "x": ct.c_uint, "llx": ct.c_ulonglong, "c": ct.c_ubyte,
"c": ct.c_ubyte } "K": ct.c_ulonglong, "U": ct.c_ulonglong }
def _generate_python_field_decl(self, idx, fields): def _generate_python_field_decl(self, idx, fields):
field_type = self.types[idx] field_type = self.types[idx]
...@@ -248,7 +249,8 @@ class Probe(object): ...@@ -248,7 +249,8 @@ class Probe(object):
"llu": "unsigned long long", "lld": "long long", "llu": "unsigned long long", "lld": "long long",
"hu": "unsigned short", "hd": "short", "hu": "unsigned short", "hd": "short",
"x": "unsigned int", "llx": "unsigned long long", "x": "unsigned int", "llx": "unsigned long long",
"c": "char" } "c": "char", "K": "unsigned long long",
"U": "unsigned long long" }
fmt_types = c_type.keys() fmt_types = c_type.keys()
def _generate_field_decl(self, idx): def _generate_field_decl(self, idx):
...@@ -417,12 +419,24 @@ BPF_PERF_OUTPUT(%s); ...@@ -417,12 +419,24 @@ BPF_PERF_OUTPUT(%s);
def print_stack(self, bpf, stack_id, pid): def print_stack(self, bpf, stack_id, pid):
if stack_id < 0: if stack_id < 0:
print(" %d" % stack_id) print(" %d" % stack_id)
return return
stack = list(bpf.get_table(self.stacks_name).walk(stack_id)) stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
for addr in stack: for addr in stack:
print(" %016x %s" % (addr, bpf.sym(addr, pid))) print(" %016x %s" % (addr, bpf.sym(addr, pid)))
def _format_message(self, bpf, pid, values):
    """Build the display message for one event, resolving symbol specifiers.

    Each entry of ``self.types`` is the format specifier for the value at
    the same position. Values whose specifier is ``K`` are passed through
    ``bpf.ksymaddr`` and values whose specifier is ``U`` are passed through
    ``bpf.symaddr`` together with ``pid``; all other values are used as-is.

    Args:
        bpf: object providing ``ksymaddr(addr)`` and ``symaddr(addr, pid)``.
        pid: process id handed to ``bpf.symaddr`` for ``U`` placeholders.
        values: iterable of raw event values, one per format specifier.

    Returns:
        ``self.python_format`` interpolated with the resolved values.
    """
    # Work on a private list: the caller's sequence is left untouched, and
    # a lazy iterable (e.g. the result of map() on Python 3) is accepted.
    resolved = list(values)
    for idx, spec in enumerate(self.types):
        if spec == 'K':
            resolved[idx] = bpf.ksymaddr(resolved[idx])
        elif spec == 'U':
            resolved[idx] = bpf.symaddr(resolved[idx], pid)
    return self.python_format % tuple(resolved)
def print_event(self, bpf, cpu, data, size): def print_event(self, bpf, cpu, data, size):
# Cast as the generated structure type and display # Cast as the generated structure type and display
...@@ -430,7 +444,7 @@ BPF_PERF_OUTPUT(%s); ...@@ -430,7 +444,7 @@ BPF_PERF_OUTPUT(%s);
event = ct.cast(data, ct.POINTER(self.python_struct)).contents event = ct.cast(data, ct.POINTER(self.python_struct)).contents
values = map(lambda i: getattr(event, "v%d" % i), values = map(lambda i: getattr(event, "v%d" % i),
range(0, len(self.values))) range(0, len(self.values)))
msg = self.python_format % tuple(values) msg = self._format_message(bpf, event.pid, values)
time = strftime("%H:%M:%S") if Probe.use_localtime else \ time = strftime("%H:%M:%S") if Probe.use_localtime else \
Probe._time_off_str(event.timestamp_ns) Probe._time_off_str(event.timestamp_ns)
print("%-8s %-6d %-12s %-16s %s" % \ print("%-8s %-6d %-12s %-16s %s" % \
...@@ -438,13 +452,13 @@ BPF_PERF_OUTPUT(%s); ...@@ -438,13 +452,13 @@ BPF_PERF_OUTPUT(%s);
self._display_function(), msg)) self._display_function(), msg))
if self.user_stack: if self.user_stack:
print(" User Stack Trace:") print(" User Stack Trace:")
self.print_stack(bpf, event.user_stack_id, event.pid) self.print_stack(bpf, event.user_stack_id, event.pid)
if self.kernel_stack: if self.kernel_stack:
print(" Kernel Stack Trace:") print(" Kernel Stack Trace:")
self.print_stack(bpf, event.kernel_stack_id, -1) self.print_stack(bpf, event.kernel_stack_id, -1)
if self.user_stack or self.kernel_stack: if self.user_stack or self.kernel_stack:
print("") print("")
Probe.event_count += 1 Probe.event_count += 1
if Probe.max_events is not None and \ if Probe.max_events is not None and \
......
...@@ -105,6 +105,37 @@ block:block_rq_complete ...@@ -105,6 +105,37 @@ block:block_rq_complete
This output tells you that you can use "args->dev", "args->sector", etc. in your This output tells you that you can use "args->dev", "args->sector", etc. in your
predicate and trace arguments. predicate and trace arguments.
More and more high-level libraries are instrumented with USDT probe support.
These probes can be traced by trace just like kernel tracepoints. For example,
trace new threads being created and their function name:
# trace 'u:pthread:pthread_create "%U", arg3'
TIME PID COMM FUNC -
02:07:29 4051 contentions pthread_create primes_thread+0x0
02:07:29 4051 contentions pthread_create primes_thread+0x0
02:07:29 4051 contentions pthread_create primes_thread+0x0
02:07:29 4051 contentions pthread_create primes_thread+0x0
^C
The "%U" format specifier tells trace to resolve arg3 as a user-space symbol,
if possible. Similarly, use "%K" for kernel symbols.
Ruby, Node, and OpenJDK are also instrumented with USDT. For example, let's
trace Ruby methods being called (this requires a version of Ruby built with
the --enable-dtrace configure flag):
# trace 'u:ruby:method__entry "%s.%s", arg1, arg2' -p $(pidof irb)
TIME PID COMM FUNC -
12:08:43 18420 irb method__entry IRB::Context.verbose?
12:08:43 18420 irb method__entry RubyLex.ungetc
12:08:43 18420 irb method__entry RubyLex.debug?
^C
In the previous invocation, arg1 and arg2 are the class name and method name
for the Ruby method being invoked.
As a final example, let's trace open syscalls for a specific process. By As a final example, let's trace open syscalls for a specific process. By
default, tracing is system-wide, but the -p switch overrides this: default, tracing is system-wide, but the -p switch overrides this:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment