trace: STRCMP helper function

`trace` filters and print expressions can now use the magic STRCMP helper function to compare strings. The first string must be a compile-time constant literal string, such as "test", and the second string can be determined at runtime (e.g., from a function argument). The codegen for STRCMP is on a case-by-case basis for each literal string, and it generates an inline function with a constant-length loop that compares the string's characters. This is a decent workaround until we get something more reasonable from the kernel side, such as a `bpf_strcmp` helper. Usage example: ``` trace 'p:c:open (STRCMP("test.txt", arg1)) "%s", arg1' ```

trace: STRCMP helper function
`trace` filters and print expressions can now use the magic STRCMP helper function to compare strings. The first string must be a compile-time constant literal string, such as "test", and the second string can be determined at runtime (e.g., from a function argument). The codegen for STRCMP is on a case-by-case basis for each literal string, and it generates an inline function with a constant-length loop that compares the string's characters. This is a decent workaround until we get something more reasonable from the kernel side, such as a `bpf_strcmp` helper. Usage example: ``` trace 'p:c:open (STRCMP("test.txt", arg1)) "%s", arg1' ```
f4797b0c · Sasha Goldshtein · 56ddca09 · f4797b0c · f4797b0c · f4797b0c
Commit f4797b0c authored Oct 17, 2016 by Sasha Goldshtein
Hide whitespace changes
Inline Side-by-side

Showing with 49 additions and 8 deletions

man/man8/trace.8 man/man8/trace.8 +6 -0

tools/trace.py tools/trace.py +32 -7

tools/trace_example.txt tools/trace_example.txt +11 -1

No files found.
--- a/man/man8/trace.8
+++ b/man/man8/trace.8
@@ -94,6 +94,12 @@ Note that only arg1-arg6 are supported, and only if the function is using the
 standard x86_64 convention where the first six arguments are in the RDI, RSI, 
 RDX, RCX, R8, R9 registers. If no predicate is specified, all function 
 invocations are traced.
+The predicate expression may also use the STRCMP pseudo-function to compare
+a predefined string to a string argument. For example: STRCMP("test", arg1).
+The order of arguments is important: the first argument MUST be a quoted
+literal string, and the second argument can be a runtime string, most typically
+an argument. 
 .TP
 .B ["format string"[, arguments]]
 A printf-style format string that will be used for the trace message. You can

--- a/tools/trace.py
+++ b/tools/trace.py
@@ -46,6 +46,7 @@ class Time(object):
 class Probe(object):
        probe_count = 0
+        streq_index = 0
        max_events = None
        event_count = 0
        first_ts = 0
@@ -61,6 +62,7 @@ class Probe(object):
        def __init__(self, probe, string_size, kernel_stack, user_stack):
                self.usdt = None
+                self.streq_functions = ""
                self.raw_probe = probe
                self.string_size = string_size
                self.kernel_stack = kernel_stack
@@ -159,7 +161,7 @@ class Probe(object):
                self._bail("unrecognized USDT probe %s" % self.usdt_name)
        def _parse_filter(self, filt):
-                self.filter = self._replace_args(filt)
+                self.filter = self._rewrite_expr(filt)
        def _parse_types(self, fmt):
                for match in re.finditer(
@@ -178,14 +180,14 @@ class Probe(object):
                        return
                action = action.strip()
-                match = re.search(r'(\".*\"),?(.*)', action)
+                match = re.search(r'(\".*?\"),?(.*)', action)
                if match is None:
                        self._bail("expected format string in \"s")
                self.raw_format = match.group(1)
                self._parse_types(self.raw_format)
-                for part in match.group(2).split(','):
+                for part in re.split('(?<!"),', match.group(2)):
-                        part = self._replace_args(part)
+                        part = self._rewrite_expr(part)
                        if len(part) > 0:
                                self.values.append(part)
@@ -204,7 +206,25 @@ class Probe(object):
                "$cpu": "bpf_get_smp_processor_id()"
        }
-        def _replace_args(self, expr):
+        def _generate_streq_function(self, string):
+                fname = "streq_%d" % Probe.streq_index
+                Probe.streq_index += 1
+                self.streq_functions += """
+static inline bool %s(char const *ignored, unsigned long str) {
+        char needle[] = %s;
+        char haystack[sizeof(needle)];
+        bpf_probe_read(&haystack, sizeof(haystack), (void *)str);
+        for (int i = 0; i < sizeof(needle); ++i) {
+                if (needle[i] != haystack[i]) {
+                        return false;
+                }
+        }
+        return true;
+}
+                """ % (fname, string)
+                return fname
+        def _rewrite_expr(self, expr):
                for alias, replacement in Probe.aliases.items():
                        # For USDT probes, we replace argN values with the
                        # actual arguments for that probe obtained using
@@ -212,6 +232,11 @@ class Probe(object):
                        if alias.startswith("arg") and self.probe_type == "u":
                                continue
                        expr = expr.replace(alias, replacement)
+                matches = re.finditer('STRCMP\\(("[^"]+\\")', expr)
+                for match in matches:
+                        string = match.group(1)
+                        fname = self._generate_streq_function(string)
+                        expr = expr.replace("STRCMP", fname, 1)
                return expr
        p_type = {"u": ct.c_uint, "d": ct.c_int,
@@ -405,7 +430,7 @@ BPF_PERF_OUTPUT(%s);
                               self.struct_name, data_fields,
                               stack_trace, self.events_name, ctx_name)
-                return data_decl + "\n" + text
+                return self.streq_functions + data_decl + "\n" + text
        @classmethod
        def _time_off_str(cls, timestamp_ns):
@@ -526,7 +551,7 @@ trace 'p:c:write (arg1 == 1) "writing %d bytes to STDOUT", arg3'
        Trace the write() call from libc to monitor writes to STDOUT
 trace 'r::__kmalloc (retval == 0) "kmalloc failed!"
        Trace returns from __kmalloc which returned a null pointer
-trace 'r:c:malloc (retval) "allocated = %p", retval
+trace 'r:c:malloc (retval) "allocated = %x", retval
        Trace returns from malloc and print non-NULL allocated buffers
 trace 't:block:block_rq_complete "sectors=%d", args->nr_sector'
        Trace the block_rq_complete kernel tracepoint and print # of tx sectors

--- a/tools/trace_example.txt
+++ b/tools/trace_example.txt
@@ -136,6 +136,16 @@ In the previous invocation, arg1 and arg2 are the class name and method name
 for the Ruby method being invoked.
+Occasionally, it can be useful to filter specific strings. For example, you
+might be interested in open() calls that open a specific file:
+# trace 'p:c:open (STRCMP("test.txt", arg1)) "opening %s", arg1'
+TIME     PID    COMM         FUNC             -
+01:43:15 10938  cat          open             opening test.txt
+01:43:20 10939  cat          open             opening test.txt
+^C
 As a final example, let's trace open syscalls for a specific process. By 
 default, tracing is system-wide, but the -p switch overrides this:
@@ -202,7 +212,7 @@ trace 'p:c:write (arg1 == 1) "writing %d bytes to STDOUT", arg3'
        Trace the write() call from libc to monitor writes to STDOUT
 trace 'r::__kmalloc (retval == 0) "kmalloc failed!"
        Trace returns from __kmalloc which returned a null pointer
-trace 'r:c:malloc (retval) "allocated = %p", retval
+trace 'r:c:malloc (retval) "allocated = %x", retval
        Trace returns from malloc and print non-NULL allocated buffers
 trace 't:block:block_rq_complete "sectors=%d", args->nr_sector'
        Trace the block_rq_complete kernel tracepoint and print # of tx sectors