Commit 01553853 authored by Sasha Goldshtein

cc: Resolve symbols from external debuginfo

Adds support for resolving symbols using external debuginfo files,
which can be retrieved from two locations. First, check the build-id
of the desired binary and look in /usr/lib/debug/.build-id according
to the build-id structure. Second, check the debuglink section of
the desired binary and look in /usr/lib/debug or in the binary's
current directory. These are the rules applied by GDB as well, but
GDB lets the user reconfigure the debug directory path from
/usr/lib/debug to something else; we do not support this.

These changes are based on the following description of how GDB
resolves external debuginfo:

https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
parent 7d8c29ce
...@@ -1107,9 +1107,9 @@ Examples in situ: ...@@ -1107,9 +1107,9 @@ Examples in situ:
### 3. sym() ### 3. sym()
Syntax: ```BPF.sym(addr, pid, show_module=False, show_address=True)``` Syntax: ```BPF.sym(addr, pid, show_module=False, show_offset=False)```
Translate a memory address into a function name for a pid, which is returned. A pid of less than zero will access the kernel symbol cache. The `show_module` and `show_address` parameters control whether the module in which the symbol lies should be displayed, and whether the instruction offset from the beginning of the symbol should be displayed. These extra parameters default to `False`. Translate a memory address into a function name for a pid, which is returned. A pid of less than zero will access the kernel symbol cache. The `show_module` and `show_offset` parameters control whether the module in which the symbol lies should be displayed, and whether the instruction offset from the beginning of the symbol should be displayed. These extra parameters default to `False`.
Example: Example:
......
...@@ -55,4 +55,4 @@ stack_traces = b.get_table("stack_traces") ...@@ -55,4 +55,4 @@ stack_traces = b.get_table("stack_traces")
for k, v in reversed(sorted(calls.items(), key=lambda c: c[1].value)): for k, v in reversed(sorted(calls.items(), key=lambda c: c[1].value)):
print("%d bytes allocated at:" % v.value) print("%d bytes allocated at:" % v.value)
for addr in stack_traces.walk(k.value): for addr in stack_traces.walk(k.value):
print("\t%s" % b.sym(addr, pid, show_address=True)) print("\t%s" % b.sym(addr, pid, show_offset=True))
...@@ -15,9 +15,14 @@ ...@@ -15,9 +15,14 @@
*/ */
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h> #include <fcntl.h>
#include <unistd.h> #include <unistd.h>
#include <string.h> #include <string.h>
#include <libgen.h>
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <gelf.h> #include <gelf.h>
#include "bcc_elf.h" #include "bcc_elf.h"
...@@ -196,20 +201,248 @@ static int listsymbols(Elf *e, bcc_elf_symcb callback, void *payload) { ...@@ -196,20 +201,248 @@ static int listsymbols(Elf *e, bcc_elf_symcb callback, void *payload) {
return 0; return 0;
} }
// get_section_elf_data: walks the section table of 'e' and returns the result
// of elf_getdata() for the first section whose name equals 'section_name'.
// Returns NULL when the section-name string table index cannot be obtained or
// when no section with that name is found.
int bcc_elf_foreach_sym(const char *path, bcc_elf_symcb callback, static Elf_Data * get_section_elf_data(Elf *e, const char *section_name) {
void *payload) { Elf_Scn *section = NULL;
GElf_Shdr header;
char *name;
size_t stridx;
// Section names are looked up through the string table identified by stridx.
if (elf_getshdrstrndx(e, &stridx) != 0)
return NULL;
// Passing the previous section back to elf_nextscn() advances the iteration;
// the initial NULL starts it.
while ((section = elf_nextscn(e, section)) != 0) {
// A section whose header cannot be read is skipped rather than treated as
// a fatal error.
if (!gelf_getshdr(section, &header))
continue;
name = elf_strptr(e, stridx, header.sh_name);
if (name && !strcmp(name, section_name)) {
return elf_getdata(section, NULL);
}
}
return NULL;
}
// Reads the .gnu_debuglink section of 'e'.
//
// On success returns 1, stores in *debug_file a pointer to the NUL-terminated
// debuginfo file name and in *crc the checksum held in the last 4 bytes of the
// section. The name pointer refers to the section data itself (it is not
// copied), so it must not be freed by the caller.
//
// Returns 0, with *debug_file == NULL and *crc == 0, when the section is
// missing, too small, or does not contain a terminated name.
static int find_debuglink(Elf *e, char **debug_file, unsigned int *crc) {
  Elf_Data *data = NULL;

  *debug_file = NULL;
  *crc = 0;

  data = get_section_elf_data(e, ".gnu_debuglink");
  if (!data || !data->d_buf || data->d_size <= 5)
    return 0;

  // The name is later used as a C string; make sure a terminator exists
  // before the trailing checksum so we never read past the section.
  if (!memchr(data->d_buf, '\0', data->d_size - 4))
    return 0;

  *debug_file = (char *)data->d_buf;
  // The checksum sits at an arbitrary byte offset; copy it out instead of
  // dereferencing a possibly misaligned unsigned int pointer.
  memcpy(crc, (char *)data->d_buf + data->d_size - 4, sizeof(*crc));
  return 1;
}
// Reads the .note.gnu.build-id section of 'e' and writes the build-id as a
// NUL-terminated lowercase hex string into 'buildid'.
//
// The code expects the note's owner name ("GNU") at byte offset 12 of the
// section and treats everything from offset 16 onwards as the build-id bytes.
//
// 'buildid' must have room for two characters per build-id byte plus the
// terminator. No size is passed in, so build-ids longer than 63 bytes are
// rejected; that is what fits in the 128-byte buffer used by
// find_debug_via_buildid. NOTE(review): keep the limit in sync with callers.
//
// Returns 1 on success, 0 when the section is absent, malformed or too long.
static int find_buildid(Elf *e, char *buildid) {
  enum { BUILDID_DESC_OFFSET = 16, BUILDID_MAX_BYTES = 63 };

  Elf_Data *data = get_section_elf_data(e, ".note.gnu.build-id");
  // Binaries without a build-id note yield NULL here.
  if (!data || !data->d_buf)
    return 0;
  if (data->d_size <= 16 || strcmp((char *)data->d_buf + 12, "GNU"))
    return 0;

  const unsigned char *buf =
      (const unsigned char *)data->d_buf + BUILDID_DESC_OFFSET;
  size_t length = data->d_size - BUILDID_DESC_OFFSET;
  if (length > BUILDID_MAX_BYTES)
    return 0;

  for (size_t i = 0; i < length; ++i) {
    sprintf(buildid + (i * 2), "%02hhx", buf[i]);
  }

  return 1;
}
// Checksum routine for GNU debuglink sections, table-driven, one input byte
// per step. Algorithm and table come from:
// https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
//
// 'crc' is the running checksum (0 for the first chunk), 'buf'/'len' describe
// the bytes to fold in. Returns the updated checksum, which may be fed back in
// as 'crc' to continue over a following chunk.
static unsigned int gnu_debuglink_crc32(unsigned int crc,
                                        char *buf, size_t len) {
  static const unsigned int crc32_table[256] =
  {
    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
    0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
    0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
    0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
    0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
    0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
    0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
    0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
    0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
    0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
    0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
    0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
    0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
    0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
    0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
    0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
    0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
    0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
    0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
    0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
    0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
    0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
    0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
    0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
    0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
    0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
    0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
    0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
    0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
    0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
    0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
    0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
    0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
    0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
    0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
    0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
    0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
    0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
    0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
    0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
    0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
    0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
    0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
    0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
    0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
    0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
    0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
    0x2d02ef8d
  };

  // The running value is kept inverted while bytes are folded in, and is
  // inverted back on return.
  unsigned int state = ~crc & 0xffffffff;

  for (size_t pos = 0; pos < len; ++pos) {
    // Only the low 8 bits of the XOR select the table entry.
    unsigned int slot = (state ^ buf[pos]) & 0xff;
    state = crc32_table[slot] ^ (state >> 8);
  }

  return ~state & 0xffffffff;
}
// Checks whether the contents of 'file' hash to 'crc' using the GNU debuglink
// CRC (gnu_debuglink_crc32 with an initial value of 0).
//
// Returns 1 when the checksum matches. Returns 0 on mismatch, when 'file' is
// NULL, or when the file cannot be opened, stat'ed or mapped (an empty file
// cannot be mapped and therefore never matches).
static int verify_checksum(const char *file, unsigned int crc) {
  struct stat st;
  int fd;
  void *buf;
  unsigned int actual;

  // Callers may hand over the result of a failed search.
  if (!file)
    return 0;

  fd = open(file, O_RDONLY);
  if (fd < 0)
    return 0;

  if (fstat(fd, &st) < 0) {
    close(fd);  // do not leak the descriptor on the error path
    return 0;
  }

  buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
  // mmap reports failure with MAP_FAILED, not with a NULL pointer.
  if (buf == MAP_FAILED) {
    close(fd);
    return 0;
  }

  actual = gnu_debuglink_crc32(0, buf, st.st_size);

  munmap(buf, st.st_size);
  close(fd);
  return actual == crc;
}
// Locates the separate debuginfo file named by the .gnu_debuglink section of
// 'e', where 'binpath' is the path of the binary that 'e' was opened from.
//
// Candidates are probed in this order, and the first one that exists is used:
//   1. <dir of binpath>/<name>
//   2. <dir of binpath>/.debug/<name>
//   3. /usr/lib/debug<dir of binpath>/<name>
// The chosen file is accepted only if its checksum matches the CRC recorded in
// the debuglink section.
//
// Returns a malloc'ed path that the caller must free(), or NULL when there is
// no debuglink section, no candidate exists, the checksum does not match, or
// memory could not be allocated.
static char *find_debug_via_debuglink(Elf *e, const char *binpath) {
  char fullpath[PATH_MAX];
  char *bincopy = NULL;  // owned copy of binpath; dirname() may modify it
  char *bindir = NULL;   // result of dirname(); NOT necessarily == bincopy
  char *res = NULL;
  unsigned int crc;
  char *name;  // the name of the debuginfo file
  int n;

  if (!find_debuglink(e, &name, &crc))
    return NULL;

  bincopy = strdup(binpath);
  if (!bincopy)
    return NULL;
  // dirname() may return a pointer into its argument or to static storage,
  // so the allocation is tracked separately and freed via 'bincopy'.
  bindir = dirname(bincopy);

  // Search for the file in 'binpath'
  n = snprintf(fullpath, sizeof(fullpath), "%s/%s", bindir, name);
  if (n >= 0 && (size_t)n < sizeof(fullpath) &&
      access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

  // Search for the file in 'binpath'/.debug
  n = snprintf(fullpath, sizeof(fullpath), "%s/.debug/%s", bindir, name);
  if (n >= 0 && (size_t)n < sizeof(fullpath) &&
      access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

  // Search for the file in the global debug directory /usr/lib/debug/'binpath'
  n = snprintf(fullpath, sizeof(fullpath), "/usr/lib/debug%s/%s", bindir,
               name);
  if (n >= 0 && (size_t)n < sizeof(fullpath) &&
      access(fullpath, F_OK) != -1) {
    res = strdup(fullpath);
    goto DONE;
  }

DONE:
  free(bincopy);

  // Nothing found (or strdup failed): there is nothing to verify.
  if (!res)
    return NULL;

  if (!verify_checksum(res, crc)) {
    free(res);  // a rejected candidate must not leak
    return NULL;
  }
  return res;
}
// Locates the separate debuginfo file for 'e' through its build-id.
//
// The file is expected in the global debug directory under
//   .build-id/mm/nnnn...nnnn.debug
// where "mm" is the first two hex characters of the build-id and "nnnn..." is
// the remainder.
//
// Returns a malloc'ed path when that file exists, otherwise NULL (also when
// the binary carries no usable build-id).
static char *find_debug_via_buildid(Elf *e) {
  char hex_id[128];  // 40 characters appears to be the usual size; be generous
  char candidate[PATH_MAX];

  if (find_buildid(e, hex_id) == 0)
    return NULL;

  sprintf(candidate, "/usr/lib/debug/.build-id/%c%c/%s.debug",
          hex_id[0], hex_id[1], hex_id + 2);

  if (access(candidate, F_OK) == -1)
    return NULL;

  return strdup(candidate);
}
// Opens the ELF file at 'path' and reports its symbols to 'callback' (with
// 'payload' passed through) via listsymbols(). When 'is_debug_file' is 0, an
// external debuginfo file is looked up first and, if found, its symbols are
// reported before those of 'path' itself.
// Returns -1 if 'path' cannot be opened, otherwise the result of listsymbols()
// for 'path'. The result of processing the debuginfo file is not propagated.
static int foreach_sym_core(const char *path, bcc_elf_symcb callback,
void *payload, int is_debug_file) {
Elf *e; Elf *e;
int fd, res; int fd, res;
char *debug_file;
if (openelf(path, &e, &fd) < 0) if (openelf(path, &e, &fd) < 0)
return -1; return -1;
// If there is a separate debuginfo file, try to locate and read it, first
// using the build-id section, then using the debuglink section. These are
// also the rules that GDB follows.
// See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
if (!is_debug_file) {
// The is_debug_file argument helps avoid infinitely resolving debuginfo
// files for debuginfo files and so on.
debug_file = find_debug_via_buildid(e);
if (!debug_file)
debug_file = find_debug_via_debuglink(e, path);
if (debug_file) {
// Recurse with is_debug_file = 1 so the debuginfo file is not itself
// searched for further debuginfo; the path string is released afterwards.
foreach_sym_core(debug_file, callback, payload, 1);
free(debug_file);
}
}
res = listsymbols(e, callback, payload); res = listsymbols(e, callback, payload);
elf_end(e); elf_end(e);
close(fd); close(fd);
return res; return res;
} }
// Public entry point: invokes 'callback' (with 'payload') for the symbols of
// the ELF file at 'path'. 'path' is treated as a regular binary rather than a
// debuginfo file, so foreach_sym_core() will also look for external debuginfo.
// Returns whatever foreach_sym_core() returns.
int bcc_elf_foreach_sym(const char *path, bcc_elf_symcb callback,
                        void *payload) {
  const int is_debug_file = 0;

  return foreach_sym_core(path, callback, payload, is_debug_file);
}
static int loadaddr(Elf *e, uint64_t *addr) { static int loadaddr(Elf *e, uint64_t *addr) {
size_t phnum, i; size_t phnum, i;
...@@ -268,11 +501,11 @@ int bcc_elf_is_shared_obj(const char *path) { ...@@ -268,11 +501,11 @@ int bcc_elf_is_shared_obj(const char *path) {
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
uint64_t addr; uint64_t addr;
if (bcc_elf_findsym(argv[1], argv[2], -1, STT_FUNC, &addr) < 0) if (bcc_elf_findsym(argv[1], argv[2], -1, STT_FUNC, &addr) < 0)
return -1; return -1;
printf("%s: %p\n", argv[2], (void *)addr); printf("%s: %p\n", argv[2], (void *)addr);
return 0; return 0;
} }
#endif #endif
...@@ -980,42 +980,42 @@ class BPF(object): ...@@ -980,42 +980,42 @@ class BPF(object):
return BPF._sym_caches[pid] return BPF._sym_caches[pid]
@staticmethod @staticmethod
def sym(addr, pid, show_module=False, show_address=False): def sym(addr, pid, show_module=False, show_offset=False):
"""sym(addr, pid, show_module=False, show_address=False) """sym(addr, pid, show_module=False, show_offset=False)
Translate a memory address into a function name for a pid, which is Translate a memory address into a function name for a pid, which is
returned. When show_module is True, the module name is also included. returned. When show_module is True, the module name is also included.
When show_address is True, the instruction offset as a hexadecimal When show_offset is True, the instruction offset as a hexadecimal
number is also included in the string. number is also included in the string.
A pid of less than zero will access the kernel symbol cache. A pid of less than zero will access the kernel symbol cache.
Example output when both show_module and show_address are True: Example output when both show_module and show_offset are True:
"start_thread+0x202 [libpthread-2.24.so]" "start_thread+0x202 [libpthread-2.24.so]"
Example output when both show_module and show_address are False: Example output when both show_module and show_offset are False:
"start_thread" "start_thread"
""" """
name, offset, module = BPF._sym_cache(pid).resolve(addr) name, offset, module = BPF._sym_cache(pid).resolve(addr)
offset = "+0x%x" % offset if show_address and name is not None else "" offset = "+0x%x" % offset if show_offset and name is not None else ""
name = name or "[unknown]" name = name or "[unknown]"
name = name + offset name = name + offset
module = " [%s]" % os.path.basename(module) if show_module else "" module = " [%s]" % os.path.basename(module) if show_module else ""
return name + module return name + module
@staticmethod @staticmethod
def ksym(addr, show_module=False, show_address=False): def ksym(addr, show_module=False, show_offset=False):
"""ksym(addr) """ksym(addr)
Translate a kernel memory address into a kernel function name, which is Translate a kernel memory address into a kernel function name, which is
returned. When show_module is True, the module name ("kernel") is also returned. When show_module is True, the module name ("kernel") is also
included. When show_address is true, the instruction offset as a included. When show_offset is true, the instruction offset as a
hexadecimal number is also included in the string. hexadecimal number is also included in the string.
Example output when both show_module and show_address are True: Example output when both show_module and show_offset are True:
"default_idle+0x0 [kernel]" "default_idle+0x0 [kernel]"
""" """
return BPF.sym(addr, -1, show_module, show_address) return BPF.sym(addr, -1, show_module, show_offset)
@staticmethod @staticmethod
def ksymname(name): def ksymname(name):
......
...@@ -240,7 +240,7 @@ def print_outstanding(): ...@@ -240,7 +240,7 @@ def print_outstanding():
combined = [] combined = []
for addr in stack: for addr in stack:
combined.append(bpf_program.sym(addr, pid, combined.append(bpf_program.sym(addr, pid,
show_module=True, show_address=True)) show_module=True, show_offset=True))
alloc_info[info.stack_id] = Allocation(combined, alloc_info[info.stack_id] = Allocation(combined,
info.size) info.size)
if args.show_allocs: if args.show_allocs:
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# Licensed under the Apache License, Version 2.0 (the "License") # Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein. # Copyright (C) 2016 Sasha Goldshtein.
from bcc import BPF, SymbolCache from bcc import BPF
from time import sleep from time import sleep
from datetime import datetime from datetime import datetime
import argparse import argparse
...@@ -24,7 +24,7 @@ def decode_stack(bpf, pid, info): ...@@ -24,7 +24,7 @@ def decode_stack(bpf, pid, info):
return "???" return "???"
for i in range(0, info.num_frames): for i in range(0, info.num_frames):
addr = info.callstack[i] addr = info.callstack[i]
stack += " %s ;" % bpf.sym(addr, pid, show_address=True) stack += " %s ;" % bpf.sym(addr, pid, show_offset=True)
return stack return stack
def run_command_get_output(command): def run_command_get_output(command):
......
...@@ -145,7 +145,7 @@ def print_frame(addr): ...@@ -145,7 +145,7 @@ def print_frame(addr):
print(" ", end="") print(" ", end="")
if verbose: if verbose:
print("%-16x " % addr, end="") print("%-16x " % addr, end="")
print(b.ksym(addr, show_address=offset)) print(b.ksym(addr, show_offset=offset))
# output # output
exiting = 0 if args.interval else 1 exiting = 0 if args.interval else 1
......
...@@ -119,7 +119,7 @@ while 1: ...@@ -119,7 +119,7 @@ while 1:
(task, pid, cpu, flags, ts, msg) = b.trace_fields() (task, pid, cpu, flags, ts, msg) = b.trace_fields()
if msg != "": if msg != "":
(reg, addr) = msg.split(" ") (reg, addr) = msg.split(" ")
ip = b.ksym(int(addr, 16), show_address=offset) ip = b.ksym(int(addr, 16), show_offset=offset)
msg = msg + " " + ip msg = msg + " " + ip
if verbose: if verbose:
print("%-18.9f %-12.12s %-6d %-3d %s" % (ts, task, pid, cpu, msg)) print("%-18.9f %-12.12s %-6d %-3d %s" % (ts, task, pid, cpu, msg))
......
...@@ -225,7 +225,7 @@ class Tool(object): ...@@ -225,7 +225,7 @@ class Tool(object):
if self.args.verbose: if self.args.verbose:
print("%-16x " % addr, end="") print("%-16x " % addr, end="")
if self.args.offset: if self.args.offset:
print("%s" % self.probe.bpf.sym(addr, pid, show_address=True)) print("%s" % self.probe.bpf.sym(addr, pid, show_offset=True))
else: else:
print("%s" % self.probe.bpf.sym(addr, pid)) print("%s" % self.probe.bpf.sym(addr, pid))
......
...@@ -120,7 +120,7 @@ def print_event(cpu, data, size): ...@@ -120,7 +120,7 @@ def print_event(cpu, data, size):
print("%-18.9f %s" % (ts, function)) print("%-18.9f %s" % (ts, function))
for addr in stack_traces.walk(event.stack_id): for addr in stack_traces.walk(event.stack_id):
sym = b.ksym(addr, show_address=offset) sym = b.ksym(addr, show_offset=offset)
print("\t%s" % sym) print("\t%s" % sym)
print() print()
......
...@@ -458,7 +458,7 @@ BPF_PERF_OUTPUT(%s); ...@@ -458,7 +458,7 @@ BPF_PERF_OUTPUT(%s);
stack = list(bpf.get_table(self.stacks_name).walk(stack_id)) stack = list(bpf.get_table(self.stacks_name).walk(stack_id))
for addr in stack: for addr in stack:
print(" %s" % (bpf.sym(addr, tgid, print(" %s" % (bpf.sym(addr, tgid,
show_module=True, show_address=True))) show_module=True, show_offset=True)))
def _format_message(self, bpf, tgid, values): def _format_message(self, bpf, tgid, values):
# Replace each %K with kernel sym and %U with user sym in tgid # Replace each %K with kernel sym and %U with user sym in tgid
...@@ -467,10 +467,10 @@ BPF_PERF_OUTPUT(%s); ...@@ -467,10 +467,10 @@ BPF_PERF_OUTPUT(%s);
user_placeholders = [i for i, t in enumerate(self.types) user_placeholders = [i for i, t in enumerate(self.types)
if t == 'U'] if t == 'U']
for kp in kernel_placeholders: for kp in kernel_placeholders:
values[kp] = bpf.ksym(values[kp], show_address=True) values[kp] = bpf.ksym(values[kp], show_offset=True)
for up in user_placeholders: for up in user_placeholders:
values[up] = bpf.sym(values[up], tgid, values[up] = bpf.sym(values[up], tgid,
show_module=True, show_address=True) show_module=True, show_offset=True)
return self.python_format % tuple(values) return self.python_format % tuple(values)
def print_event(self, bpf, cpu, data, size): def print_event(self, bpf, cpu, data, size):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment