Commit 746eab44 authored by Brenden Blanco

Merge pull request #442 from goldshtn/usyms

Moved user symbol decoding from memleak into bcc module
parents 3bc2828f 8737c6e4
...@@ -20,7 +20,7 @@ import json ...@@ -20,7 +20,7 @@ import json
import multiprocessing import multiprocessing
import os import os
import re import re
from subprocess import Popen, PIPE from subprocess import Popen, PIPE, STDOUT
import struct import struct
import sys import sys
basestring = (unicode if sys.version_info[0] < 3 else str) basestring = (unicode if sys.version_info[0] < 3 else str)
...@@ -28,6 +28,7 @@ basestring = (unicode if sys.version_info[0] < 3 else str) ...@@ -28,6 +28,7 @@ basestring = (unicode if sys.version_info[0] < 3 else str)
from .libbcc import lib, _CB_TYPE from .libbcc import lib, _CB_TYPE
from .table import Table from .table import Table
from .tracepoint import Perf, Tracepoint from .tracepoint import Perf, Tracepoint
from .usyms import ProcessSymbols
open_kprobes = {} open_kprobes = {}
open_uprobes = {} open_uprobes = {}
...@@ -747,6 +748,28 @@ class BPF(object): ...@@ -747,6 +748,28 @@ class BPF(object):
return 0 return 0
return ksyms[idx][1] return ksyms[idx][1]
@classmethod
def usymaddr(cls, pid, addr, refresh_symbols=False):
    """usymaddr(pid, addr, refresh_symbols=False)

    Translate an address inside the given process into a symbolic string
    holding the symbol name, the offset within that symbol, and the
    module name. The heavy lifting is delegated to a ProcessSymbols
    instance, which is created on first use and then reused for every
    later call with the same pid.

    Pass refresh_symbols=True when the set of loaded modules, or their
    load addresses, may have changed since the previous usymaddr() call
    for this pid.
    """
    symbols = cls._process_symbols.get(pid)
    if symbols is None:
        # First request for this pid: build the symbol store and keep it.
        # A freshly built store is already up to date, so no refresh here.
        symbols = ProcessSymbols(pid)
        cls._process_symbols[pid] = symbols
    elif refresh_symbols:
        symbols.refresh_code_ranges()
    return symbols.decode_addr(addr)
@staticmethod @staticmethod
def num_open_kprobes(): def num_open_kprobes():
"""num_open_kprobes() """num_open_kprobes()
......
# Copyright 2016 Sasha Goldshtein
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from subprocess import Popen, PIPE, STDOUT
class ProcessSymbols(object):
    """
    Decodes user-space addresses of one process into symbolic names.

    The executable mappings are read from /proc/PID/maps and the function
    symbols of each mapped binary are read (lazily, then cached) from the
    output of "objdump -t".
    """

    # Position of the "starttime" value (field 22 of /proc/PID/stat) among
    # the fields that follow the parenthesised command name (field 2).
    _START_TIME_INDEX = 19

    def __init__(self, pid):
        """
        Initializes the process symbols store for the specified pid.
        Call refresh_code_ranges() periodically if you anticipate changes
        in the set of loaded libraries or their addresses.

        Raises IOError/OSError if /proc/PID/maps cannot be read.
        """
        self.pid = pid
        self.refresh_code_ranges()

    def refresh_code_ranges(self):
        """
        Re-reads the executable mappings of the process, drops every
        cached symbol table, and records the executable path and start
        time that identify the process currently owning this pid.

        Raises IOError/OSError if /proc/PID/maps cannot be read.
        """
        self.code_ranges = self._get_code_ranges()
        self.ranges_cache = {}
        self.exe = self._get_exe()
        self.start_time = self._get_start_time()

    def _get_exe(self):
        """
        Returns the target of /proc/PID/exe as a string, or None when the
        link cannot be read (for example because the process is gone).
        """
        try:
            return os.readlink("/proc/%d/exe" % self.pid)
        except (IOError, OSError):
            return None

    def _get_start_time(self):
        """
        Returns the start time field of /proc/PID/stat as a string, or
        None when it cannot be determined.
        """
        try:
            with open("/proc/%d/stat" % self.pid) as stat_file:
                stat = stat_file.read()
        except (IOError, OSError):
            return None
        # The command name (field 2) is wrapped in parentheses and may
        # itself contain spaces or ')', so only split what follows the
        # last ')'.
        fields = stat.rpartition(')')[2].split()
        if len(fields) <= ProcessSymbols._START_TIME_INDEX:
            return None
        return fields[ProcessSymbols._START_TIME_INDEX]

    @staticmethod
    def _is_binary_segment(parts):
        """
        Returns True if the split /proc/PID/maps line describes an
        executable mapping backed by a named file (pseudo-mappings such
        as "[vdso]" are rejected).
        """
        return len(parts) == 6 and parts[5][0] != '[' and 'x' in parts[1]

    @staticmethod
    def _is_shared_object(binary):
        """
        Returns True if the path looks like a shared library, including
        versioned names such as "libc.so.6".
        """
        name = binary.rsplit('/', 1)[-1]
        return name.endswith(".so") or ".so." in name

    def _get_code_ranges(self):
        """
        Returns a dict mapping each executable binary to its
        (start, end) address range, parsed from /proc/PID/maps.

        If a binary has several executable mappings, the last one listed
        is the one that is kept.
        """
        ranges = {}
        # A typical line from /proc/PID/maps looks like this:
        # 7f21b6635000-7f21b67eb000 r-xp ... /usr/lib64/libc-2.21.so
        # We are looking for executable segments that have a .so file
        # or the main executable. The first field is the address range
        # of that memory segment, which we index by binary name.
        with open("/proc/%d/maps" % self.pid) as maps_file:
            for raw_range in maps_file:
                parts = raw_range.split()
                if not ProcessSymbols._is_binary_segment(parts):
                    continue
                range_parts = parts[0].split('-')
                ranges[parts[5]] = (int(range_parts[0], 16),
                                    int(range_parts[1], 16))
        return ranges

    @staticmethod
    def _is_function_symbol(parts):
        """
        Returns True if the split "objdump -t" line describes a function
        symbol located in the .text section.
        """
        return len(parts) == 6 and parts[3] == ".text" and parts[2] == "F"

    @staticmethod
    def _run_command_get_output(command):
        """
        Runs a command and returns its combined stdout/stderr as a list
        of text lines.

        command may be a whitespace-separated string or a list/tuple of
        arguments; use the latter when an argument may contain spaces.
        The child process is always waited for, so no zombie process or
        open pipe is left behind.
        """
        if isinstance(command, (list, tuple)):
            args = list(command)
        else:
            args = command.split()
        p = Popen(args, stdout=PIPE, stderr=STDOUT)
        output = p.communicate()[0]
        # On Python 3 the pipe yields bytes; decode so that comparisons
        # against text such as ".text" work as intended.
        if not isinstance(output, str):
            output = output.decode("utf-8", "replace")
        return output.splitlines()

    def _get_sym_ranges(self, binary):
        """
        Returns a dict mapping each function symbol of the binary to
        (start address, length), running objdump only on the first
        request for that binary.
        """
        if binary in self.ranges_cache:
            return self.ranges_cache[binary]
        sym_ranges = {}
        raw_symbols = ProcessSymbols._run_command_get_output(
            ["objdump", "-t", binary])
        for raw_symbol in raw_symbols:
            # A typical line from objdump -t looks like this:
            # 00000000004007f5 g F .text 000000000000010e main
            # We only care about functions in the .text segment.
            # The first number is the start address, and the second
            # number is the length.
            parts = raw_symbol.split()
            if not ProcessSymbols._is_function_symbol(parts):
                continue
            sym_ranges[parts[5]] = (int(parts[0], 16), int(parts[4], 16))
        self.ranges_cache[binary] = sym_ranges
        return sym_ranges

    def _decode_sym(self, binary, offset):
        """
        Returns "name+0xoff" for the symbol of the binary that contains
        the offset, or the bare hex offset if no symbol contains it.
        """
        sym_ranges = self._get_sym_ranges(binary)
        # Find the symbol that contains the specified offset.
        # There might not be one.
        for name, (start, length) in sym_ranges.items():
            # The range is half-open: start + length already belongs to
            # whatever follows. Symbols recorded with length 0 still
            # match their exact start address.
            if start <= offset < start + max(length, 1):
                return "%s+0x%x" % (name, offset - start)
        return "%x" % offset

    def _check_pid_wrap(self):
        """
        Refreshes the cached ranges if the pid now belongs to a
        different process than the one the cache was built for.
        """
        # If the pid wrapped, our exe name and start time must have changed.
        # Detect this and get rid of the cached ranges.
        exe = self._get_exe()
        start_time = self._get_start_time()
        if exe is None or start_time is None:
            # The identity of the process cannot be determined (it has
            # most likely exited); the cached data is the best we have.
            return
        if exe != self.exe or start_time != self.start_time:
            self.refresh_code_ranges()

    def decode_addr(self, addr):
        """
        Given an address, return the best symbolic representation of it.
        If it doesn't fall in any module, return its hex string. If it
        falls within a module but we don't have a symbol for it, return
        the hex string and the module. If we do have a symbol for it,
        return the symbol and the module, e.g. "readline+0x10 [bash]".
        """
        self._check_pid_wrap()
        # Find the binary that contains the specified address.
        # For .so files, look at the relative address; for the main
        # executable, look at the absolute address.
        for binary, (start, end) in self.code_ranges.items():
            # The end address of a mapping is exclusive.
            if start <= addr < end:
                if ProcessSymbols._is_shared_object(binary):
                    offset = addr - start
                else:
                    offset = addr
                return "%s [%s]" % (self._decode_sym(binary, offset),
                                    binary)
        return "%x" % addr
#!/usr/bin/env python #!/usr/bin/env python
# #
# memleak Trace and display outstanding allocations to detect # memleak Trace and display outstanding allocations to detect
# memory leaks in user-mode processes and the kernel. # memory leaks in user-mode processes and the kernel.
# #
# USAGE: memleak [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND] # USAGE: memleak [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND]
# [-s SAMPLE_RATE] [-d STACK_DEPTH] [-T TOP] [-z MIN_SIZE] # [-s SAMPLE_RATE] [-d STACK_DEPTH] [-T TOP] [-z MIN_SIZE]
# [-Z MAX_SIZE] # [-Z MAX_SIZE]
# [interval] [count] # [interval] [count]
# #
# Licensed under the Apache License, Version 2.0 (the "License") # Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein. # Copyright (C) 2016 Sasha Goldshtein.
from bcc import BPF from bcc import BPF, ProcessSymbols
from time import sleep from time import sleep
from datetime import datetime from datetime import datetime
import argparse import argparse
...@@ -45,88 +45,14 @@ class Time(object): ...@@ -45,88 +45,14 @@ class Time(object):
return t.tv_sec * 1e9 + t.tv_nsec return t.tv_sec * 1e9 + t.tv_nsec
class StackDecoder(object): class StackDecoder(object):
def __init__(self, pid, bpf): def __init__(self, pid):
self.pid = pid self.pid = pid
self.bpf = bpf if pid != -1:
self.ranges_cache = {} self.proc_sym = ProcessSymbols(pid)
self.refresh_code_ranges()
def refresh_code_ranges(self): def refresh(self):
if self.pid == -1: if self.pid != -1:
return self.proc_sym.refresh_code_ranges()
self.code_ranges = self._get_code_ranges()
@staticmethod
def _is_binary_segment(parts):
return len(parts) == 6 and \
parts[5][0] != '[' and 'x' in parts[1]
def _get_code_ranges(self):
ranges = {}
raw_ranges = open("/proc/%d/maps" % self.pid).readlines()
# A typical line from /proc/PID/maps looks like this:
# 7f21b6635000-7f21b67eb000 r-xp ... /usr/lib64/libc-2.21.so
# We are looking for executable segments that have a .so file
# or the main executable. The first two lines are the range of
# that memory segment, which we index by binary name.
for raw_range in raw_ranges:
parts = raw_range.split()
if not StackDecoder._is_binary_segment(parts):
continue
binary = parts[5]
range_parts = parts[0].split('-')
addr_range = (int(range_parts[0], 16),
int(range_parts[1], 16))
ranges[binary] = addr_range
return ranges
@staticmethod
def _is_function_symbol(parts):
return len(parts) == 6 and parts[3] == ".text" \
and parts[2] == "F"
def _get_sym_ranges(self, binary):
if binary in self.ranges_cache:
return self.ranges_cache[binary]
sym_ranges = {}
raw_symbols = run_command_get_output("objdump -t %s" % binary)
for raw_symbol in raw_symbols:
# A typical line from objdump -t looks like this:
# 00000000004007f5 g F .text 000000000000010e main
# We only care about functions in the .text segment.
# The first number is the start address, and the second
# number is the length.
parts = raw_symbol.split()
if not StackDecoder._is_function_symbol(parts):
continue
sym_start = int(parts[0], 16)
sym_len = int(parts[4], 16)
sym_name = parts[5]
sym_ranges[sym_name] = (sym_start, sym_len)
self.ranges_cache[binary] = sym_ranges
return sym_ranges
def _decode_sym(self, binary, offset):
sym_ranges = self._get_sym_ranges(binary)
# Find the symbol that contains the specified offset.
# There might not be one.
for name, (start, length) in sym_ranges.items():
if offset >= start and offset <= (start + length):
return "%s+0x%x" % (name, offset - start)
return "%x" % offset
def _decode_addr(self, addr):
code_ranges = self._get_code_ranges()
# Find the binary that contains the specified address.
# For .so files, look at the relative address; for the main
# executable, look at the absolute address.
for binary, (start, end) in code_ranges.items():
if addr >= start and addr <= end:
offset = addr - start \
if binary.endswith(".so") else addr
return "%s [%s]" % (self._decode_sym(binary,
offset), binary)
return "%x" % addr
def decode_stack(self, info, is_kernel_trace): def decode_stack(self, info, is_kernel_trace):
stack = "" stack = ""
...@@ -136,13 +62,10 @@ class StackDecoder(object): ...@@ -136,13 +62,10 @@ class StackDecoder(object):
addr = info.callstack[i] addr = info.callstack[i]
if is_kernel_trace: if is_kernel_trace:
stack += " %s [kernel] (%x) ;" % \ stack += " %s [kernel] (%x) ;" % \
(self.bpf.ksym(addr), addr) (BPF.ksym(addr), addr)
else: else:
# At some point, we hope to have native BPF
# user-mode symbol decoding, but for now we
# have to use our own.
stack += " %s (%x) ;" % \ stack += " %s (%x) ;" % \
(self._decode_addr(addr), addr) (self.proc_sym.decode_addr(addr), addr)
return stack return stack
def run_command_get_output(command): def run_command_get_output(command):
...@@ -302,7 +225,7 @@ int alloc_exit(struct pt_regs *ctx) ...@@ -302,7 +225,7 @@ int alloc_exit(struct pt_regs *ctx)
info.timestamp_ns = bpf_ktime_get_ns(); info.timestamp_ns = bpf_ktime_get_ns();
info.num_frames = grab_stack(ctx, &info) - 2; info.num_frames = grab_stack(ctx, &info) - 2;
allocs.update(&address, &info); allocs.update(&address, &info);
if (SHOULD_PRINT) { if (SHOULD_PRINT) {
bpf_trace_printk("alloc exited, size = %lu, result = %lx, frames = %d\\n", bpf_trace_printk("alloc exited, size = %lu, result = %lx, frames = %d\\n",
info.size, address, info.num_frames); info.size, address, info.num_frames);
...@@ -325,7 +248,7 @@ int free_enter(struct pt_regs *ctx, void *address) ...@@ -325,7 +248,7 @@ int free_enter(struct pt_regs *ctx, void *address)
} }
return 0; return 0;
} }
""" """
bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0") bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")
bpf_source = bpf_source.replace("SAMPLE_EVERY_N", str(sample_every_n)) bpf_source = bpf_source.replace("SAMPLE_EVERY_N", str(sample_every_n))
bpf_source = bpf_source.replace("GRAB_ONE_FRAME", max_stack_size * bpf_source = bpf_source.replace("GRAB_ONE_FRAME", max_stack_size *
...@@ -358,7 +281,7 @@ else: ...@@ -358,7 +281,7 @@ else:
bpf_program.attach_kretprobe(event="__kmalloc", fn_name="alloc_exit") bpf_program.attach_kretprobe(event="__kmalloc", fn_name="alloc_exit")
bpf_program.attach_kprobe(event="kfree", fn_name="free_enter") bpf_program.attach_kprobe(event="kfree", fn_name="free_enter")
decoder = StackDecoder(pid, bpf_program) decoder = StackDecoder(pid)
def print_outstanding(): def print_outstanding():
stacks = {} stacks = {}
...@@ -391,7 +314,7 @@ while True: ...@@ -391,7 +314,7 @@ while True:
sleep(interval) sleep(interval)
except KeyboardInterrupt: except KeyboardInterrupt:
exit() exit()
decoder.refresh_code_ranges() decoder.refresh()
print_outstanding() print_outstanding()
count_so_far += 1 count_so_far += 1
if num_prints is not None and count_so_far >= num_prints: if num_prints is not None and count_so_far >= num_prints:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment