Commit 746eab44 authored by Brenden Blanco's avatar Brenden Blanco

Merge pull request #442 from goldshtn/usyms

Moved user symbol decoding from memleak into bcc module
parents 3bc2828f 8737c6e4
......@@ -20,7 +20,7 @@ import json
import multiprocessing
import os
import re
from subprocess import Popen, PIPE
from subprocess import Popen, PIPE, STDOUT
import struct
import sys
basestring = (unicode if sys.version_info[0] < 3 else str)
......@@ -28,6 +28,7 @@ basestring = (unicode if sys.version_info[0] < 3 else str)
from .libbcc import lib, _CB_TYPE
from .table import Table
from .tracepoint import Perf, Tracepoint
from .usyms import ProcessSymbols
open_kprobes = {}
open_uprobes = {}
......@@ -747,6 +748,28 @@ class BPF(object):
return 0
return ksyms[idx][1]
@classmethod
def usymaddr(cls, pid, addr, refresh_symbols=False):
"""usymaddr(pid, addr, refresh_symbols=False)
Decode the specified address in the specified process to a symbolic
representation that includes the symbol name, offset within the symbol,
and the module name. See the ProcessSymbols class for more details.
Specify refresh_symbols=True if you suspect the set of loaded modules
or their load addresses has changed since the last time you called
usymaddr() on this pid.
"""
proc_sym = None
if pid in cls._process_symbols:
proc_sym = cls._process_symbols[pid]
if refresh_symbols:
proc_sym.refresh_code_ranges()
else:
proc_sym = ProcessSymbols(pid)
cls._process_symbols[pid] = proc_sym
return proc_sym.decode_addr(addr)
@staticmethod
def num_open_kprobes():
"""num_open_kprobes()
......
# Copyright 2016 Sasha Goldshtein
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from subprocess import Popen, PIPE, STDOUT
class ProcessSymbols(object):
def __init__(self, pid):
"""
Initializes the process symbols store for the specified pid.
Call refresh_code_ranges() periodically if you anticipate changes
in the set of loaded libraries or their addresses.
"""
self.pid = pid
self.refresh_code_ranges()
def refresh_code_ranges(self):
self.code_ranges = self._get_code_ranges()
self.ranges_cache = {}
self.exe = self._get_exe()
self.start_time = self._get_start_time()
def _get_exe(self):
return ProcessSymbols._run_command_get_output(
"readlink -f /proc/%d/exe" % self.pid)
def _get_start_time(self):
return ProcessSymbols._run_command_get_output(
"cut -d' ' -f 22 /proc/%d/stat" % self.pid)
@staticmethod
def _is_binary_segment(parts):
return len(parts) == 6 and parts[5][0] != '[' and 'x' in parts[1]
def _get_code_ranges(self):
ranges = {}
raw_ranges = open("/proc/%d/maps" % self.pid).readlines()
# A typical line from /proc/PID/maps looks like this:
# 7f21b6635000-7f21b67eb000 r-xp ... /usr/lib64/libc-2.21.so
# We are looking for executable segments that have a .so file
# or the main executable. The first two lines are the range of
# that memory segment, which we index by binary name.
for raw_range in raw_ranges:
parts = raw_range.split()
if not ProcessSymbols._is_binary_segment(parts):
continue
binary = parts[5]
range_parts = parts[0].split('-')
addr_range = (int(range_parts[0], 16), int(range_parts[1], 16))
ranges[binary] = addr_range
return ranges
@staticmethod
def _is_function_symbol(parts):
return len(parts) == 6 and parts[3] == ".text" and parts[2] == "F"
@staticmethod
def _run_command_get_output(command):
p = Popen(command.split(), stdout=PIPE, stderr=STDOUT)
return iter(p.stdout.readline, b'')
def _get_sym_ranges(self, binary):
if binary in self.ranges_cache:
return self.ranges_cache[binary]
sym_ranges = {}
raw_symbols = ProcessSymbols._run_command_get_output(
"objdump -t %s" % binary)
for raw_symbol in raw_symbols:
# A typical line from objdump -t looks like this:
# 00000000004007f5 g F .text 000000000000010e main
# We only care about functions in the .text segment.
# The first number is the start address, and the second
# number is the length.
parts = raw_symbol.split()
if not ProcessSymbols._is_function_symbol(parts):
continue
sym_start = int(parts[0], 16)
sym_len = int(parts[4], 16)
sym_name = parts[5]
sym_ranges[sym_name] = (sym_start, sym_len)
self.ranges_cache[binary] = sym_ranges
return sym_ranges
def _decode_sym(self, binary, offset):
sym_ranges = self._get_sym_ranges(binary)
# Find the symbol that contains the specified offset.
# There might not be one.
for name, (start, length) in sym_ranges.items():
if offset >= start and offset <= (start + length):
return "%s+0x%x" % (name, offset - start)
return "%x" % offset
def _check_pid_wrap(self):
# If the pid wrapped, our exe name and start time must have changed.
# Detect this and get rid of the cached ranges.
if self.exe != self._get_exe() or \
self.start_time != self._get_start_time():
self.refresh_code_ranges()
def decode_addr(self, addr):
"""
Given an address, return the best symbolic representation of it.
If it doesn't fall in any module, return its hex string. If it
falls within a module but we don't have a symbol for it, return
the hex string and the module. If we do have a symbol for it,
return the symbol and the module, e.g. "readline+0x10 [bash]".
"""
self._check_pid_wrap()
# Find the binary that contains the specified address.
# For .so files, look at the relative address; for the main
# executable, look at the absolute address.
for binary, (start, end) in self.code_ranges.items():
if addr >= start and addr <= end:
offset = addr - start \
if binary.endswith(".so") else addr
return "%s [%s]" % (self._decode_sym(binary, offset),
binary)
return "%x" % addr
#!/usr/bin/env python
#
# memleak Trace and display outstanding allocations to detect
# memory leaks in user-mode processes and the kernel.
# memleak Trace and display outstanding allocations to detect
# memory leaks in user-mode processes and the kernel.
#
# USAGE: memleak [-h] [-p PID] [-t] [-a] [-o OLDER] [-c COMMAND]
# [-s SAMPLE_RATE] [-d STACK_DEPTH] [-T TOP] [-z MIN_SIZE]
# [-Z MAX_SIZE]
# [interval] [count]
# [-s SAMPLE_RATE] [-d STACK_DEPTH] [-T TOP] [-z MIN_SIZE]
# [-Z MAX_SIZE]
# [interval] [count]
#
# Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein.
from bcc import BPF
from bcc import BPF, ProcessSymbols
from time import sleep
from datetime import datetime
import argparse
......@@ -45,88 +45,14 @@ class Time(object):
return t.tv_sec * 1e9 + t.tv_nsec
class StackDecoder(object):
def __init__(self, pid, bpf):
def __init__(self, pid):
self.pid = pid
self.bpf = bpf
self.ranges_cache = {}
self.refresh_code_ranges()
if pid != -1:
self.proc_sym = ProcessSymbols(pid)
def refresh_code_ranges(self):
if self.pid == -1:
return
self.code_ranges = self._get_code_ranges()
@staticmethod
def _is_binary_segment(parts):
return len(parts) == 6 and \
parts[5][0] != '[' and 'x' in parts[1]
def _get_code_ranges(self):
ranges = {}
raw_ranges = open("/proc/%d/maps" % self.pid).readlines()
# A typical line from /proc/PID/maps looks like this:
# 7f21b6635000-7f21b67eb000 r-xp ... /usr/lib64/libc-2.21.so
# We are looking for executable segments that have a .so file
# or the main executable. The first two lines are the range of
# that memory segment, which we index by binary name.
for raw_range in raw_ranges:
parts = raw_range.split()
if not StackDecoder._is_binary_segment(parts):
continue
binary = parts[5]
range_parts = parts[0].split('-')
addr_range = (int(range_parts[0], 16),
int(range_parts[1], 16))
ranges[binary] = addr_range
return ranges
@staticmethod
def _is_function_symbol(parts):
return len(parts) == 6 and parts[3] == ".text" \
and parts[2] == "F"
def _get_sym_ranges(self, binary):
if binary in self.ranges_cache:
return self.ranges_cache[binary]
sym_ranges = {}
raw_symbols = run_command_get_output("objdump -t %s" % binary)
for raw_symbol in raw_symbols:
# A typical line from objdump -t looks like this:
# 00000000004007f5 g F .text 000000000000010e main
# We only care about functions in the .text segment.
# The first number is the start address, and the second
# number is the length.
parts = raw_symbol.split()
if not StackDecoder._is_function_symbol(parts):
continue
sym_start = int(parts[0], 16)
sym_len = int(parts[4], 16)
sym_name = parts[5]
sym_ranges[sym_name] = (sym_start, sym_len)
self.ranges_cache[binary] = sym_ranges
return sym_ranges
def _decode_sym(self, binary, offset):
sym_ranges = self._get_sym_ranges(binary)
# Find the symbol that contains the specified offset.
# There might not be one.
for name, (start, length) in sym_ranges.items():
if offset >= start and offset <= (start + length):
return "%s+0x%x" % (name, offset - start)
return "%x" % offset
def _decode_addr(self, addr):
code_ranges = self._get_code_ranges()
# Find the binary that contains the specified address.
# For .so files, look at the relative address; for the main
# executable, look at the absolute address.
for binary, (start, end) in code_ranges.items():
if addr >= start and addr <= end:
offset = addr - start \
if binary.endswith(".so") else addr
return "%s [%s]" % (self._decode_sym(binary,
offset), binary)
return "%x" % addr
def refresh(self):
if self.pid != -1:
self.proc_sym.refresh_code_ranges()
def decode_stack(self, info, is_kernel_trace):
stack = ""
......@@ -136,13 +62,10 @@ class StackDecoder(object):
addr = info.callstack[i]
if is_kernel_trace:
stack += " %s [kernel] (%x) ;" % \
(self.bpf.ksym(addr), addr)
(BPF.ksym(addr), addr)
else:
# At some point, we hope to have native BPF
# user-mode symbol decoding, but for now we
# have to use our own.
stack += " %s (%x) ;" % \
(self._decode_addr(addr), addr)
(self.proc_sym.decode_addr(addr), addr)
return stack
def run_command_get_output(command):
......@@ -302,7 +225,7 @@ int alloc_exit(struct pt_regs *ctx)
info.timestamp_ns = bpf_ktime_get_ns();
info.num_frames = grab_stack(ctx, &info) - 2;
allocs.update(&address, &info);
if (SHOULD_PRINT) {
bpf_trace_printk("alloc exited, size = %lu, result = %lx, frames = %d\\n",
info.size, address, info.num_frames);
......@@ -325,7 +248,7 @@ int free_enter(struct pt_regs *ctx, void *address)
}
return 0;
}
"""
"""
bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")
bpf_source = bpf_source.replace("SAMPLE_EVERY_N", str(sample_every_n))
bpf_source = bpf_source.replace("GRAB_ONE_FRAME", max_stack_size *
......@@ -358,7 +281,7 @@ else:
bpf_program.attach_kretprobe(event="__kmalloc", fn_name="alloc_exit")
bpf_program.attach_kprobe(event="kfree", fn_name="free_enter")
decoder = StackDecoder(pid, bpf_program)
decoder = StackDecoder(pid)
def print_outstanding():
stacks = {}
......@@ -391,7 +314,7 @@ while True:
sleep(interval)
except KeyboardInterrupt:
exit()
decoder.refresh_code_ranges()
decoder.refresh()
print_outstanding()
count_so_far += 1
if num_prints is not None and count_so_far >= num_prints:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment