Commit 33522d7b authored by Sasha Goldshtein's avatar Sasha Goldshtein

Fixed indentation and Python style issues from pep

parent dda47697
......@@ -3,10 +3,10 @@
#define MAX_STACK_SIZE 10
struct alloc_info_t {
u64 size;
u64 timestamp_ns;
int num_frames;
u64 callstack[MAX_STACK_SIZE];
u64 size;
u64 timestamp_ns;
int num_frames;
u64 callstack[MAX_STACK_SIZE];
};
BPF_HASH(sizes, u64);
......@@ -14,23 +14,23 @@ BPF_HASH(allocs, u64, struct alloc_info_t);
// Adapted from https://github.com/iovisor/bcc/tools/offcputime.py
static u64 get_frame(u64 *bp) {
if (*bp) {
// The following stack walker is x86_64 specific
u64 ret = 0;
if (bpf_probe_read(&ret, sizeof(ret), (void *)(*bp+8)))
return 0;
if (bpf_probe_read(bp, sizeof(*bp), (void *)*bp))
*bp = 0;
return ret;
}
return 0;
if (*bp) {
// The following stack walker is x86_64 specific
u64 ret = 0;
if (bpf_probe_read(&ret, sizeof(ret), (void *)(*bp+8)))
return 0;
if (bpf_probe_read(bp, sizeof(*bp), (void *)*bp))
*bp = 0;
return ret;
}
return 0;
}
static int grab_stack(struct pt_regs *ctx, struct alloc_info_t *info)
{
int depth = 0;
u64 bp = ctx->bp;
if (!(info->callstack[depth++] = get_frame(&bp))) return depth;
if (!(info->callstack[depth++] = get_frame(&bp))) return depth;
int depth = 0;
u64 bp = ctx->bp;
if (!(info->callstack[depth++] = get_frame(&bp))) return depth;
if (!(info->callstack[depth++] = get_frame(&bp))) return depth;
if (!(info->callstack[depth++] = get_frame(&bp))) return depth;
if (!(info->callstack[depth++] = get_frame(&bp))) return depth;
if (!(info->callstack[depth++] = get_frame(&bp))) return depth;
......@@ -39,14 +39,14 @@ static int grab_stack(struct pt_regs *ctx, struct alloc_info_t *info)
if (!(info->callstack[depth++] = get_frame(&bp))) return depth;
if (!(info->callstack[depth++] = get_frame(&bp))) return depth;
if (!(info->callstack[depth++] = get_frame(&bp))) return depth;
return depth;
return depth;
}
int alloc_enter(struct pt_regs *ctx, size_t size)
{
u64 pid = bpf_get_current_pid_tgid();
u64 size64 = size;
sizes.update(&pid, &size64);
u64 pid = bpf_get_current_pid_tgid();
u64 size64 = size;
sizes.update(&pid, &size64);
if (SHOULD_PRINT)
bpf_trace_printk("alloc entered, size = %u\n", size);
......@@ -55,21 +55,21 @@ int alloc_enter(struct pt_regs *ctx, size_t size)
int alloc_exit(struct pt_regs *ctx)
{
u64 address = ctx->ax;
u64 pid = bpf_get_current_pid_tgid();
u64* size64 = sizes.lookup(&pid);
struct alloc_info_t info = {0};
u64 address = ctx->ax;
u64 pid = bpf_get_current_pid_tgid();
u64* size64 = sizes.lookup(&pid);
struct alloc_info_t info = {0};
if (size64 == 0)
return 0; // missed alloc entry
if (size64 == 0)
return 0; // missed alloc entry
info.size = *size64;
sizes.delete(&pid);
info.size = *size64;
sizes.delete(&pid);
info.timestamp_ns = bpf_ktime_get_ns();
info.num_frames = grab_stack(ctx, &info) - 2;
allocs.update(&address, &info);
info.timestamp_ns = bpf_ktime_get_ns();
info.num_frames = grab_stack(ctx, &info) - 2;
allocs.update(&address, &info);
if (SHOULD_PRINT)
bpf_trace_printk("alloc exited, size = %lu, result = %lx, frames = %d\n", info.size, address, info.num_frames);
return 0;
......@@ -77,12 +77,12 @@ int alloc_exit(struct pt_regs *ctx)
int free_enter(struct pt_regs *ctx, void *address)
{
u64 addr = (u64)address;
struct alloc_info_t *info = allocs.lookup(&addr);
if (info == 0)
return 0;
u64 addr = (u64)address;
struct alloc_info_t *info = allocs.lookup(&addr);
if (info == 0)
return 0;
allocs.delete(&addr);
allocs.delete(&addr);
if (SHOULD_PRINT)
bpf_trace_printk("free entered, address = %lx, size = %lu\n", address, info->size);
......
......@@ -8,139 +8,159 @@ import ctypes
import os
class Time(object):
# BPF timestamps come from the monotonic clock. To be able to filter
# and compare them from Python, we need to invoke clock_gettime from librt.
# Adapted from http://stackoverflow.com/a/1205762
CLOCK_MONOTONIC_RAW = 4 # see <linux/time.h>
class timespec(ctypes.Structure):
_fields_ = [
('tv_sec', ctypes.c_long),
('tv_nsec', ctypes.c_long)
]
librt = ctypes.CDLL('librt.so.1', use_errno=True)
clock_gettime = librt.clock_gettime
clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(timespec)]
@staticmethod
def monotonic_time():
t = Time.timespec()
if Time.clock_gettime(Time.CLOCK_MONOTONIC_RAW , ctypes.pointer(t)) != 0:
errno_ = ctypes.get_errno()
raise OSError(errno_, os.strerror(errno_))
return t.tv_sec*1e9 + t.tv_nsec
# BPF timestamps come from the monotonic clock. To be able to filter
# and compare them from Python, we need to invoke clock_gettime.
# Adapted from http://stackoverflow.com/a/1205762
CLOCK_MONOTONIC_RAW = 4 # see <linux/time.h>
class timespec(ctypes.Structure):
_fields_ = [
('tv_sec', ctypes.c_long),
('tv_nsec', ctypes.c_long)
]
librt = ctypes.CDLL('librt.so.1', use_errno=True)
clock_gettime = librt.clock_gettime
clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(timespec)]
@staticmethod
def monotonic_time():
t = Time.timespec()
if Time.clock_gettime(
Time.CLOCK_MONOTONIC_RAW, ctypes.pointer(t)) != 0:
errno_ = ctypes.get_errno()
raise OSError(errno_, os.strerror(errno_))
return t.tv_sec * 1e9 + t.tv_nsec
class StackDecoder(object):
def __init__(self, pid, bpf):
self.pid = pid
self.bpf = bpf
self.ranges_cache = {}
self.refresh_code_ranges()
def refresh_code_ranges(self):
if self.pid == -1:
return
self.code_ranges = self._get_code_ranges()
def _get_code_ranges(self):
ranges = {}
raw_ranges = open("/proc/%d/maps" % self.pid).readlines()
for raw_range in raw_ranges:
# A typical line from /proc/PID/maps looks like this:
# 7f21b6635000-7f21b67eb000 r-xp 00000000 fd:00 1442606 /usr/lib64/libc-2.21.so
# We are looking for executable segments that have a binary (.so
# or the main executable). The first two lines are the range of
# that memory segment, which we index by binary name.
parts = raw_range.split()
if len(parts) < 6 or parts[5][0] == '[' or not 'x' in parts[1]:
continue
binary = parts[5]
range_parts = parts[0].split('-')
addr_range = (int(range_parts[0], 16), int(range_parts[1], 16))
ranges[binary] = addr_range
return ranges
def _get_sym_ranges(self, binary):
if binary in self.ranges_cache:
return self.ranges_cache[binary]
sym_ranges = {}
raw_symbols = run_command_get_output("objdump -t %s" % binary)
for raw_symbol in raw_symbols:
# A typical line from objdump -t looks like this:
# 00000000004007f5 g F .text 000000000000010e main
# We only care about functions (F) in the .text segment. The first
# number is the start address, and the second number is the length.
parts = raw_symbol.split()
if len(parts) < 6 or parts[3] != ".text" or parts[2] != "F":
continue
sym_start = int(parts[0], 16)
sym_len = int(parts[4], 16)
sym_name = parts[5]
sym_ranges[sym_name] = (sym_start, sym_len)
self.ranges_cache[binary] = sym_ranges
return sym_ranges
def _decode_sym(self, binary, offset):
sym_ranges = self._get_sym_ranges(binary)
# Find the symbol that contains the specified offset. There might not be one.
for name, (start, length) in sym_ranges.items():
if offset >= start and offset <= (start + length):
return "%s+0x%x" % (name, offset - start)
return "%x" % offset
def _decode_addr(self, addr):
code_ranges = self._get_code_ranges()
# Find the binary that contains the specified address. For .so files, look
# at the relative address; for the main executable, look at the absolute
# address.
for binary, (start, end) in code_ranges.items():
if addr >= start and addr <= end:
offset = addr - start if binary.endswith(".so") else addr
return "%s [%s]" % (self._decode_sym(binary, offset), binary)
return "%x" % addr
def decode_stack(self, info, is_kernel_trace):
stack = ""
if info.num_frames <= 0:
return "???"
for i in range(0, info.num_frames):
addr = info.callstack[i]
if is_kernel_trace:
stack += " %s [kernel] (%x) ;" % (self.bpf.ksym(addr), addr)
else:
# At some point, we hope to have native BPF user-mode symbol
# decoding, but for now we have to use our own
stack += " %s (%x) ;" % (self._decode_addr(addr), addr)
return stack
def __init__(self, pid, bpf):
self.pid = pid
self.bpf = bpf
self.ranges_cache = {}
self.refresh_code_ranges()
def refresh_code_ranges(self):
if self.pid == -1:
return
self.code_ranges = self._get_code_ranges()
@staticmethod
def _is_binary_segment(parts):
return len(parts) == 6 and \
parts[5][0] == '[' and 'x' in parts[1]
def _get_code_ranges(self):
ranges = {}
raw_ranges = open("/proc/%d/maps" % self.pid).readlines()
# A typical line from /proc/PID/maps looks like this:
# 7f21b6635000-7f21b67eb000 r-xp ... /usr/lib64/libc-2.21.so
# We are looking for executable segments that have a .so file
# or the main executable. The first two lines are the range of
# that memory segment, which we index by binary name.
for raw_range in raw_ranges:
parts = raw_range.split()
if not StackDecoder._is_binary_segment(parts):
continue
binary = parts[5]
range_parts = parts[0].split('-')
addr_range = (int(range_parts[0], 16),
int(range_parts[1], 16))
ranges[binary] = addr_range
return ranges
@staticmethod
def _is_function_symbol(parts):
return len(parts) == 6 and parts[3] == ".text" \
and parts[2] == "F"
def _get_sym_ranges(self, binary):
if binary in self.ranges_cache:
return self.ranges_cache[binary]
sym_ranges = {}
raw_symbols = run_command_get_output("objdump -t %s" % binary)
for raw_symbol in raw_symbols:
# A typical line from objdump -t looks like this:
# 00000000004007f5 g F .text 000000000000010e main
# We only care about functions in the .text segment.
# The first number is the start address, and the second
# number is the length.
parts = raw_symbol.split()
if not StackDecoder._is_function_symbol(parts):
continue
sym_start = int(parts[0], 16)
sym_len = int(parts[4], 16)
sym_name = parts[5]
sym_ranges[sym_name] = (sym_start, sym_len)
self.ranges_cache[binary] = sym_ranges
return sym_ranges
def _decode_sym(self, binary, offset):
sym_ranges = self._get_sym_ranges(binary)
# Find the symbol that contains the specified offset.
# There might not be one.
for name, (start, length) in sym_ranges.items():
if offset >= start and offset <= (start + length):
return "%s+0x%x" % (name, offset - start)
return "%x" % offset
def _decode_addr(self, addr):
code_ranges = self._get_code_ranges()
# Find the binary that contains the specified address.
# For .so files, look at the relative address; for the main
# executable, look at the absolute address.
for binary, (start, end) in code_ranges.items():
if addr >= start and addr <= end:
offset = addr - start \
if binary.endswith(".so") else addr
return "%s [%s]" % (self._decode_sym(binary,
offset), binary)
return "%x" % addr
def decode_stack(self, info, is_kernel_trace):
stack = ""
if info.num_frames <= 0:
return "???"
for i in range(0, info.num_frames):
addr = info.callstack[i]
if is_kernel_trace:
stack += " %s [kernel] (%x) ;" % \
(self.bpf.ksym(addr), addr)
else:
# At some point, we hope to have native BPF
# user-mode symbol decoding, but for now we
# have to use our own.
stack += " %s (%x) ;" % \
(self._decode_addr(addr), addr)
return stack
def run_command_get_output(command):
p = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
return iter(p.stdout.readline, b'')
p = subprocess.Popen(command.split(),
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
return iter(p.stdout.readline, b'')
def run_command_get_pid(command):
p = subprocess.Popen(command.split())
return p.pid
p = subprocess.Popen(command.split())
return p.pid
examples = """
EXAMPLES:
./memleak.py -p $(pidof allocs)
Trace allocations and display a summary of "leaked" (outstanding)
allocations every 5 seconds
Trace allocations and display a summary of "leaked" (outstanding)
allocations every 5 seconds
./memleak.py -p $(pidof allocs) -t
Trace allocations and display each individual call to malloc/free
Trace allocations and display each individual call to malloc/free
./memleak.py -p $(pidof allocs) -a -i 10
Trace allocations and display allocated addresses, sizes, and stacks
every 10 seconds for outstanding allocations
Trace allocations and display allocated addresses, sizes, and stacks
every 10 seconds for outstanding allocations
./memleak.py -c "./allocs"
Run the specified command and trace its allocations
Run the specified command and trace its allocations
./memleak.py
Trace allocations in kernel mode and display a summary of outstanding
allocations every 5 seconds
Trace allocations in kernel mode and display a summary of outstanding
allocations every 5 seconds
./memleak.py -o 60000
Trace allocations in kernel mode and display a summary of outstanding
allocations that are at least one minute (60 seconds) old
Trace allocations in kernel mode and display a summary of outstanding
allocations that are at least one minute (60 seconds) old
"""
description = """
......@@ -150,20 +170,20 @@ allocations made with kmalloc/kfree.
"""
parser = argparse.ArgumentParser(description=description,
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
parser.add_argument("-p", "--pid",
help="the PID to trace; if not specified, trace kernel allocs")
help="the PID to trace; if not specified, trace kernel allocs")
parser.add_argument("-t", "--trace", action="store_true",
help="print trace messages for each alloc/free call")
help="print trace messages for each alloc/free call")
parser.add_argument("-i", "--interval", default=5,
help="interval in seconds to print outstanding allocations")
help="interval in seconds to print outstanding allocations")
parser.add_argument("-a", "--show-allocs", default=False, action="store_true",
help="show allocation addresses and sizes as well as call stacks")
help="show allocation addresses and sizes as well as call stacks")
parser.add_argument("-o", "--older", default=500,
help="prune allocations younger than this age in milliseconds")
help="prune allocations younger than this age in milliseconds")
parser.add_argument("-c", "--command",
help="execute and trace the specified command")
help="execute and trace the specified command")
args = parser.parse_args()
......@@ -172,11 +192,11 @@ command = args.command
kernel_trace = (pid == -1 and command is None)
trace_all = args.trace
interval = int(args.interval)
min_age_ns = 1e6*int(args.older)
min_age_ns = 1e6 * int(args.older)
if not command is None:
print("Executing '%s' and tracing the resulting process." % command)
pid = run_command_get_pid(command)
if command is not None:
print("Executing '%s' and tracing the resulting process." % command)
pid = run_command_get_pid(command)
bpf_source = open("memleak.c").read()
bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")
......@@ -184,41 +204,49 @@ bpf_source = bpf_source.replace("SHOULD_PRINT", "1" if trace_all else "0")
bpf_program = BPF(text=bpf_source)
if not kernel_trace:
print("Attaching to malloc and free in pid %d, Ctrl+C to quit." % pid)
bpf_program.attach_uprobe(name="c", sym="malloc", fn_name="alloc_enter", pid=pid)
bpf_program.attach_uretprobe(name="c", sym="malloc", fn_name="alloc_exit", pid=pid)
bpf_program.attach_uprobe(name="c", sym="free", fn_name="free_enter", pid=pid)
print("Attaching to malloc and free in pid %d, Ctrl+C to quit." % pid)
bpf_program.attach_uprobe(name="c", sym="malloc",
fn_name="alloc_enter", pid=pid)
bpf_program.attach_uretprobe(name="c", sym="malloc",
fn_name="alloc_exit", pid=pid)
bpf_program.attach_uprobe(name="c", sym="free",
fn_name="free_enter", pid=pid)
else:
print("Attaching to kmalloc and kfree, Ctrl+C to quit.")
bpf_program.attach_kprobe(event="__kmalloc", fn_name="alloc_enter")
bpf_program.attach_kretprobe(event="__kmalloc", fn_name="alloc_exit")
bpf_program.attach_kprobe(event="kfree", fn_name="free_enter")
print("Attaching to kmalloc and kfree, Ctrl+C to quit.")
bpf_program.attach_kprobe(event="__kmalloc", fn_name="alloc_enter")
bpf_program.attach_kretprobe(event="__kmalloc", fn_name="alloc_exit")
bpf_program.attach_kprobe(event="kfree", fn_name="free_enter")
decoder = StackDecoder(pid, bpf_program)
def print_outstanding():
stacks = {}
print("*** Outstanding allocations:")
allocs = bpf_program.get_table("allocs")
for address, info in sorted(allocs.items(), key=lambda a: a[1].size):
if Time.monotonic_time() - min_age_ns < info.timestamp_ns:
continue
stack = decoder.decode_stack(info, kernel_trace)
if stack in stacks: stacks[stack] = (stacks[stack][0] + 1, stacks[stack][1] + info.size)
else: stacks[stack] = (1, info.size)
if args.show_allocs:
print("\taddr = %x size = %s" % (address.value, info.size))
for stack, (count, size) in sorted(stacks.items(), key=lambda s: s[1][1]):
print("\t%d bytes in %d allocations from stack\n\t\t%s" % (size, count, stack.replace(";", "\n\t\t")))
stacks = {}
print("*** Outstanding allocations:")
allocs = bpf_program.get_table("allocs")
for address, info in sorted(allocs.items(), key=lambda a: a[1].size):
if Time.monotonic_time() - min_age_ns < info.timestamp_ns:
continue
stack = decoder.decode_stack(info, kernel_trace)
if stack in stacks:
stacks[stack] = (stacks[stack][0] + 1,
stacks[stack][1] + info.size)
else:
stacks[stack] = (1, info.size)
if args.show_allocs:
print("\taddr = %x size = %s" %
(address.value, info.size))
for stack, (count, size) in sorted(stacks.items(),
key=lambda s: s[1][1]):
print("\t%d bytes in %d allocations from stack\n\t\t%s" %
(size, count, stack.replace(";", "\n\t\t")))
while True:
if trace_all:
print bpf_program.trace_fields()
else:
try:
sleep(interval)
except KeyboardInterrupt:
exit()
decoder.refresh_code_ranges()
print_outstanding()
if trace_all:
print bpf_program.trace_fields()
else:
try:
sleep(interval)
except KeyboardInterrupt:
exit()
decoder.refresh_code_ranges()
print_outstanding()
......@@ -19,6 +19,42 @@ Attaching to malloc and free in pid 5193, Ctrl+C to quit.
__libc_start_main+0xf0 [/usr/lib64/libc-2.21.so] (7fd460ac2790)
As time goes on, it becomes apparent that the main function in the allocs
process is leaking memory, 16 bytes at a time. Fortunately, you don't have to
inspect each allocation individually -- you get a nice summary of which stack
is responsible for a large leak.
Occasionally, you do want the individual allocation details. Perhaps the same
stack is allocating various sizes and you want to confirm which sizes are
prevalent. Use the -a switch:
# ./memleak.py -p $(pidof allocs) -a
Attaching to malloc and free in pid 5193, Ctrl+C to quit.
*** Outstanding allocations:
addr = 948cd0 size = 16
addr = 948d10 size = 16
addr = 948d30 size = 16
addr = 948cf0 size = 16
64 bytes in 4 allocations from stack
main+0x6d [/home/vagrant/allocs] (400862)
__libc_start_main+0xf0 [/usr/lib64/libc-2.21.so] (7fd460ac2790)
*** Outstanding allocations:
addr = 948d50 size = 16
addr = 948cd0 size = 16
addr = 948d10 size = 16
addr = 948d30 size = 16
addr = 948cf0 size = 16
addr = 948dd0 size = 16
addr = 948d90 size = 16
addr = 948db0 size = 16
addr = 948d70 size = 16
addr = 948df0 size = 16
160 bytes in 10 allocations from stack
main+0x6d [/home/vagrant/allocs] (400862)
__libc_start_main+0xf0 [/usr/lib64/libc-2.21.so] (7fd460ac2790)
When using the -p switch, memleak traces the allocations of a particular
process. Without this switch, kernel allocations (kmalloc) are traced instead.
For example:
......@@ -58,6 +94,10 @@ Attaching to kmalloc and kfree, Ctrl+C to quit.
perf_tp_event_init [kernel] (ffffffff81192479)
Here you can see that arming the kprobe to which our eBPF program is attached
consumed 8KB of memory. Loading the BPF program also consumed a couple hundred
bytes (in bpf_prog_load).
memleak stores each allocated block along with its size, timestamp, and the
stack that allocated it. When the block is deleted, this information is freed
to reduce the memory overhead.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment