Commit b186900f authored by Brenden Blanco

Merge pull request #172 from brendangregg/master

initial ksym() and ksymaddr()
parents 20d41cbc fc9bc487
.TH vfscount 8 "2015-08-18" "USER COMMANDS" .TH vfscount 8 "2015-08-18" "USER COMMANDS"
.SH NAME .SH NAME
vfscount \- Count some common VFS calls. Uses Linux eBPF/bcc. vfscount \- Count VFS calls ("vfs_*"). Uses Linux eBPF/bcc.
.SH SYNOPSIS .SH SYNOPSIS
.B vfscount .B vfscount
.SH DESCRIPTION .SH DESCRIPTION
This counts common VFS calls. This can be useful for general workload This counts VFS calls. This can be useful for general workload
characterization of these operations. characterization of these operations.
This works by tracing some kernel vfs functions using dynamic tracing, and will This works by tracing all kernel functions beginning with "vfs_" using dynamic
need updating to match any changes to these functions. Edit the script to tracing. This may match more functions than you are interested in measuring:
customize and add functions to trace, which is easy to do. Edit the script to customize which functions to trace.
Since this uses BPF, only the root user can use this tool. Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS .SH REQUIREMENTS
...@@ -30,11 +30,13 @@ Kernel function name ...@@ -30,11 +30,13 @@ Kernel function name
COUNT COUNT
Number of calls while tracing Number of calls while tracing
.SH OVERHEAD .SH OVERHEAD
This traces various kernel vfs functions and maintains in-kernel counts, which This traces kernel vfs functions and maintains in-kernel counts, which
are asynchronously copied to user-space. While the rate of VFS operations can are asynchronously copied to user-space. While the rate of VFS operations can
be very high (>1M/sec), this is a relatively efficient way to trace these be very high (>1M/sec), this is a relatively efficient way to trace these
events, and so the overhead is expected to be small for normal workloads. events, and so the overhead is expected to be small for normal workloads.
Measure in a test environment. Measure in a test environment, and if overheads are an issue, edit the script
to reduce the types of vfs functions traced (currently all beginning with
"vfs_").
.SH SOURCE .SH SOURCE
This is from bcc. This is from bcc.
.IP .IP
......
...@@ -91,6 +91,10 @@ lib.bpf_detach_kprobe.argtypes = [ct.c_char_p] ...@@ -91,6 +91,10 @@ lib.bpf_detach_kprobe.argtypes = [ct.c_char_p]
open_kprobes = {} open_kprobes = {}
tracefile = None tracefile = None
TRACEFS = "/sys/kernel/debug/tracing" TRACEFS = "/sys/kernel/debug/tracing"
KALLSYMS = "/proc/kallsyms"
ksym_addrs = []
ksym_names = []
ksym_loaded = 0
@atexit.register @atexit.register
def cleanup_kprobes(): def cleanup_kprobes():
...@@ -546,3 +550,65 @@ class BPF(object): ...@@ -546,3 +550,65 @@ class BPF(object):
line = BPF.trace_readline(nonblocking=False) line = BPF.trace_readline(nonblocking=False)
print(line) print(line)
sys.stdout.flush() sys.stdout.flush()
@staticmethod
def _load_kallsyms():
    """Populate the module-level kernel symbol tables from KALLSYMS.

    Every line of the file is split on whitespace; column 0 is parsed as a
    hexadecimal address and column 2 is taken as the symbol name. The
    results are stored in ksym_addrs and ksym_names (parallel lists, sorted
    by ascending address so they can be binary searched). The file is only
    parsed once: later calls return immediately once ksym_loaded is set.

    Raises:
        Exception: if KALLSYMS cannot be opened or read.
    """
    global ksym_loaded, ksym_addrs, ksym_names
    if ksym_loaded:
        return
    try:
        # The with-statement closes the file even if reading fails part way.
        with open(KALLSYMS, "r") as syms:
            lines = syms.readlines()
    except (IOError, OSError):
        raise Exception("Could not read %s" % KALLSYMS)

    # Parse into a local list first, so that a failure cannot leave the
    # shared tables half populated (a retry would then append duplicates).
    symbols = []
    for line in lines:
        cols = line.split()
        if len(cols) < 3:
            # Not an "address type name" record; nothing to index.
            continue
        try:
            addr = int(cols[0], 16)
        except ValueError:
            continue
        symbols.append((addr, cols[2]))

    # The lookup is a binary search, which is only valid on sorted input.
    # Sorting on the address alone is stable, so symbols that share an
    # address keep the order they had in the file.
    symbols.sort(key=lambda sym: sym[0])

    # Slice assignment updates the existing list objects in place.
    ksym_addrs[:] = [addr for addr, _ in symbols]
    ksym_names[:] = [name for _, name in symbols]
    ksym_loaded = 1
@staticmethod
def _ksym_addr2index(addr):
    """Return the index in ksym_addrs of the symbol covering addr.

    The result is the index of the last entry whose address is less than or
    equal to addr, or -1 when addr is below every entry (including when the
    table is empty). ksym_addrs must be sorted in ascending order.
    """
    # Imported here because the file's import block is outside this block.
    from bisect import bisect_right

    # bisect_right gives the insertion point after any entries equal to
    # addr, so the entry just before it is the last one with address <= addr.
    return bisect_right(ksym_addrs, addr) - 1
@staticmethod
def ksym(addr):
    """ksym(addr)

    Look up the kernel symbol name for a kernel memory address and return
    it. The symbol table is read from /proc/kallsyms on first use. The
    string "[unknown]" is returned when no symbol is found for the address.
    """
    BPF._load_kallsyms()
    pos = BPF._ksym_addr2index(addr)
    # -1 is the "not found" result of the index lookup.
    return "[unknown]" if pos == -1 else ksym_names[pos]
@staticmethod
def ksymaddr(addr):
    """ksymaddr(addr)

    Translate a kernel memory address into a kernel function name plus the
    instruction offset as a hexadecimal number, which is returned as a
    string of the form "name+0xoffset". The string "[unknown]" is returned
    when no symbol is found for the address. This is a simple translator
    that uses /proc/kallsyms.
    """
    BPF._load_kallsyms()
    idx = BPF._ksym_addr2index(addr)
    if idx == -1:
        return "[unknown]"
    # Distance from the start of the matched symbol to addr.
    offset = int(addr - ksym_addrs[idx])
    # The "+" keeps the symbol name and the offset visually separable.
    return "%s+0x%x" % (ksym_names[idx], offset)
#!/usr/bin/python #!/usr/bin/python
# #
# vfscount Count some VFS calls. # vfscount Count VFS calls ("vfs_*").
# For Linux, uses BCC, eBPF. See .c file. # For Linux, uses BCC, eBPF. See .c file.
# #
# Written as a basic example of counting functions. # Written as a basic example of counting functions.
...@@ -16,47 +16,9 @@ from ctypes import c_ushort, c_int, c_ulonglong ...@@ -16,47 +16,9 @@ from ctypes import c_ushort, c_int, c_ulonglong
from time import sleep, strftime from time import sleep, strftime
from sys import stderr from sys import stderr
# kernel symbol translation
ksym_addrs = []         # addresses for binary search
ksym_names = []         # same index as ksym_addrs


def load_kallsyms():
    """Fill ksym_addrs and ksym_names from /proc/kallsyms.

    Only symbols whose name starts with "vfs_" are kept. If the file cannot
    be opened or read, an error is printed to stderr and the script exits
    with a non-zero status.
    """
    symfile = "/proc/kallsyms"
    try:
        # The with-statement closes the file even if parsing raises.
        with open(symfile, "r") as syms:
            for line in syms:
                cols = line.split()
                if len(cols) < 3:
                    # Not an "address type name" record.
                    continue
                name = cols[2]
                if name[:4] != "vfs_":  # perf optimization
                    continue
                ksym_addrs.append(int(cols[0], 16))
                ksym_names.append(name)
    except (IOError, OSError):
        # The file imports the name stderr directly (from sys import stderr).
        print("ERROR: reading " + symfile, file=stderr)
        exit(1)
def ksym(addr):
    """Return the name of the symbol covering addr, or "[unknown]".

    Binary searches ksym_addrs for the last entry whose address is less
    than or equal to addr and returns the name stored at the same index in
    ksym_names.
    """
    lo, hi = -1, len(ksym_addrs)
    # Every entry at or before lo is <= addr; every entry at or after hi
    # is > addr. Stop once the two bounds are adjacent.
    while hi - lo > 1:
        probe = (lo + hi) // 2
        if addr < ksym_addrs[probe]:
            hi = probe
        else:
            lo = probe
    return ksym_names[lo] if lo != -1 else "[unknown]"
load_kallsyms()
# load BPF program # load BPF program
b = BPF(src_file = "vfscount.c") b = BPF(src_file = "vfscount.c")
b.attach_kprobe(event="vfs_read", fn_name="do_count") b.attach_kprobe(event_re="^vfs_.*", fn_name="do_count")
b.attach_kprobe(event="vfs_write", fn_name="do_count")
b.attach_kprobe(event="vfs_fsync", fn_name="do_count")
b.attach_kprobe(event="vfs_open", fn_name="do_count")
b.attach_kprobe(event="vfs_create", fn_name="do_count")
# header # header
print("Tracing... Ctrl-C to end.") print("Tracing... Ctrl-C to end.")
...@@ -67,7 +29,7 @@ try: ...@@ -67,7 +29,7 @@ try:
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
print("\n%-16s %-12s %8s" % ("ADDR", "FUNC", "COUNT")) print("\n%-16s %-26s %8s" % ("ADDR", "FUNC", "COUNT"))
counts = b.get_table("counts") counts = b.get_table("counts")
for k, v in sorted(counts.items(), key=lambda counts: counts[1].value): for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
print("%-16x %-12s %8d" % (k.ip, ksym(k.ip), v.value)) print("%-16x %-26s %8d" % (k.ip, b.ksym(k.ip), v.value))
Demonstrations of vfscount, the Linux eBPF/bcc version. Demonstrations of vfscount, the Linux eBPF/bcc version.
This counts VFS calls, by tracing various kernel calls beginning with "vfs_" This counts VFS calls, by tracing all kernel functions beginning with "vfs_":
(edit the script to customize):
# ./vfscount # ./vfscount
Tracing... Ctrl-C to end. Tracing... Ctrl-C to end.
^C ^C
ADDR FUNC COUNT ADDR FUNC COUNT
ffffffff811f2cc1 vfs_create 24 ffffffff811f3c01 vfs_create 1
ffffffff811e71c1 vfs_write 203 ffffffff8120be71 vfs_getxattr 2
ffffffff811e6061 vfs_open 765 ffffffff811f5f61 vfs_unlink 2
ffffffff811e7091 vfs_read 1852 ffffffff81236ca1 vfs_lock_file 6
ffffffff81218fb1 vfs_fsync_range 6
ffffffff811ecaf1 vfs_fstat 319
ffffffff811e6f01 vfs_open 475
ffffffff811ecb51 vfs_fstatat 488
ffffffff811ecac1 vfs_getattr 704
ffffffff811ec9f1 vfs_getattr_nosec 704
ffffffff811e80a1 vfs_write 1764
ffffffff811e7f71 vfs_read 2283
This can be useful for workload characterization, to see what types of This can be useful for workload characterization, to see what types of
operations are in use. operations are in use.
You can edit the script to customize what kernel functions are matched.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment