Commit 51480d05 authored by yonghong-song's avatar yonghong-song Committed by GitHub

implement free_bcc_memory() API (#2097)

The main purpose of this API is to proactively release llvm/clang
.text memory which is brought in during compilation.
bcc .text memory for some other functions, e.g., attach_tracepoint,
bpf_prog_load, etc. can also be freed after all these tasks are done.

Note that such memory is reclaimable by the kernel since it is
file backed. But certain applications may want to reduce this
memory immediately to satisfy constraints imposed by sysadmin, etc.

The implementation uses madvise with MADV_DONTNEED.
For the case where bcc is static linked into the binary,
we do not really know the start and the end of memory regions
used by bcc, so the implementation here bluntly returns
all .text memory back to the kernel. This will incur some performance
overhead, as instructions executed later on will need to be brought
back into memory again.

For static linked library, instrumented RandomRead example,
without this patch, the RSS memory before load is:
  VmRSS:     63644 kB
  RssAnon:           23876 kB
  RssFile:           39768 kB
  RssShmem:              0 kB

After this patch,
  VmRSS:     34264 kB
  RssAnon:           23880 kB
  RssFile:           10384 kB
  RssShmem:              0 kB

For shared library, a python unit test, test_free_llvm_memory.py, is
added, which shows for a do-nothing bpf program, we have
  Before freeing llvm memory: RssFile:  43000 kB
  After  freeing llvm memory: RssFile:  11992 kB

The RssFile reduction on Facebook internal applications
also ranges in 30-40MB.
Signed-off-by: Yonghong Song <yhs@fb.com>
parent 05765eee
......@@ -117,6 +117,12 @@ int main(int argc, char** argv) {
return 1;
}
// done with all initial work, free bcc memory
if (bpf->free_bcc_memory()) {
std::cerr << "Failed to free llvm/clang memory" << std::endl;
return 1;
}
signal(SIGINT, signal_handler);
std::cout << "Started tracing, hit Ctrl-C to terminate." << std::endl;
while (true)
......
......@@ -45,7 +45,7 @@ set(bcc_util_sources ns_guard.cc common.cc)
set(bcc_sym_sources bcc_syms.cc bcc_elf.c bcc_perf_map.c bcc_proc.c)
set(bcc_common_headers libbpf.h perf_reader.h)
set(bcc_table_headers file_desc.h table_desc.h table_storage.h)
set(bcc_api_headers bpf_common.h bpf_module.h bcc_exception.h bcc_syms.h)
set(bcc_api_headers bpf_common.h bpf_module.h bcc_exception.h bcc_syms.h bcc_elf.h)
if(ENABLE_CLANG_JIT)
add_library(bcc-shared SHARED
......
......@@ -27,6 +27,7 @@
#include <vector>
#include "bcc_exception.h"
#include "bcc_elf.h"
#include "bcc_syms.h"
#include "bpf_module.h"
#include "common.h"
......@@ -707,6 +708,10 @@ StatusTuple BPF::detach_perf_event_all_cpu(open_probe_t& attr) {
return StatusTuple(0);
}
// Releases bcc's text memory by delegating to the C-level
// bcc_free_memory() and hands its integer status back unchanged.
int BPF::free_bcc_memory() {
  const int status = bcc_free_memory();
  return status;
}
USDT::USDT(const std::string& binary_path, const std::string& provider,
const std::string& name, const std::string& probe_func)
: initialized_(false),
......
......@@ -176,6 +176,8 @@ class BPF {
int& fd);
StatusTuple unload_func(const std::string& func_name);
int free_bcc_memory();
private:
std::string get_kprobe_event(const std::string& kernel_func,
bpf_probe_attach_type type);
......
......@@ -726,6 +726,164 @@ int bcc_elf_foreach_vdso_sym(bcc_elf_symcb callback, void *payload) {
return listsymbols(elf, callback, payload, &default_option);
}
// return value: 0 : success
// < 0 : error and no bcc lib found
// > 0 : error and bcc lib found
static int bcc_free_memory_with_file(const char *path) {
unsigned long sym_addr = 0, sym_shndx;
Elf_Scn *section = NULL;
int fd = -1, err;
GElf_Shdr header;
Elf *e = NULL;
if ((err = openelf(path, &e, &fd)) < 0)
goto exit;
// get symbol address of "bcc_free_memory", which
// will be used to calculate runtime .text address
// range, esp. for shared libraries.
err = -1;
while ((section = elf_nextscn(e, section)) != 0) {
Elf_Data *data = NULL;
size_t symsize;
if (!gelf_getshdr(section, &header))
continue;
if (header.sh_type != SHT_SYMTAB && header.sh_type != SHT_DYNSYM)
continue;
/* iterate all symbols */
symsize = header.sh_entsize;
while ((data = elf_getdata(section, data)) != 0) {
size_t i, symcount = data->d_size / symsize;
for (i = 0; i < symcount; ++i) {
GElf_Sym sym;
if (!gelf_getsym(data, (int)i, &sym))
continue;
if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
continue;
const char *name;
if ((name = elf_strptr(e, header.sh_link, sym.st_name)) == NULL)
continue;
if (strcmp(name, "bcc_free_memory") == 0) {
sym_addr = sym.st_value;
sym_shndx = sym.st_shndx;
break;
}
}
}
}
// Didn't find bcc_free_memory in the ELF file.
if (sym_addr == 0)
goto exit;
int sh_idx = 0;
section = NULL;
err = 1;
while ((section = elf_nextscn(e, section)) != 0) {
sh_idx++;
if (!gelf_getshdr(section, &header))
continue;
if (sh_idx == sym_shndx) {
unsigned long saddr, saddr_n, eaddr;
long page_size = sysconf(_SC_PAGESIZE);
saddr = (unsigned long)bcc_free_memory - sym_addr + header.sh_addr;
eaddr = saddr + header.sh_size;
extern unsigned long _start, _fini;
// adjust saddr and eaddr, start addr needs to be page aligned
saddr_n = (saddr + page_size - 1) & ~(page_size - 1);
eaddr -= saddr_n - saddr;
if (madvise((void *)saddr_n, eaddr - saddr_n, MADV_DONTNEED)) {
fprintf(stderr, "madvise failed, saddr %lx, eaddr %lx\n", saddr, eaddr);
goto exit;
}
err = 0;
break;
}
}
exit:
if (e)
elf_end(e);
if (fd >= 0)
close(fd);
return err;
}
// Free bcc memory
//
// The main purpose of this function is to free llvm/clang text memory
// through madvise MADV_DONTNEED.
//
// bcc could be linked statically or dynamically into the application.
// If it is static linking, there is no easy way to know which region
// inside .text section belongs to llvm/clang, so the whole .text section
// is freed. Otherwise, the process map is searched to find libbcc.so
// library and the whole .text section for that shared library is
// freed.
//
// Note that the text memory used by bcc (mainly llvm/clang) is reclaimable
// in the kernel as it is file backed. But the reclaim process
// may take some time if there is no memory pressure. So this API is mostly
// for applications that need to lower their RssFile metric immediately
// after loading a BPF program.
//
// Returns 0 on success and a negative value on failure.
int bcc_free_memory() {
  int err;

  // First try whether bcc is statically linked or not
  err = bcc_free_memory_with_file("/proc/self/exe");
  if (err >= 0)
    return -err;

  // Not statically linked, let us find the libbcc.so
  FILE *maps = fopen("/proc/self/maps", "r");
  if (!maps)
    return -1;

  char *line = NULL;
  size_t size = 0;
  while (getline(&line, &size, maps) > 0) {
    if (!strstr(line, "libbcc.so"))
      continue;

    // Parse the line and get the full libbcc.so path
    unsigned long addr_start, addr_end, offset, inode;
    int path_start = 0, path_end = 0;
    unsigned int devmajor, devminor;
    char perms[8];
    if (sscanf(line, "%lx-%lx %7s %lx %u:%u %lu %n%*[^\n]%n",
               &addr_start, &addr_end, perms, &offset,
               &devmajor, &devminor, &inode,
               &path_start, &path_end) < 7)
      break;

    // path_end is only written when the path part matched; skip lines with
    // no usable path or with one that does not fit the local buffer.
    char libbcc_path[4096];
    if (path_end <= path_start ||
        (size_t)(path_end - path_start) >= sizeof(libbcc_path))
      continue;

    // Free the text in the bcc dynamic library.
    memcpy(libbcc_path, line + path_start, path_end - path_start);
    libbcc_path[path_end - path_start] = '\0';
    err = bcc_free_memory_with_file(libbcc_path);
    err = (err <= 0) ? err : -err;

    // The library usually shows up in several mappings; the helper derives
    // the range from the function address, not from this mapping, so once
    // it has succeeded there is nothing left to do for the other lines.
    if (err == 0)
      break;
  }

  fclose(maps);
  free(line);
  return err;
}
#if 0
#include <stdio.h>
......
......@@ -68,6 +68,7 @@ int bcc_elf_get_type(const char *path);
int bcc_elf_is_shared_obj(const char *path);
int bcc_elf_is_exe(const char *path);
int bcc_elf_is_vdso(const char *name);
int bcc_free_memory();
#ifdef __cplusplus
}
......
......@@ -1259,6 +1259,9 @@ class BPF(object):
"""
self.perf_buffer_poll(timeout)
def free_bcc_memory(self):
    """Ask libbcc to release its text memory.

    Forwards to the C function ``bcc_free_memory`` and returns its
    integer status unchanged.
    """
    status = lib.bcc_free_memory()
    return status
def donothing(self):
"""the do nothing exit handler"""
......
......@@ -177,6 +177,9 @@ lib.bcc_symcache_resolve_name.argtypes = [
lib.bcc_symcache_refresh.restype = None
lib.bcc_symcache_refresh.argtypes = [ct.c_void_p]
lib.bcc_free_memory.restype = ct.c_int
lib.bcc_free_memory.argtypes = None
lib.bcc_usdt_new_frompid.restype = ct.c_void_p
lib.bcc_usdt_new_frompid.argtypes = [ct.c_int, ct.c_char_p]
......
......@@ -77,3 +77,5 @@ add_test(NAME py_test_usdt3 WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_test_usdt3 sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_usdt3.py)
add_test(NAME py_test_license WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_test_license sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_license.py)
add_test(NAME py_test_free_bcc_memory WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${TEST_WRAPPER} py_test_free_bcc_memory sudo ${CMAKE_CURRENT_SOURCE_DIR}/test_free_bcc_memory.py)
#!/usr/bin/env python
#
# USAGE: test_free_bcc_memory.py
#
# Copyright 2018 Facebook, Inc
# Licensed under the Apache License, Version 2.0 (the "License")
from __future__ import print_function
from bcc import BPF
from unittest import main, skipUnless, TestCase
from subprocess import Popen, PIPE
import distutils.version
import os
def kernel_version_ge(major, minor):
    """Return True if the running kernel is at least version major.minor.

    The kernel release string reported by ``os.uname()`` (for example
    ``4.15.0-101-generic``) is parsed directly, so the check does not rely
    on the ``distutils`` package, which newer Python releases no longer ship.
    """
    def leading_int(text):
        # Numeric prefix of one release component, e.g. "5-rc1" -> 5.
        digits = ""
        for ch in text:
            if not ch.isdigit():
                break
            digits += ch
        return int(digits) if digits else 0

    release = os.uname()[2].split(".")
    kernel_major = leading_int(release[0])
    kernel_minor = leading_int(release[1]) if len(release) > 1 else 0

    # The major number decides unless both are equal.
    if kernel_major != major:
        return kernel_major > major
    return kernel_minor >= minor
class TestFreeLLVMMemory(TestCase):
    """Checks that BPF.free_bcc_memory() lowers the process's RssFile."""

    def getRssFile(self):
        """Return [value, unit] of the RssFile line in /proc/<pid>/status.

        Both items are bytes objects as read from the file; both are None
        when no RssFile line is present.
        """
        rss = None
        unit = None
        # Read the status file directly instead of spawning `cat`, so no
        # child process is left behind un-waited.
        with open("/proc/" + str(os.getpid()) + "/status", "rb") as status:
            for line in status:
                if line.find(b'RssFile') >= 0:
                    fields = line.split()
                    rss = fields[-2]
                    unit = fields[-1]
                    break
        return [rss, unit]

    @skipUnless(kernel_version_ge(4,5), "requires kernel >= 4.5")
    def testFreeLLVMMemory(self):
        text = "int test() { return 0; }"
        b = BPF(text=text)

        # get the RssFile before freeing bcc memory
        [rss1, unit1] = self.getRssFile()
        self.assertIsNotNone(rss1)

        # free the bcc memory
        self.assertEqual(b.free_bcc_memory(), 0)

        # get the RssFile after freeing bcc memory
        [rss2, unit2] = self.getRssFile()
        self.assertIsNotNone(rss2)

        self.assertEqual(unit1, unit2)

        print("Before freeing llvm memory: RssFile: ", rss1, unit1)
        print("After freeing llvm memory: RssFile: ", rss2, unit2)
        # Compare numerically: the raw values are byte strings, and a
        # lexicographic comparison would e.g. rank b'9000' above b'11992'.
        self.assertGreater(int(rss1), int(rss2))
if __name__ == "__main__":
main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment