Commit 3bfc877a authored by Teng Qin

Use ProcMountNSGuard to manage mount namespace

This commit adds `ProcMountNS` and `ProcMountNSGuard`, which
automatically open and close FDs, and enter and exit the mount
namespace on construction and destruction.
parent d82e813e
...@@ -22,6 +22,14 @@ ...@@ -22,6 +22,14 @@
#include "bcc_perf_map.h" #include "bcc_perf_map.h"
// Returns true when `path` names a readable perf map file.
//
// A path qualifies when it ends with the literal suffix ".map" and
// access(path, R_OK) succeeds. A NULL path yields false.
//
// The suffix is compared against the tail of the string rather than located
// with strstr(): strstr() returns the FIRST ".map" occurrence, which wrongly
// rejects paths such as "/tmp/app.map.d/perf-1.map" that contain an earlier
// ".map" but still end with one.
bool bcc_is_perf_map(const char *path) {
  static const char suffix[] = ".map";
  const size_t suffix_len = sizeof(suffix) - 1;

  if (path == NULL)
    return false;

  const size_t path_len = strlen(path);
  // Path must end with ".map"
  if (path_len < suffix_len ||
      strcmp(path + (path_len - suffix_len), suffix) != 0)
    return false;

  return access(path, R_OK) == 0;
}
int bcc_perf_map_nstgid(int pid) { int bcc_perf_map_nstgid(int pid) {
char status_path[64]; char status_path[64];
FILE *status; FILE *status;
......
...@@ -27,6 +27,8 @@ extern "C" { ...@@ -27,6 +27,8 @@ extern "C" {
typedef int (*bcc_perf_map_symcb)(const char *, uint64_t, uint64_t, int, typedef int (*bcc_perf_map_symcb)(const char *, uint64_t, uint64_t, int,
void *); void *);
bool bcc_is_perf_map(const char *path);
int bcc_perf_map_nstgid(int pid); int bcc_perf_map_nstgid(int pid);
bool bcc_perf_map_path(char *map_path, size_t map_len, int pid); bool bcc_perf_map_path(char *map_path, size_t map_len, int pid);
int bcc_perf_map_foreach_sym(const char *path, bcc_perf_map_symcb callback, int bcc_perf_map_foreach_sym(const char *path, bcc_perf_map_symcb callback,
......
...@@ -107,7 +107,7 @@ int bcc_procutils_each_module(int pid, bcc_procutils_modulecb callback, ...@@ -107,7 +107,7 @@ int bcc_procutils_each_module(int pid, bcc_procutils_modulecb callback,
while (isspace(mapname[0])) mapname++; while (isspace(mapname[0])) mapname++;
if (strchr(perm, 'x') && bcc_mapping_is_file_backed(mapname)) { if (strchr(perm, 'x') && bcc_mapping_is_file_backed(mapname)) {
if (callback(mapname, (uint64_t)begin, (uint64_t)end, payload) < 0) if (callback(mapname, (uint64_t)begin, (uint64_t)end, true, payload) < 0)
break; break;
} }
} }
...@@ -115,12 +115,18 @@ int bcc_procutils_each_module(int pid, bcc_procutils_modulecb callback, ...@@ -115,12 +115,18 @@ int bcc_procutils_each_module(int pid, bcc_procutils_modulecb callback,
fclose(procmap); fclose(procmap);
// Add a mapping to /tmp/perf-pid.map for the entire address space. This will // Address mapping for the entire address space maybe in /tmp/perf-<PID>.map
// be used if symbols aren't resolved in an earlier mapping. // This will be used if symbols aren't resolved in an earlier mapping.
char map_path[4096]; char map_path[4096];
// Try perf-<PID>.map path with process's mount namespace, chroot and NSPID,
// in case it is generated by the process itself.
if (bcc_perf_map_path(map_path, sizeof(map_path), pid)) if (bcc_perf_map_path(map_path, sizeof(map_path), pid))
callback(map_path, 0, -1, payload); callback(map_path, 0, -1, true, payload);
// Try perf-<PID>.map path with global root and PID, in case it is generated
// by other Process. Avoid checking mount namespace for this.
int res = snprintf(map_path, 4096, "/tmp/perf-%d.map", pid);
if (res > 0 && res < 4096)
callback(map_path, 0, -1, false, payload);
return 0; return 0;
} }
......
...@@ -29,7 +29,9 @@ struct ns_cookie { ...@@ -29,7 +29,9 @@ struct ns_cookie {
int nsc_newns; int nsc_newns;
}; };
typedef int (*bcc_procutils_modulecb)(const char *, uint64_t, uint64_t, void *); // Module name, start address, end address, whether to check mount namespace, payload
typedef int (*bcc_procutils_modulecb)(const char *, uint64_t, uint64_t, bool, void *);
// Symbol name, address, payload
typedef void (*bcc_procutils_ksymcb)(const char *, uint64_t, void *); typedef void (*bcc_procutils_ksymcb)(const char *, uint64_t, void *);
char *bcc_procutils_which_so(const char *libname, int pid); char *bcc_procutils_which_so(const char *libname, int pid);
......
...@@ -15,10 +15,13 @@ ...@@ -15,10 +15,13 @@
*/ */
#include <cxxabi.h> #include <cxxabi.h>
#include <fcntl.h>
#include <linux/elf.h>
#include <string.h> #include <string.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
#include <unistd.h> #include <unistd.h>
#include <cstdio>
#include "bcc_elf.h" #include "bcc_elf.h"
#include "bcc_perf_map.h" #include "bcc_perf_map.h"
...@@ -35,7 +38,7 @@ ino_t ProcStat::getinode_() { ...@@ -35,7 +38,7 @@ ino_t ProcStat::getinode_() {
bool ProcStat::is_stale() { bool ProcStat::is_stale() {
ino_t cur_inode = getinode_(); ino_t cur_inode = getinode_();
return (cur_inode > 0) && (cur_inode != inode_); return (cur_inode > 0) && (cur_inode != inode_);
} }
ProcStat::ProcStat(int pid) ProcStat::ProcStat(int pid)
...@@ -92,7 +95,70 @@ bool KSyms::resolve_name(const char *_unused, const char *name, ...@@ -92,7 +95,70 @@ bool KSyms::resolve_name(const char *_unused, const char *name,
return true; return true;
} }
ProcSyms::ProcSyms(int pid) : pid_(pid), procstat_(pid) { load_modules(); } ProcMountNS::ProcMountNS(int pid) {
// Opens the mount namespace handles of the current process and of `pid`.
// self_fd_ / target_fd_ are only assigned when every step below succeeds AND
// the two namespaces differ; on any early return they keep their
// default-constructed state (presumably an invalid, negative descriptor,
// since ProcMountNSGuard tests them with `< 0` -- confirm against
// ebpf::FileDesc).
if (pid < 0)
return;
// Local holders: the descriptors are moved into the members only at the end.
ebpf::FileDesc self_fd;
ebpf::FileDesc target_fd;
char path[256];
int res;
// Namespace handle of the calling process.
res = std::snprintf(path, 256, "/proc/self/ns/mnt");
if (res <= 0 || res >= 256)
return;
if ((self_fd = open(path, O_RDONLY)) < 0)
return;
// Namespace handle of the target process; snprintf result is checked for
// error (<= 0) and truncation (>= buffer size).
res = std::snprintf(path, 256, "/proc/%d/ns/mnt", pid);
if (res <= 0 || res >= 256)
return;
if ((target_fd = open(path, O_RDONLY)) < 0)
return;
// Compare the inode numbers of the two namespace files to decide whether
// both processes share one mount namespace.
struct stat self_stat, target_stat;
if (fstat(self_fd, &self_stat) != 0)
return;
if (fstat(target_fd, &target_stat) != 0)
return;
if (self_stat.st_ino == target_stat.st_ino)
// Both current and target Process are in same mount namespace
return;
// Namespaces differ: keep both descriptors for later use by the guard.
self_fd_ = std::move(self_fd);
target_fd_ = std::move(target_fd);
}
// Builds a guard around a ProcMountNS owned by the caller. The guard only
// borrows `mount_ns` (the owning pointer stays empty) and then calls init(),
// which attempts to enter the target mount namespace.
ProcMountNSGuard::ProcMountNSGuard(ProcMountNS *mount_ns)
    : mount_ns_instance_(),
      mount_ns_(mount_ns),
      entered_(false) {
  init();
}
// Builds a guard that owns its own ProcMountNS for `pid`. For pid <= 0 no
// ProcMountNS is created, so mount_ns_ is null and init() does nothing.
// mount_ns_ is taken from mount_ns_instance_, so the latter must be
// initialised first.
ProcMountNSGuard::ProcMountNSGuard(int pid)
    : mount_ns_instance_(pid <= 0 ? nullptr : new ProcMountNS(pid)),
      mount_ns_(mount_ns_instance_.get()),
      entered_(false) {
  init();
}
// Attempts to switch into the target mount namespace. Nothing happens when
// there is no ProcMountNS or either of its descriptors is negative.
// entered_ is set only when setns() reports success, so the destructor knows
// whether there is anything to undo.
void ProcMountNSGuard::init() {
  const bool usable =
      mount_ns_ && mount_ns_->self_fd_ >= 0 && mount_ns_->target_fd_ >= 0;
  if (usable && setns(mount_ns_->target_fd_, CLONE_NEWNS) == 0)
    entered_ = true;
}
// Switches back to the original mount namespace, but only if init() actually
// entered the target one. The result of setns() is not checked here.
ProcMountNSGuard::~ProcMountNSGuard() {
  if (!mount_ns_ || !entered_)
    return;
  if (mount_ns_->self_fd_ >= 0)
    setns(mount_ns_->self_fd_, CLONE_NEWNS);
}
// Creates the symbol cache for `pid`: records the pid, sets up the ProcStat,
// creates the ProcMountNS for the process, then loads its modules.
ProcSyms::ProcSyms(int pid)
    : pid_(pid),
      procstat_(pid),
      mount_ns_instance_(new ProcMountNS(pid)) {
  load_modules();
}
bool ProcSyms::load_modules() { bool ProcSyms::load_modules() {
return bcc_procutils_each_module(pid_, _add_module, this) == 0; return bcc_procutils_each_module(pid_, _add_module, this) == 0;
...@@ -100,43 +166,26 @@ bool ProcSyms::load_modules() { ...@@ -100,43 +166,26 @@ bool ProcSyms::load_modules() {
void ProcSyms::refresh() { void ProcSyms::refresh() {
modules_.clear(); modules_.clear();
mount_ns_instance_.reset(new ProcMountNS(pid_));
load_modules(); load_modules();
procstat_.reset(); procstat_.reset();
} }
int ProcSyms::_add_module(const char *modname, uint64_t start, uint64_t end, int ProcSyms::_add_module(const char *modname, uint64_t start, uint64_t end,
void *payload) { bool check_mount_ns, void *payload) {
struct ns_cookie nsc = {-1, -1};
bool ns_switch = false;
int arc;
ProcSyms *ps = static_cast<ProcSyms *>(payload); ProcSyms *ps = static_cast<ProcSyms *>(payload);
auto it = std::find_if(ps->modules_.begin(), ps->modules_.end(), auto it = std::find_if(
[=](const ProcSyms::Module &m) { return m.name_ == modname; }); ps->modules_.begin(), ps->modules_.end(),
[=](const ProcSyms::Module &m) { return m.name_ == modname; });
if (it == ps->modules_.end()) { if (it == ps->modules_.end()) {
// If modname references a perf-map, determine if we need to enter a mount auto module = Module(
// namespace in order to read symbols from it later. modname, check_mount_ns ? ps->mount_ns_instance_.get() : nullptr);
if (strstr(modname, ".map") != nullptr) { if (module.init())
ns_switch = bcc_procutils_enter_mountns(ps->pid_, &nsc); it = ps->modules_.insert(ps->modules_.end(), std::move(module));
if (ns_switch) { else
char new_modname[4096]; return 0;
arc = access(modname, R_OK);
bcc_procutils_exit_mountns(&nsc);
if (arc != 0) {
snprintf(new_modname, sizeof (new_modname), "/tmp/perf-%d.map",
ps->pid_);
it = ps->modules_.insert(ps->modules_.end(), Module(new_modname,
ps->pid_, false));
it->ranges_.push_back(ProcSyms::Module::Range(start, end));
return 0;
}
}
}
it = ps->modules_.insert(ps->modules_.end(), Module(modname, ps->pid_,
ns_switch));
} }
it->ranges_.push_back(ProcSyms::Module::Range(start, end)); it->ranges_.emplace_back(start, end);
return 0; return 0;
} }
...@@ -172,7 +221,7 @@ bool ProcSyms::resolve_addr(uint64_t addr, struct bcc_symbol *sym, ...@@ -172,7 +221,7 @@ bool ProcSyms::resolve_addr(uint64_t addr, struct bcc_symbol *sym,
if (original_module) if (original_module)
sym->module = original_module; sym->module = original_module;
return res; return res;
} else { } else if (mod.type_ != ModuleType::PERF_MAP) {
// Record the module to which this symbol belongs, so that even if it's // Record the module to which this symbol belongs, so that even if it's
// later found using a perf map, we still report the right module name. // later found using a perf map, we still report the right module name.
original_module = mod.name_.c_str(); original_module = mod.name_.c_str();
...@@ -194,13 +243,33 @@ bool ProcSyms::resolve_name(const char *module, const char *name, ...@@ -194,13 +243,33 @@ bool ProcSyms::resolve_name(const char *module, const char *name,
return false; return false;
} }
ProcSyms::Module::Module(const char *name, int pid, bool in_ns) ProcSyms::Module::Module(const char *name, ProcMountNS *mount_ns)
: name_(name), pid_(pid), in_ns_(in_ns), loaded_(false) { : name_(name),
struct ns_cookie nsc; loaded_(false),
mount_ns_(mount_ns),
type_(ModuleType::UNKNOWN) {}
bcc_procutils_enter_mountns(pid_, &nsc); bool ProcSyms::Module::init() {
is_so_ = bcc_elf_is_shared_obj(name) == 1; ProcMountNSGuard g(mount_ns_);
bcc_procutils_exit_mountns(&nsc); int elf_type = bcc_elf_get_type(name_.c_str());
if (elf_type >= 0) {
if (elf_type == ET_EXEC) {
type_ = ModuleType::EXEC;
return true;
}
if (elf_type == ET_DYN) {
type_ = ModuleType::SO;
return true;
}
return false;
}
if (bcc_is_perf_map(name_.c_str()) == 1) {
type_ = ModuleType::PERF_MAP;
return true;
}
return false;
} }
int ProcSyms::Module::_add_symbol(const char *symname, uint64_t start, int ProcSyms::Module::_add_symbol(const char *symname, uint64_t start,
...@@ -211,27 +280,17 @@ int ProcSyms::Module::_add_symbol(const char *symname, uint64_t start, ...@@ -211,27 +280,17 @@ int ProcSyms::Module::_add_symbol(const char *symname, uint64_t start,
return 0; return 0;
} }
bool ProcSyms::Module::is_perf_map() const {
return strstr(name_.c_str(), ".map") != nullptr;
}
void ProcSyms::Module::load_sym_table() { void ProcSyms::Module::load_sym_table() {
struct ns_cookie nsc = {-1, -1};
if (loaded_) if (loaded_)
return; return;
loaded_ = true; loaded_ = true;
if (is_perf_map()) { ProcMountNSGuard g(mount_ns_);
if (in_ns_)
bcc_procutils_enter_mountns(pid_, &nsc); if (type_ == ModuleType::PERF_MAP)
bcc_perf_map_foreach_sym(name_.c_str(), _add_symbol, this); bcc_perf_map_foreach_sym(name_.c_str(), _add_symbol, this);
} else { if (type_ == ModuleType::EXEC || type_ == ModuleType::SO)
bcc_procutils_enter_mountns(pid_, &nsc);
bcc_elf_foreach_sym(name_.c_str(), _add_symbol, this); bcc_elf_foreach_sym(name_.c_str(), _add_symbol, this);
}
bcc_procutils_exit_mountns(&nsc);
std::sort(syms_.begin(), syms_.end()); std::sort(syms_.begin(), syms_.end());
} }
...@@ -249,7 +308,7 @@ bool ProcSyms::Module::find_name(const char *symname, uint64_t *addr) { ...@@ -249,7 +308,7 @@ bool ProcSyms::Module::find_name(const char *symname, uint64_t *addr) {
for (Symbol &s : syms_) { for (Symbol &s : syms_) {
if (*(s.name) == symname) { if (*(s.name) == symname) {
*addr = is_so() ? start() + s.start : s.start; *addr = type_ == ModuleType::SO ? start() + s.start : s.start;
return true; return true;
} }
} }
...@@ -257,7 +316,7 @@ bool ProcSyms::Module::find_name(const char *symname, uint64_t *addr) { ...@@ -257,7 +316,7 @@ bool ProcSyms::Module::find_name(const char *symname, uint64_t *addr) {
} }
bool ProcSyms::Module::find_addr(uint64_t addr, struct bcc_symbol *sym) { bool ProcSyms::Module::find_addr(uint64_t addr, struct bcc_symbol *sym) {
uint64_t offset = is_so() ? (addr - start()) : addr; uint64_t offset = type_ == ModuleType::SO ? (addr - start()) : addr;
load_sym_table(); load_sym_table();
...@@ -354,7 +413,7 @@ struct mod_st { ...@@ -354,7 +413,7 @@ struct mod_st {
uint64_t start; uint64_t start;
}; };
static int _find_module(const char *modname, uint64_t start, uint64_t end, static int _find_module(const char *modname, uint64_t start, uint64_t end, bool,
void *p) { void *p) {
struct mod_st *mod = (struct mod_st *)p; struct mod_st *mod = (struct mod_st *)p;
if (!strcmp(modname, mod->name)) { if (!strcmp(modname, mod->name)) {
...@@ -405,7 +464,7 @@ static int _list_sym(const char *symname, uint64_t addr, uint64_t end, ...@@ -405,7 +464,7 @@ static int _list_sym(const char *symname, uint64_t addr, uint64_t end,
if (!ELF_TYPE_IS_FUNCTION(flags) || addr == 0) if (!ELF_TYPE_IS_FUNCTION(flags) || addr == 0)
return 0; return 0;
SYM_CB cb = (SYM_CB) payload; SYM_CB cb = (SYM_CB)payload;
return cb(symname, addr); return cb(symname, addr);
} }
...@@ -419,8 +478,6 @@ int bcc_foreach_symbol(const char *module, SYM_CB cb) { ...@@ -419,8 +478,6 @@ int bcc_foreach_symbol(const char *module, SYM_CB cb) {
int bcc_resolve_symname(const char *module, const char *symname, int bcc_resolve_symname(const char *module, const char *symname,
const uint64_t addr, int pid, struct bcc_symbol *sym) { const uint64_t addr, int pid, struct bcc_symbol *sym) {
uint64_t load_addr; uint64_t load_addr;
struct ns_cookie nsc = {-1, -1};
bool success = true;
sym->module = NULL; sym->module = NULL;
sym->name = NULL; sym->name = NULL;
...@@ -438,29 +495,23 @@ int bcc_resolve_symname(const char *module, const char *symname, ...@@ -438,29 +495,23 @@ int bcc_resolve_symname(const char *module, const char *symname,
if (sym->module == NULL) if (sym->module == NULL)
return -1; return -1;
bcc_procutils_enter_mountns(pid, &nsc); ProcMountNSGuard g(pid);
if (bcc_elf_loadaddr(sym->module, &load_addr) < 0) { if (bcc_elf_loadaddr(sym->module, &load_addr) < 0) {
sym->module = NULL; sym->module = NULL;
success = false; return -1;
goto exitns;
} }
sym->name = symname; sym->name = symname;
sym->offset = addr; sym->offset = addr;
if (sym->name && sym->offset == 0x0) { if (sym->name && sym->offset == 0x0)
if (bcc_find_symbol_addr(sym) < 0) { if (bcc_find_symbol_addr(sym) < 0) {
sym->module = NULL; sym->module = NULL;
success = false; return -1;
goto exitns;
} }
}
exitns:
bcc_procutils_exit_mountns(&nsc);
if (!success || sym->offset == 0x0) if (sym->offset == 0x0)
return -1; return -1;
sym->offset = (sym->offset - load_addr); sym->offset = (sym->offset - load_addr);
......
...@@ -16,12 +16,14 @@ ...@@ -16,12 +16,14 @@
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <memory>
#include <string> #include <string>
#include <sys/types.h>
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include <sys/types.h> #include "common.h"
class ProcStat { class ProcStat {
std::string procfs_; std::string procfs_;
...@@ -64,6 +66,35 @@ public: ...@@ -64,6 +66,35 @@ public:
virtual void refresh(); virtual void refresh();
}; };
class ProcMountNSGuard;
class ProcSyms;
// Holds the pair of mount-namespace file descriptors (current process and
// target process) used to switch between mount namespaces.
// Everything is private: only the friend classes ProcMountNSGuard and
// ProcSyms can construct an instance or read the descriptors.
class ProcMountNS {
  friend class ProcMountNSGuard;
  friend class ProcSyms;

  explicit ProcMountNS(int pid);

  ebpf::FileDesc self_fd_;    // descriptor for the current process's namespace
  ebpf::FileDesc target_fd_;  // descriptor for the target process's namespace
};
// Scope guard for mount namespaces: construction runs init(), and the
// destructor is declared so the switch can be undone when the guard goes out
// of scope. It can wrap an existing ProcMountNS or be built from a pid.
class ProcMountNSGuard {
  void init();

  // Declaration order matters: constructors initialise mount_ns_ from
  // mount_ns_instance_, so the owning pointer must come first.
  std::unique_ptr<ProcMountNS> mount_ns_instance_;
  ProcMountNS *mount_ns_;
  bool entered_;

 public:
  explicit ProcMountNSGuard(ProcMountNS *mount_ns);
  explicit ProcMountNSGuard(int pid);
  ~ProcMountNSGuard();
};
class ProcSyms : SymbolCache { class ProcSyms : SymbolCache {
struct Symbol { struct Symbol {
Symbol(const std::string *name, uint64_t start, uint64_t size, int flags = 0) Symbol(const std::string *name, uint64_t start, uint64_t size, int flags = 0)
...@@ -78,6 +109,13 @@ class ProcSyms : SymbolCache { ...@@ -78,6 +109,13 @@ class ProcSyms : SymbolCache {
} }
}; };
// Kind of a loaded module. UNKNOWN is the first enumerator (value 0).
enum class ModuleType { UNKNOWN, EXEC, SO, PERF_MAP };
struct Module { struct Module {
struct Range { struct Range {
uint64_t start; uint64_t start;
...@@ -85,13 +123,15 @@ class ProcSyms : SymbolCache { ...@@ -85,13 +123,15 @@ class ProcSyms : SymbolCache {
Range(uint64_t s, uint64_t e) : start(s), end(e) {} Range(uint64_t s, uint64_t e) : start(s), end(e) {}
}; };
Module(const char *name, int pid, bool in_ns); Module(const char *name, ProcMountNS* mount_ns);
bool init();
std::string name_; std::string name_;
std::vector<Range> ranges_; std::vector<Range> ranges_;
bool is_so_;
int pid_;
bool in_ns_;
bool loaded_; bool loaded_;
ProcMountNS *mount_ns_;
ModuleType type_;
std::unordered_set<std::string> symnames_; std::unordered_set<std::string> symnames_;
std::vector<Symbol> syms_; std::vector<Symbol> syms_;
...@@ -100,8 +140,6 @@ class ProcSyms : SymbolCache { ...@@ -100,8 +140,6 @@ class ProcSyms : SymbolCache {
uint64_t start() const { return ranges_.begin()->start; } uint64_t start() const { return ranges_.begin()->start; }
bool find_addr(uint64_t addr, struct bcc_symbol *sym); bool find_addr(uint64_t addr, struct bcc_symbol *sym);
bool find_name(const char *symname, uint64_t *addr); bool find_name(const char *symname, uint64_t *addr);
bool is_so() const { return is_so_; }
bool is_perf_map() const;
static int _add_symbol(const char *symname, uint64_t start, uint64_t end, static int _add_symbol(const char *symname, uint64_t start, uint64_t end,
int flags, void *p); int flags, void *p);
...@@ -110,8 +148,9 @@ class ProcSyms : SymbolCache { ...@@ -110,8 +148,9 @@ class ProcSyms : SymbolCache {
int pid_; int pid_;
std::vector<Module> modules_; std::vector<Module> modules_;
ProcStat procstat_; ProcStat procstat_;
std::unique_ptr<ProcMountNS> mount_ns_instance_;
static int _add_module(const char *, uint64_t, uint64_t, void *); static int _add_module(const char *, uint64_t, uint64_t, bool, void *);
bool load_modules(); bool load_modules();
public: public:
......
...@@ -199,7 +199,7 @@ void Context::_each_probe(const char *binpath, const struct bcc_elf_usdt *probe, ...@@ -199,7 +199,7 @@ void Context::_each_probe(const char *binpath, const struct bcc_elf_usdt *probe,
ctx->add_probe(binpath, probe); ctx->add_probe(binpath, probe);
} }
int Context::_each_module(const char *modpath, uint64_t, uint64_t, void *p) { int Context::_each_module(const char *modpath, uint64_t, uint64_t, bool, void *p) {
Context *ctx = static_cast<Context *>(p); Context *ctx = static_cast<Context *>(p);
// Modules may be reported multiple times if they contain more than one // Modules may be reported multiple times if they contain more than one
// executable region. We are going to parse the ELF on disk anyway, so we // executable region. We are going to parse the ELF on disk anyway, so we
......
...@@ -199,7 +199,7 @@ class Context { ...@@ -199,7 +199,7 @@ class Context {
static void _each_probe(const char *binpath, const struct bcc_elf_usdt *probe, static void _each_probe(const char *binpath, const struct bcc_elf_usdt *probe,
void *p); void *p);
static int _each_module(const char *modpath, uint64_t, uint64_t, void *p); static int _each_module(const char *modpath, uint64_t, uint64_t, bool, void *p);
void add_probe(const char *binpath, const struct bcc_elf_usdt *probe); void add_probe(const char *binpath, const struct bcc_elf_usdt *probe);
std::string resolve_bin_path(const std::string &bin_path); std::string resolve_bin_path(const std::string &bin_path);
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <sys/mount.h> #include <sys/mount.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h> #include <unistd.h>
#include "bcc_elf.h" #include "bcc_elf.h"
...@@ -306,7 +307,7 @@ static int perf_map_func_noop(void *arg) { ...@@ -306,7 +307,7 @@ static int perf_map_func_noop(void *arg) {
static pid_t spawn_child(void *map_addr, bool own_pidns, bool own_mntns, static pid_t spawn_child(void *map_addr, bool own_pidns, bool own_mntns,
int (*child_func)(void *)) { int (*child_func)(void *)) {
int flags = 0; int flags = SIGCHLD;
if (own_pidns) if (own_pidns)
flags |= CLONE_NEWPID; flags |= CLONE_NEWPID;
if (own_mntns) if (own_mntns)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment