Commit 3bfc877a authored by Teng Qin's avatar Teng Qin

Use ProcMountNSGuard to manage mount namespace

This commit adds `ProcMountNS` and `ProcMountNSGuard`, that
automatically opens and closes FDs, and enters and exists mount
namespace on construction and destruction.
parent d82e813e
......@@ -22,6 +22,14 @@
#include "bcc_perf_map.h"
bool bcc_is_perf_map(const char *path) {
char* pos = strstr(path, ".map");
// Path ends with ".map"
if (pos == NULL || *(pos + 4) != 0)
return false;
return access(path, R_OK) == 0;
}
int bcc_perf_map_nstgid(int pid) {
char status_path[64];
FILE *status;
......
......@@ -27,6 +27,8 @@ extern "C" {
typedef int (*bcc_perf_map_symcb)(const char *, uint64_t, uint64_t, int,
void *);
bool bcc_is_perf_map(const char *path);
int bcc_perf_map_nstgid(int pid);
bool bcc_perf_map_path(char *map_path, size_t map_len, int pid);
int bcc_perf_map_foreach_sym(const char *path, bcc_perf_map_symcb callback,
......
......@@ -107,7 +107,7 @@ int bcc_procutils_each_module(int pid, bcc_procutils_modulecb callback,
while (isspace(mapname[0])) mapname++;
if (strchr(perm, 'x') && bcc_mapping_is_file_backed(mapname)) {
if (callback(mapname, (uint64_t)begin, (uint64_t)end, payload) < 0)
if (callback(mapname, (uint64_t)begin, (uint64_t)end, true, payload) < 0)
break;
}
}
......@@ -115,12 +115,18 @@ int bcc_procutils_each_module(int pid, bcc_procutils_modulecb callback,
fclose(procmap);
// Add a mapping to /tmp/perf-pid.map for the entire address space. This will
// be used if symbols aren't resolved in an earlier mapping.
// Address mapping for the entire address space maybe in /tmp/perf-<PID>.map
// This will be used if symbols aren't resolved in an earlier mapping.
char map_path[4096];
// Try perf-<PID>.map path with process's mount namespace, chroot and NSPID,
// in case it is generated by the process itself.
if (bcc_perf_map_path(map_path, sizeof(map_path), pid))
callback(map_path, 0, -1, payload);
callback(map_path, 0, -1, true, payload);
// Try perf-<PID>.map path with global root and PID, in case it is generated
// by other Process. Avoid checking mount namespace for this.
int res = snprintf(map_path, 4096, "/tmp/perf-%d.map", pid);
if (res > 0 && res < 4096)
callback(map_path, 0, -1, false, payload);
return 0;
}
......
......@@ -29,7 +29,9 @@ struct ns_cookie {
int nsc_newns;
};
typedef int (*bcc_procutils_modulecb)(const char *, uint64_t, uint64_t, void *);
// Module name, start address, end address, whether to check mount namespace, payload
typedef int (*bcc_procutils_modulecb)(const char *, uint64_t, uint64_t, bool, void *);
// Symbol name, address, payload
typedef void (*bcc_procutils_ksymcb)(const char *, uint64_t, void *);
char *bcc_procutils_which_so(const char *libname, int pid);
......
......@@ -15,10 +15,13 @@
*/
#include <cxxabi.h>
#include <fcntl.h>
#include <linux/elf.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <cstdio>
#include "bcc_elf.h"
#include "bcc_perf_map.h"
......@@ -35,7 +38,7 @@ ino_t ProcStat::getinode_() {
bool ProcStat::is_stale() {
ino_t cur_inode = getinode_();
return (cur_inode > 0) && (cur_inode != inode_);
return (cur_inode > 0) && (cur_inode != inode_);
}
ProcStat::ProcStat(int pid)
......@@ -92,7 +95,70 @@ bool KSyms::resolve_name(const char *_unused, const char *name,
return true;
}
ProcSyms::ProcSyms(int pid) : pid_(pid), procstat_(pid) { load_modules(); }
ProcMountNS::ProcMountNS(int pid) {
if (pid < 0)
return;
ebpf::FileDesc self_fd;
ebpf::FileDesc target_fd;
char path[256];
int res;
res = std::snprintf(path, 256, "/proc/self/ns/mnt");
if (res <= 0 || res >= 256)
return;
if ((self_fd = open(path, O_RDONLY)) < 0)
return;
res = std::snprintf(path, 256, "/proc/%d/ns/mnt", pid);
if (res <= 0 || res >= 256)
return;
if ((target_fd = open(path, O_RDONLY)) < 0)
return;
struct stat self_stat, target_stat;
if (fstat(self_fd, &self_stat) != 0)
return;
if (fstat(target_fd, &target_stat) != 0)
return;
if (self_stat.st_ino == target_stat.st_ino)
// Both current and target Process are in same mount namespace
return;
self_fd_ = std::move(self_fd);
target_fd_ = std::move(target_fd);
}
ProcMountNSGuard::ProcMountNSGuard(ProcMountNS *mount_ns)
: mount_ns_instance_(nullptr), mount_ns_(mount_ns), entered_(false) {
init();
}
ProcMountNSGuard::ProcMountNSGuard(int pid)
: mount_ns_instance_(pid > 0 ? new ProcMountNS(pid) : nullptr),
mount_ns_(mount_ns_instance_.get()),
entered_(false) {
init();
}
void ProcMountNSGuard::init() {
if (!mount_ns_ || mount_ns_->self_fd_ < 0 || mount_ns_->target_fd_ < 0)
return;
if (setns(mount_ns_->target_fd_, CLONE_NEWNS) == 0)
entered_ = true;
}
ProcMountNSGuard::~ProcMountNSGuard() {
if (mount_ns_ && entered_ && mount_ns_->self_fd_ >= 0)
setns(mount_ns_->self_fd_, CLONE_NEWNS);
}
ProcSyms::ProcSyms(int pid)
: pid_(pid), procstat_(pid), mount_ns_instance_(new ProcMountNS(pid_)) {
load_modules();
}
bool ProcSyms::load_modules() {
return bcc_procutils_each_module(pid_, _add_module, this) == 0;
......@@ -100,43 +166,26 @@ bool ProcSyms::load_modules() {
void ProcSyms::refresh() {
modules_.clear();
mount_ns_instance_.reset(new ProcMountNS(pid_));
load_modules();
procstat_.reset();
}
int ProcSyms::_add_module(const char *modname, uint64_t start, uint64_t end,
void *payload) {
struct ns_cookie nsc = {-1, -1};
bool ns_switch = false;
int arc;
bool check_mount_ns, void *payload) {
ProcSyms *ps = static_cast<ProcSyms *>(payload);
auto it = std::find_if(ps->modules_.begin(), ps->modules_.end(),
[=](const ProcSyms::Module &m) { return m.name_ == modname; });
auto it = std::find_if(
ps->modules_.begin(), ps->modules_.end(),
[=](const ProcSyms::Module &m) { return m.name_ == modname; });
if (it == ps->modules_.end()) {
// If modname references a perf-map, determine if we need to enter a mount
// namespace in order to read symbols from it later.
if (strstr(modname, ".map") != nullptr) {
ns_switch = bcc_procutils_enter_mountns(ps->pid_, &nsc);
if (ns_switch) {
char new_modname[4096];
arc = access(modname, R_OK);
bcc_procutils_exit_mountns(&nsc);
if (arc != 0) {
snprintf(new_modname, sizeof (new_modname), "/tmp/perf-%d.map",
ps->pid_);
it = ps->modules_.insert(ps->modules_.end(), Module(new_modname,
ps->pid_, false));
it->ranges_.push_back(ProcSyms::Module::Range(start, end));
return 0;
}
}
}
it = ps->modules_.insert(ps->modules_.end(), Module(modname, ps->pid_,
ns_switch));
auto module = Module(
modname, check_mount_ns ? ps->mount_ns_instance_.get() : nullptr);
if (module.init())
it = ps->modules_.insert(ps->modules_.end(), std::move(module));
else
return 0;
}
it->ranges_.push_back(ProcSyms::Module::Range(start, end));
it->ranges_.emplace_back(start, end);
return 0;
}
......@@ -172,7 +221,7 @@ bool ProcSyms::resolve_addr(uint64_t addr, struct bcc_symbol *sym,
if (original_module)
sym->module = original_module;
return res;
} else {
} else if (mod.type_ != ModuleType::PERF_MAP) {
// Record the module to which this symbol belongs, so that even if it's
// later found using a perf map, we still report the right module name.
original_module = mod.name_.c_str();
......@@ -194,13 +243,33 @@ bool ProcSyms::resolve_name(const char *module, const char *name,
return false;
}
ProcSyms::Module::Module(const char *name, int pid, bool in_ns)
: name_(name), pid_(pid), in_ns_(in_ns), loaded_(false) {
struct ns_cookie nsc;
ProcSyms::Module::Module(const char *name, ProcMountNS *mount_ns)
: name_(name),
loaded_(false),
mount_ns_(mount_ns),
type_(ModuleType::UNKNOWN) {}
bcc_procutils_enter_mountns(pid_, &nsc);
is_so_ = bcc_elf_is_shared_obj(name) == 1;
bcc_procutils_exit_mountns(&nsc);
bool ProcSyms::Module::init() {
ProcMountNSGuard g(mount_ns_);
int elf_type = bcc_elf_get_type(name_.c_str());
if (elf_type >= 0) {
if (elf_type == ET_EXEC) {
type_ = ModuleType::EXEC;
return true;
}
if (elf_type == ET_DYN) {
type_ = ModuleType::SO;
return true;
}
return false;
}
if (bcc_is_perf_map(name_.c_str()) == 1) {
type_ = ModuleType::PERF_MAP;
return true;
}
return false;
}
int ProcSyms::Module::_add_symbol(const char *symname, uint64_t start,
......@@ -211,27 +280,17 @@ int ProcSyms::Module::_add_symbol(const char *symname, uint64_t start,
return 0;
}
bool ProcSyms::Module::is_perf_map() const {
return strstr(name_.c_str(), ".map") != nullptr;
}
void ProcSyms::Module::load_sym_table() {
struct ns_cookie nsc = {-1, -1};
if (loaded_)
return;
loaded_ = true;
if (is_perf_map()) {
if (in_ns_)
bcc_procutils_enter_mountns(pid_, &nsc);
ProcMountNSGuard g(mount_ns_);
if (type_ == ModuleType::PERF_MAP)
bcc_perf_map_foreach_sym(name_.c_str(), _add_symbol, this);
} else {
bcc_procutils_enter_mountns(pid_, &nsc);
if (type_ == ModuleType::EXEC || type_ == ModuleType::SO)
bcc_elf_foreach_sym(name_.c_str(), _add_symbol, this);
}
bcc_procutils_exit_mountns(&nsc);
std::sort(syms_.begin(), syms_.end());
}
......@@ -249,7 +308,7 @@ bool ProcSyms::Module::find_name(const char *symname, uint64_t *addr) {
for (Symbol &s : syms_) {
if (*(s.name) == symname) {
*addr = is_so() ? start() + s.start : s.start;
*addr = type_ == ModuleType::SO ? start() + s.start : s.start;
return true;
}
}
......@@ -257,7 +316,7 @@ bool ProcSyms::Module::find_name(const char *symname, uint64_t *addr) {
}
bool ProcSyms::Module::find_addr(uint64_t addr, struct bcc_symbol *sym) {
uint64_t offset = is_so() ? (addr - start()) : addr;
uint64_t offset = type_ == ModuleType::SO ? (addr - start()) : addr;
load_sym_table();
......@@ -354,7 +413,7 @@ struct mod_st {
uint64_t start;
};
static int _find_module(const char *modname, uint64_t start, uint64_t end,
static int _find_module(const char *modname, uint64_t start, uint64_t end, bool,
void *p) {
struct mod_st *mod = (struct mod_st *)p;
if (!strcmp(modname, mod->name)) {
......@@ -405,7 +464,7 @@ static int _list_sym(const char *symname, uint64_t addr, uint64_t end,
if (!ELF_TYPE_IS_FUNCTION(flags) || addr == 0)
return 0;
SYM_CB cb = (SYM_CB) payload;
SYM_CB cb = (SYM_CB)payload;
return cb(symname, addr);
}
......@@ -419,8 +478,6 @@ int bcc_foreach_symbol(const char *module, SYM_CB cb) {
int bcc_resolve_symname(const char *module, const char *symname,
const uint64_t addr, int pid, struct bcc_symbol *sym) {
uint64_t load_addr;
struct ns_cookie nsc = {-1, -1};
bool success = true;
sym->module = NULL;
sym->name = NULL;
......@@ -438,29 +495,23 @@ int bcc_resolve_symname(const char *module, const char *symname,
if (sym->module == NULL)
return -1;
bcc_procutils_enter_mountns(pid, &nsc);
ProcMountNSGuard g(pid);
if (bcc_elf_loadaddr(sym->module, &load_addr) < 0) {
sym->module = NULL;
success = false;
goto exitns;
return -1;
}
sym->name = symname;
sym->offset = addr;
if (sym->name && sym->offset == 0x0) {
if (sym->name && sym->offset == 0x0)
if (bcc_find_symbol_addr(sym) < 0) {
sym->module = NULL;
success = false;
goto exitns;
return -1;
}
}
exitns:
bcc_procutils_exit_mountns(&nsc);
if (!success || sym->offset == 0x0)
if (sym->offset == 0x0)
return -1;
sym->offset = (sym->offset - load_addr);
......
......@@ -16,12 +16,14 @@
#pragma once
#include <algorithm>
#include <memory>
#include <string>
#include <sys/types.h>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <sys/types.h>
#include "common.h"
class ProcStat {
std::string procfs_;
......@@ -64,6 +66,35 @@ public:
virtual void refresh();
};
class ProcMountNSGuard;
class ProcSyms;
class ProcMountNS {
private:
explicit ProcMountNS(int pid);
ebpf::FileDesc self_fd_;
ebpf::FileDesc target_fd_;
friend class ProcMountNSGuard;
friend class ProcSyms;
};
class ProcMountNSGuard {
public:
explicit ProcMountNSGuard(ProcMountNS *mount_ns);
explicit ProcMountNSGuard(int pid);
~ProcMountNSGuard();
private:
void init();
std::unique_ptr<ProcMountNS> mount_ns_instance_;
ProcMountNS *mount_ns_;
bool entered_;
};
class ProcSyms : SymbolCache {
struct Symbol {
Symbol(const std::string *name, uint64_t start, uint64_t size, int flags = 0)
......@@ -78,6 +109,13 @@ class ProcSyms : SymbolCache {
}
};
enum class ModuleType {
UNKNOWN,
EXEC,
SO,
PERF_MAP
};
struct Module {
struct Range {
uint64_t start;
......@@ -85,13 +123,15 @@ class ProcSyms : SymbolCache {
Range(uint64_t s, uint64_t e) : start(s), end(e) {}
};
Module(const char *name, int pid, bool in_ns);
Module(const char *name, ProcMountNS* mount_ns);
bool init();
std::string name_;
std::vector<Range> ranges_;
bool is_so_;
int pid_;
bool in_ns_;
bool loaded_;
ProcMountNS *mount_ns_;
ModuleType type_;
std::unordered_set<std::string> symnames_;
std::vector<Symbol> syms_;
......@@ -100,8 +140,6 @@ class ProcSyms : SymbolCache {
uint64_t start() const { return ranges_.begin()->start; }
bool find_addr(uint64_t addr, struct bcc_symbol *sym);
bool find_name(const char *symname, uint64_t *addr);
bool is_so() const { return is_so_; }
bool is_perf_map() const;
static int _add_symbol(const char *symname, uint64_t start, uint64_t end,
int flags, void *p);
......@@ -110,8 +148,9 @@ class ProcSyms : SymbolCache {
int pid_;
std::vector<Module> modules_;
ProcStat procstat_;
std::unique_ptr<ProcMountNS> mount_ns_instance_;
static int _add_module(const char *, uint64_t, uint64_t, void *);
static int _add_module(const char *, uint64_t, uint64_t, bool, void *);
bool load_modules();
public:
......
......@@ -199,7 +199,7 @@ void Context::_each_probe(const char *binpath, const struct bcc_elf_usdt *probe,
ctx->add_probe(binpath, probe);
}
int Context::_each_module(const char *modpath, uint64_t, uint64_t, void *p) {
int Context::_each_module(const char *modpath, uint64_t, uint64_t, bool, void *p) {
Context *ctx = static_cast<Context *>(p);
// Modules may be reported multiple times if they contain more than one
// executable region. We are going to parse the ELF on disk anyway, so we
......
......@@ -199,7 +199,7 @@ class Context {
static void _each_probe(const char *binpath, const struct bcc_elf_usdt *probe,
void *p);
static int _each_module(const char *modpath, uint64_t, uint64_t, void *p);
static int _each_module(const char *modpath, uint64_t, uint64_t, bool, void *p);
void add_probe(const char *binpath, const struct bcc_elf_usdt *probe);
std::string resolve_bin_path(const std::string &bin_path);
......
......@@ -21,6 +21,7 @@
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include "bcc_elf.h"
......@@ -306,7 +307,7 @@ static int perf_map_func_noop(void *arg) {
static pid_t spawn_child(void *map_addr, bool own_pidns, bool own_mntns,
int (*child_func)(void *)) {
int flags = 0;
int flags = SIGCHLD;
if (own_pidns)
flags |= CLONE_NEWPID;
if (own_mntns)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment