Commit 50985ea1 authored by Ophélie Gagnard's avatar Ophélie Gagnard

WIP: Change directories organisation.

parent 0315165a
# Include flag for the Python development headers: the first field of the
# flags reported by pkg-config.  ":=" evaluates the shell command once instead
# of on every expansion of the variable.
INCLUDE_PYTHON := $(shell pkg-config --cflags python3 | cut -f 1 -d " ")

# Scratch directory in which the agent is compiled.
AGENT_BUILD_DIR := build/metadata-collect-agent

# Builds the agent without linking against the Python runtime:
#   1. compile once and capture the (expected) link errors,
#   2. feed them to parse_link_errors.py,
#   3. build fake_python.o and link the final binary against it.
# NOTE(review): parse_link_errors.py is presumed to write fake_python.c into
# the current directory, which is why it is run from $(AGENT_BUILD_DIR).
#
# Every recipe line runs in its own shell, so a "cd" on a line of its own does
# not affect the following lines; each command that must run inside the build
# directory therefore carries its own "cd ... &&" (or uses "$(MAKE) -C").
bin/metadata-collect-agent: src/main.cpp installation/parse_link_errors.py
	mkdir -p $(AGENT_BUILD_DIR)/logs
	cp src/main.cpp $(AGENT_BUILD_DIR)/
	cp installation/parse_link_errors.py $(AGENT_BUILD_DIR)/
# Error expected here, writing a C program using the error messages
	-cd $(AGENT_BUILD_DIR) && g++ -O2 -g -Wno-unused-result -Wsign-compare -pthread $(INCLUDE_PYTHON) main.cpp -lcrypto -lfmt -o main 2> logs/link_errors
	cd $(AGENT_BUILD_DIR) && ./parse_link_errors.py < logs/link_errors
	$(MAKE) -C $(AGENT_BUILD_DIR) fake_python.o
	cd $(AGENT_BUILD_DIR) && g++ -O2 -g -Wno-unused-result -Wsign-compare -pthread $(INCLUDE_PYTHON) main.cpp fake_python.o -lcrypto -lfmt -o metadata-collect-agent
	install -d bin/
	install -m 744 $(AGENT_BUILD_DIR)/metadata-collect-agent bin/
# Runs the project's fluent-bit installation script.
# NOTE(review): the script is presumed to create bin/fluentbit; if it does not,
# this rule re-runs on every build — confirm.
bin/fluentbit:
installation/fluentbit/fluentbit-install.sh
# Fetches the "autostop" branch of the Wendelin output plugin for fluent-bit,
# builds it and copies the resulting shared object into lib/.
#
# Every recipe line runs in its own shell, so directory changes do not carry
# over from one line to the next; the clone destination is given explicitly
# and the sub-build uses "$(MAKE) -C".  The clone is skipped when the checkout
# already exists so that the rule can be re-run after a failed build.
lib/fluentbit_wendelin.so:
	mkdir -p build/
	test -d build/fluentbit-plugin-wendelin || git clone --branch autostop https://lab.nexedi.com/nexedi/fluentbit-plugin-wendelin.git build/fluentbit-plugin-wendelin
	$(MAKE) -C build/fluentbit-plugin-wendelin install
	install -d lib/
	install build/fluentbit-plugin-wendelin/libfluentbit_wendelin.so lib/fluentbit_wendelin.so
#installation/plugin-fluentbit/fluentbit_wendelin_install.sh
# Hard-links the built agent into the dracut module tree; the leading "-"
# makes make ignore a failure of the ln command.
# NOTE(review): the relative paths below ("metadata-collect-agent",
# "../../dracut.module/...") look like they assume the working directory is
# build/metadata-collect-agent/, but nothing in this recipe changes directory —
# confirm the intended working directory.
install-no-dracut:
#-ln -f ../../metadata-collect-agent metadata-collect-agent
-ln -f metadata-collect-agent ../../dracut.module/90metadata-collect/metadata-collect-agent
#-ln -f metadata-collect-agent ../../debian.package.unsafe/unsafe-boot-metadata-collect-agent/sbin/metadata-collect-agent
# Aggregate target: builds the agent, fluent-bit and the Wendelin plugin.
no-dracut: bin/metadata-collect-agent bin/fluentbit lib/fluentbit_wendelin.so
# These two targets do not produce a file named after themselves.
.PHONY: no-dracut install-no-dracut
# Generated files and build artefacts that are kept out of version control.
# NOTE(review): *.cpp, *.c, *.h and *.html are presumably outputs of the
# Cython+ compiler — confirm that no hand-written sources with these
# extensions live under this directory.
*.cpp
*.c
*.h
*.html
*.o
*.exe
*.lock
*.json
*.out
# NOTE(review): presumably the Cargo build directory of the Rust version.
rust/target/
# A Comparison program between Cython+, Python and Rust
As a way to write a Cython+ program under real conditions, we ported an existing small program already written in Python and Rust to Cython+.
The program was chosen because it is short and because it is very conducive to parallel execution, allowing us to try out Cython+'s approach to concurrent programming.
## What it does
The program in question scans a filesystem from the root down (without following symbolic links) and:
- stores the results of a `stat` system call for each directory, file and symbolic link,
- computes message digests of each file using the `md5`, `sha1`, `sha256` and `sha512` algorithms,
- stores the target path of each symbolic link.
A JSON representation of the gathered information is then dumped in a file.
As Cython+ lacks an argument parser, the root directory is hardcoded as `/` and the output file as `result.json`.
The Python and Rust programs originally were making use of an argument parser, gathering additional information, and then uploading the result online, but they were simplified to match the description above because Cython+ currently lacks a good standard library, substantially increasing the required development work for even seemingly basic tasks.
## Building the Cython+ version
### Dependencies:
The program works on Linux with Python 3.7 and depends on the following:
- openssl library
- [fmt library](https://fmt.dev/latest/index.html)
- python3.7 development headers, which depending on the distribution may be packaged separately from python itself
- [Nexedi's cython+ compiler](https://lab.nexedi.com/nexedi/cython)
The paths to the openssl and fmt libraries should be updated in `cython/Makefile` and `cython/setup.py`.
The path to the python development headers should be updated in `cython/Makefile`.
The path to the cython+ compiler should be added to environment variable `PYTHONPATH`.
### Building
First go to the cython subdirectory:
```
$ cd cython/
```
To build:
```
$ make
```
Or equivalently: `python3 setup.py build_ext --inplace`
To run:
```
$ make run
```
### Building without the Python runtime
The Cython compiler systematically links compiled programs against the python runtime; however, this program never actually calls into the Python runtime.
There is no support yet in the Cython+ language to express independence from the Python runtime and to generate code which does not need to include the Python headers. In the meantime, however, we introduce a hack: we only use the cython compiler to generate a C++ file, which we then compile and link manually; but instead of providing the python runtime to link against, we tell the linker to ignore linking errors. We also provide a `main` function as an entry point to bypass the entry point for the python interpreter.
This hack can only work because this program happens not to require the python runtime for anything.
No guarantees whatsoever are made that the program produced will behave as expected.
It has been tested and shown to work with g++.
To build:
```
$ make nopython
```
To run:
```
$ make runnopython
```
"""Build script for the Cython+ ``main`` extension module.

The extension is compiled as C++ from ``main.pyx`` and linked against the
``crypto`` (OpenSSL) and ``fmt`` libraries.

The installation prefixes of both libraries can be overridden without editing
this file by setting the ``OPENSSL_PREFIX`` and ``FMT_PREFIX`` environment
variables; when they are unset the historical hard-coded locations are used.
"""
import os
from distutils.core import setup
from distutils.extension import Extension

from Cython.Build import cythonize

# Each prefix is expected to contain an ``include/`` and a ``lib/`` directory.
OPENSSL_PREFIX = os.environ.get(
    "OPENSSL_PREFIX",
    "/srv/slapgrid/slappart6/srv/runner/shared/openssl/24bd61db512fe6e4e0d214ae77943d75",
)
FMT_PREFIX = os.environ.get(
    "FMT_PREFIX",
    "/srv/slapgrid/slappart6/srv/runner/shared/fmtlib/d524cc3d1a798a140778558556ec6d0c",
)

# Order matters only for search precedence; it matches the original lists.
_PREFIXES = [OPENSSL_PREFIX, FMT_PREFIX]
_INCLUDE_DIRS = [os.path.join(prefix, "include") for prefix in _PREFIXES]
_LIBRARY_DIRS = [os.path.join(prefix, "lib") for prefix in _PREFIXES]

extensions = [
    Extension(
        "main",
        language="c++",
        sources=["main.pyx"],
        include_dirs=_INCLUDE_DIRS,
        library_dirs=_LIBRARY_DIRS,
        libraries=["crypto", "fmt"],
        # Embed the library directories as run-time search paths so that the
        # built module finds the shared libraries in their non-standard location.
        extra_link_args=["-Wl,-rpath=" + libdir for libdir in _LIBRARY_DIRS],
    ),
]

setup(
    ext_modules=cythonize(extensions)
)
This source diff could not be displayed because it is too large. You can view the blob instead.
use anyhow::Result;
use clap::{App, Arg};
use rayon::prelude::*;
use serde::Serialize;
use std::collections::HashMap;
use std::{
ffi::OsString,
fs::DirEntry,
path::PathBuf,
sync::{Arc, Mutex},
};
use std::process::Command;
use std::io::prelude::*;
use std::fs::OpenOptions;
/// Serializable copy of the result of a `stat`-family system call.
///
/// The fields carry the same names as those of `nix::sys::stat::FileStat`,
/// from which this type is built via its `From` implementation. The
/// declaration order is the order in which `serde` emits the fields.
#[derive(Debug, Serialize)]
struct FileStat {
// Device, inode and link count.
st_dev: u64,
st_ino: u64,
st_nlink: u64,
// Mode bits and ownership.
st_mode: u32,
st_uid: u32,
st_gid: u32,
st_rdev: u64,
// Size information.
st_size: i64,
st_blksize: i64,
st_blocks: i64,
// Access, modification and status-change times, each with its
// nanosecond component.
st_atime: i64,
st_atime_nsec: i64,
st_mtime: i64,
st_mtime_nsec: i64,
st_ctime: i64,
st_ctime_nsec: i64,
}
impl From<nix::sys::stat::FileStat> for FileStat {
fn from(x: nix::sys::stat::FileStat) -> Self {
Self {
st_dev: x.st_dev,
st_ino: x.st_ino,
st_nlink: x.st_nlink,
st_mode: x.st_mode,
st_uid: x.st_uid,
st_gid: x.st_gid,
st_rdev: x.st_rdev,
st_size: x.st_size,
st_blksize: x.st_blksize,
st_blocks: x.st_blocks,
st_atime: x.st_atime,
st_atime_nsec: x.st_atime_nsec,
st_mtime: x.st_mtime,
st_mtime_nsec: x.st_mtime_nsec,
st_ctime: x.st_ctime,
st_ctime_nsec: x.st_ctime_nsec,
}
}
}
/// One node of the scanned filesystem tree (directory, file, symlink, ...).
///
/// Every field defaults to "nothing collected"; `construct_fs_tree` fills in
/// whatever it manages to read for the entry.
#[derive(Default, Debug, Serialize)]
struct Tree {
// Child entries, keyed by file name.
childs: HashMap<String, Tree>,
// Result of `lstat` on the entry; `None` when the call failed.
stat: Option<FileStat>,
// Extended attributes, keyed by attribute name, holding the value returned
// by `xattr::get`.
xattrs: HashMap<String, Option<Vec<u8>>>,
// Textual form of the POSIX ACL; `None` when it could not be read.
posix_acls: Option<String>,
// Set when the path lies under one of the ignored directories, in which
// case its content is not scanned.
ignored: bool,
// Target of the link, for symbolic links only.
symlink_target: Option<OsString>,
// Hex-encoded digests of the content, for regular files that could be read.
md5: Option<String>,
sha1: Option<String>,
sha256: Option<String>,
sha512: Option<String>,
}
/// Top-level document that `main` serializes to JSON.
#[derive(Serialize)]
struct Snapshot {
// Partitions as returned by `psutil::disk::partitions_physical`.
disk_partitions: Vec<psutil::disk::Partition>,
// Metadata tree rooted at the start directory.
fs_tree: Tree,
}
fn construct_fs_tree(
cur_tree: Option<Tree>,
path: &PathBuf,
dev_whitelist: &Vec<u64>,
ignored_dirs: &Vec<PathBuf>,
) -> Result<Tree> {
let mut cur_tree = match cur_tree {
Some(cur_tree) => cur_tree,
None => Tree {
stat: Some(nix::sys::stat::lstat(path)?.into()),
..Tree::default()
},
};
if !dev_whitelist.iter().any(|x| match &cur_tree.stat {
Some(stat) if stat.st_dev == *x => true,
_ => false,
}) {
return Ok(cur_tree);
}
if ignored_dirs.iter().any(|x| path.starts_with(x)) {
cur_tree.ignored = true;
return Ok(cur_tree);
}
let entries: Vec<Result<DirEntry, _>> = match std::fs::read_dir(&path) {
Ok(x) => x,
_ => return Ok(cur_tree),
}
.collect();
let cur_tree = {
let cur_tree = Arc::new(Mutex::new(cur_tree));
entries.par_iter().for_each(|entry| match entry {
Ok(entry) => {
let mut tree = Tree {
stat: match nix::sys::stat::lstat(&entry.path()) {
Ok(s) => Some(s.into()),
_ => None,
},
posix_acls: match posix_acl::PosixACL::read_acl(&entry.path()) {
Ok(acl) => Some(acl.as_text()),
_ => None,
},
xattrs: match xattr::list(&entry.path()) {
Ok(xattrs) => xattrs
.filter_map(|name| match xattr::get(&entry.path(), &name) {
Ok(xattr) => Some((name.to_str().unwrap().to_string(), xattr)),
_ => None,
})
.collect(),
_ => HashMap::new(),
},
..Tree::default()
};
match entry.file_type() {
Ok(file_type) if file_type.is_dir() => {
tree = construct_fs_tree(
Some(tree),
&entry.path(),
dev_whitelist,
ignored_dirs,
)
.unwrap();
}
Ok(file_type) if file_type.is_file() => {
if let Ok(mut file) = std::fs::File::open(&entry.path()) {
use md5::{Digest, Md5};
use sha1::Sha1;
use sha2::{Sha256, Sha512};
let mut md5 = Md5::new();
let mut sha1 = Sha1::new();
let mut sha256 = Sha256::new();
let mut sha512 = Sha512::new();
let buf: &mut [u8] = &mut [0; 8 * 1024];
loop {
match file.read(buf) {
Ok(0) => {
tree.md5 = Some(hex::encode(md5.finalize()));
tree.sha1 = Some(hex::encode(sha1.finalize()));
tree.sha256 = Some(hex::encode(sha256.finalize()));
tree.sha512 = Some(hex::encode(sha512.finalize()));
break;
}
Ok(n) => {
md5.update(&buf[0..n - 1]);
sha1.update(&buf[0..n - 1]);
sha256.update(&buf[0..n - 1]);
sha512.update(&buf[0..n - 1]);
}
Err(e) if e.kind() == std::io::ErrorKind::Interrupted => {
continue;
}
Err(e) => {
eprintln!("{:#?}", e);
break;
}
};
}
}
}
Ok(file_type) if file_type.is_symlink() => {
tree.symlink_target = std::fs::read_link(&entry.path())
.map(|x| x.as_os_str().into())
.ok();
}
_ => {}
};
{
let mut locked = cur_tree.lock().unwrap();
locked
.childs
.insert(entry.file_name().to_str().unwrap().to_string(), tree);
}
}
_ => {}
});
Arc::try_unwrap(cur_tree).unwrap().into_inner().unwrap()
};
Ok(cur_tree)
}
fn main() -> Result<()> {
let m = App::new("metadata-collect-agent")
.args(&[
Arg::with_name("start_directory")
.required(true)
.takes_value(true)
.multiple(false),
Arg::with_name("ignored-dirs")
.takes_value(true)
.multiple(true)
.required(false),
])
.get_matches();
let ignored_dirs = m
.values_of("ignored-dirs")
.unwrap_or(clap::Values::default())
.map(PathBuf::from)
.collect();
let mut file = OpenOptions::new().write(true).append(true).create(true).open("/var/log/metadata_collect.log").unwrap();
let mut child = Command::new("/sbin/fluent-bit").args(&["-e","/etc/fluentbit_wendelin.so","-c","/etc/flb.conf"]).spawn().unwrap();
let disk_partitions = psutil::disk::partitions_physical()?;
let dev_whitelist = disk_partitions
.iter()
.map(|p| nix::sys::stat::lstat(p.mountpoint()).unwrap().st_dev)
.collect();
let snapshot = Snapshot {
disk_partitions,
fs_tree: construct_fs_tree(
None,
&PathBuf::from(m.value_of("start_directory").unwrap()),
&dev_whitelist,
&ignored_dirs,
)?,
};
let packed = serde_json::to_string(&snapshot)?;
for s in packed.split(":{"){
file.write_all((s.to_owned()+"\n").as_bytes()).expect("Unable to write to log");
}
file.write_all("fluentbit_end\n".as_bytes()).expect("Unable to write to log");
println!("finished to write file");
let _result = child.wait().unwrap();
Ok(())
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment