Commit 4ffdaae6 authored by Xavier Thompson's avatar Xavier Thompson

Refactor scan-filesystem

parent 9f34d772
ifeq (,)
INCLUDE_DIRS = -I/usr/include/python3.8
else
INCLUDE_PYTHON = -I/srv/slapgrid/slappart6/srv/runner/shared/python3/2e435adf7e2cb7d97668da52532ac7f3/include/python3.7m
OPENSSL_PATH = /srv/slapgrid/slappart6/srv/runner/shared/openssl/24bd61db512fe6e4e0d214ae77943d75
INCLUDE_OPENSSL = -I$(OPENSSL_PATH)/include
LIBRARY_OPENSSL = -L$(OPENSSL_PATH)/lib
RUNPATH_OPENSSL = -Wl,-rpath=$(OPENSSL_PATH)/lib
FMTLIB_PATH = /srv/slapgrid/slappart6/srv/runner/shared/fmtlib/d524cc3d1a798a140778558556ec6d0c
INCLUDE_FMTLIB = -I$(FMTLIB_PATH)/include
LIBRARY_FMTLIB = -L$(FMTLIB_PATH)/lib
RUNPATH_FMTLIB = -Wl,-rpath=$(FMTLIB_PATH)/lib
INCLUDE_DIRS = $(INCLUDE_PYTHON) $(INCLUDE_OPENSSL) $(INCLUDE_FMTLIB)
LIBPATHS = $(LIBRARY_OPENSSL) $(LIBRARY_FMTLIB)
RUNPATHS = $(RUNPATH_OPENSSL) $(RUNPATH_FMTLIB)
LDFLAGS = $(LIBPATHS) $(RUNPATHS)
endif
EXE = main
CXX = g++
CPPFLAGS = -O2 -g -Wno-unused-result -Wsign-compare -pthread $(INCLUDE_DIRS)
......@@ -34,17 +6,6 @@ LDLIBS = -lcrypto -lfmt
EXT_SUFFIX := $(shell python3 -c "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX'))")
EXT = $(EXE)$(EXT_SUFFIX)
# Build with Python runtime
all: $(EXT)
$(EXT): setup.py
@echo "[Cython Compiling $^ -> $@]"
python3 setup.py build_ext --inplace
# Run with Python runtime
run: $(EXT)
python3 -c "import $(EXE); $(EXE).python_main()" 2>/dev/null
# Build without Python runtime
nopython: $(EXE)
......@@ -57,10 +18,6 @@ nopython: $(EXE)
@echo "[C++ Compiling $^ -> $@]"
$(LINK.cpp) $^ $(LOADLIBES) $(LDLIBS) -o $@
# Run without Python runtime
runnopython: $(EXE)
./$(EXE) 2>/dev/null
clean:
-rm -f *.c *.cpp *.html
-rm -f *.h
......
# distutils: language = c++
# distutils: libraries = fmt crypto
from libcythonplus.list cimport cyplist
from stdlib.list cimport List
from stdlib.string cimport Str
from stdlib.format cimport format
from libc.stdio cimport fprintf, fopen, fclose, fread, fwrite, FILE, stdout, printf, ferror
from runtime.runtime cimport Scheduler, BatchMailBox
from runtime.runtime cimport SequentialMailBox, BatchMailBox, NullResult, Scheduler
from stdlib.stat cimport Stat, dev_t
from stdlib.digest cimport MessageDigest, md5sum, sha1sum, sha256sum, sha512sum
from stdlib.fmt cimport sprintf
from stdlib.string cimport string
from stdlib.dirent cimport DIR, struct_dirent, opendir, readdir, closedir
from util.hashlib cimport MessageDigest, md5sum, sha1sum, sha256sum, sha512sum
from util.stat cimport Stat, dev_t
from posix.unistd cimport readlink
cimport util.sys as sys
from util.sys cimport FILE, DIR
cdef lock Scheduler scheduler
cdef Str curdir = Str(".")
cdef Str pardir = Str("..")
cdef Str sep = Str("/")
cdef lock Scheduler scheduler = Scheduler()
cdef cypclass Node activable:
string path
string name
Stat st
string formatted
Str path
Stat stat
Str json
__init__(self, string path, string name, Stat st):
self._active_result_class = NullResult
__init__(self, Str path, Stat stat):
self._active_queue_class = consume BatchMailBox(scheduler)
self.path = path
self.name = name
self.st = st
self.stat = stat
void build_node(self, lock cyplist[dev_t] dev_whitelist, lock cyplist[string] ignore_paths):
# abstract
pass
void build_node(self, lock List[dev_t] dev_whitelist, lock List[Str] ignore_paths): pass
void format_node(self):
self.formatted = sprintf("""\
{
"%s": {
"stat": %s
}
},
self.json = format("""\
{{
"{}": {{
"stat": {}
}}
}},
""",
self.path,
self.st.to_json(),
self.stat.to_json(),
)
void write_node(self, FILE * stream):
# abstract
pass
void write_node(self, FILE * stream): pass
cdef iso Node make_node(string path, string name) nogil:
s = Stat(path)
cdef iso Node make_node(iso Str path) nogil:
cdef Node node
p = <Str> consume path
s = Stat(p)
if s is NULL:
return NULL
node = NULL
elif s.is_symlink():
return consume SymlinkNode(path, name, consume s)
node = SymlinkNode(p, s)
elif s.is_dir():
return consume DirNode(path, name, consume s)
node = DirNode(p, s)
elif s.is_regular():
return consume FileNode(path, name, consume s)
return NULL
node = FileNode(p, s)
else:
node = NULL
del p, s
return consume node
cdef cypclass DirNode(Node):
cyplist[active Node] children
__init__(self, string path, string name, Stat st):
Node.__init__(self, path, name, st)
self.children = new cyplist[active Node]()
self.children.__init__()
ctypedef List[active Node] Children
Children children
void build_node(self, lock cyplist[dev_t] dev_whitelist, lock cyplist[string] ignore_paths):
cdef DIR *d
cdef struct_dirent *entry
cdef string entry_name
cdef string entry_path
__init__(self, Str path, Stat stat):
Node.__init__(self, path, stat)
self.children = Children()
void build_node(self, lock List[dev_t] dev_whitelist, lock List[Str] ignore_paths):
if ignore_paths is not NULL:
if self.path in ignore_paths:
if consume self.path.copy() in ignore_paths:
return
if dev_whitelist is not NULL:
if self.st is NULL:
if self.stat is NULL:
return
elif not self.st.st_data.st_dev in dev_whitelist:
elif not self.stat.st_data.st_dev in dev_whitelist:
return
d = opendir(self.path.c_str())
if d is not NULL:
while 1:
entry = readdir(d)
if entry is NULL:
break
entry_name = entry.d_name
if entry_name == b'.' or entry_name == b'..':
entries = sys.listdir(self.path)
if entries is not NULL:
for name in entries:
if name == curdir or name == pardir:
continue
entry_path = self.path
if entry_path != b'/':
entry_path += b'/'
entry_path += entry_name
entry_node = make_node(entry_path, entry_name)
if entry_node is NULL:
path = self.path
if Str(path[-1]) != sep:
path = path + sep
path = path + name
child = make_node(consume path)
if child is NULL:
continue
active_entry = activate(consume entry_node)
self.children.append(active_entry)
closedir(d)
self.children.append(activate(consume child))
self.format_node()
......@@ -115,38 +107,35 @@ cdef cypclass DirNode(Node):
active_child.build_node(NULL, dev_whitelist, ignore_paths)
void write_node(self, FILE * stream):
fwrite(self.formatted.data(), 1, self.formatted.size(), stream)
sys.write(self.json, stream)
while self.children.__len__() > 0:
active_child = self.children[self.children.__len__() -1]
del self.children[self.children.__len__() -1]
active_child = self.children.pop()
child = consume active_child
child.write_node(stream)
cdef enum:
BUFSIZE = 64 * 1024
CHUNK = 64 * 1024
cdef cypclass FileNode(Node):
string md5_data
string sha1_data
string sha256_data
string sha512_data
Str md5_data
Str sha1_data
Str sha256_data
Str sha512_data
bint error
__init__(self, string path, string name, Stat st):
Node.__init__(self, path, name, st)
__init__(self, Str path, Stat stat):
Node.__init__(self, path, stat)
self.error = False
void build_node(self, lock cyplist[dev_t] dev_whitelist, lock cyplist[string] ignore_paths):
cdef unsigned char buffer[BUFSIZE]
cdef bint eof = False
void build_node(self, lock List[dev_t] dev_whitelist, lock List[Str] ignore_paths):
cdef bint md5_ok
cdef bint sha1_ok
cdef bint sha256_ok
cdef bint sha512_ok
cdef FILE * file = fopen(self.path.c_str(), 'rb')
cdef FILE * file = sys.open(self.path, 'rb')
if file is NULL:
self.error = True
......@@ -163,20 +152,21 @@ cdef cypclass FileNode(Node):
sha256_ok = sha256 is not NULL
sha512_ok = sha512 is not NULL
while not eof and (md5_ok or sha1_ok or sha256_ok or sha512_ok):
size = fread(buffer, 1, BUFSIZE, file)
if size != BUFSIZE:
self.error = ferror(file)
if self.error:
while (md5_ok or sha1_ok or sha256_ok or sha512_ok):
s = sys.read(file, CHUNK)
if s is NULL:
self.error = True
break
eof = True
if md5_ok: md5_ok = md5.update(buffer, size) == 0
if sha1_ok: sha1_ok = sha1.update(buffer, size) == 0
if sha256_ok: sha256_ok = sha256.update(buffer, size) == 0
if sha512_ok: sha512_ok = sha512.update(buffer, size) == 0
if md5_ok: md5_ok = md5.update(s) == 0
if sha1_ok: sha1_ok = sha1.update(s) == 0
if sha256_ok: sha256_ok = sha256.update(s) == 0
if sha512_ok: sha512_ok = sha512.update(s) == 0
fclose(file)
if s.__len__() != CHUNK:
break
sys.close(file)
if not self.error:
if md5_ok: self.md5_data = md5.hexdigest()
......@@ -190,21 +180,21 @@ cdef cypclass FileNode(Node):
if self.error:
Node.format_node(self)
else:
self.formatted = sprintf("""\
{
"%s": {
"stat": %s,
"digests": {
"md5": "%s",
"sha1": "%s",
"sha256": "%s",
"sha512": "%s"
}
}
},
self.json = format("""\
{{
"{}": {{
"stat": {},
"digests": {{
"md5": "{}",
"sha1": "{}",
"sha256": "{}",
"sha512": "{}"
}}
}}
}},
""",
self.path,
self.st.to_json(),
self.stat.to_json(),
self.md5_data,
self.sha1_data,
self.sha256_data,
......@@ -212,63 +202,55 @@ cdef cypclass FileNode(Node):
)
void write_node(self, FILE * stream):
fwrite(self.formatted.data(), 1, self.formatted.size(), stream)
sys.write(self.json, stream)
cdef cypclass SymlinkNode(Node):
string target
int error
void build_node(self, lock cyplist[dev_t] dev_whitelist, lock cyplist[string] ignore_paths):
size = self.st.st_data.st_size + 1
self.target.resize(size)
real_size = readlink(self.path.c_str(), <char*> self.target.data(), size)
self.error = not (0 < real_size < size)
self.target.resize(real_size)
Str target
void build_node(self, lock List[dev_t] dev_whitelist, lock List[Str] ignore_paths):
self.target = sys.readlink(self.path, self.stat.st_data.st_size)
self.format_node()
void format_node(self):
if self.error:
if self.target is NULL:
Node.format_node(self)
else:
self.formatted = sprintf("""\
{
"%s": {
"stat": %s,
"target": "%s"
}
},
self.json = format("""\
{{
"{}": {{
"stat": {},
"target": {}"
}}
}},
""",
self.path,
self.st.to_json(),
self.stat.to_json(),
self.target,
)
void write_node(self, FILE * stream):
fwrite(self.formatted.data(), 1, self.formatted.size(), stream)
sys.write(self.json, stream)
cdef int start(string path) nogil:
global scheduler
scheduler = Scheduler()
ignore_paths = cyplist[string]()
ignore_paths.append(b'/opt/slapgrid')
ignore_paths.append(b'/srv/slapgrid')
cdef int scan(iso Str root) nogil:
ignore_paths = List[Str]()
ignore_paths.append(Str("/opt/slapgrid"))
ignore_paths.append(Str("/srv/slapgrid"))
dev_whitelist_paths = cyplist[string]()
dev_whitelist_paths.append(b'.')
dev_whitelist_paths.append(b'/')
dev_whitelist_paths.append(b'/boot')
dev_whitelist_paths = List[Str]()
dev_whitelist_paths.append(Str("."))
dev_whitelist_paths.append(Str("/"))
dev_whitelist_paths.append(Str("/boot"))
dev_whitelist = cyplist[dev_t]()
dev_whitelist = List[dev_t]()
for p in dev_whitelist_paths:
p_stat = Stat(p)
if p_stat is not NULL:
p_dev = p_stat.st_data.st_dev
dev_whitelist.append(p_dev)
node = make_node(path, path)
node = make_node(consume root)
if node is NULL:
return -1
......@@ -276,26 +258,17 @@ cdef int start(string path) nogil:
active_node.build_node(NULL, consume dev_whitelist, consume ignore_paths)
scheduler.finish()
scheduler.join()
node = consume active_node
result = fopen('result.json', 'w')
if result is NULL:
return -1
fprintf(result, '[\n')
node.write_node(result)
fprintf(result, ' {}\n]\n')
fclose(result)
del scheduler
sys.write(Str("[\n"), sys.stdout)
node.write_node(sys.stdout)
sys.write(Str(" {}\n]\n"), sys.stdout)
return 0
cdef public int main() nogil:
return start(<char*>'.')
def main():
with nogil:
scan(consume Str("."))
def python_main():
start(<char*>'.')
cdef extern from "<sys/types.h>" nogil:
ctypedef long unsigned int pthread_t
ctypedef union pthread_attr_t:
pass
ctypedef union pthread_mutex_t:
pass
ctypedef union pthread_mutexattr_t:
pass
ctypedef union pthread_barrier_t:
pass
ctypedef union pthread_barrierattr_t:
pass
ctypedef union pthread_cond_t:
pass
ctypedef union pthread_condattr_t:
pass
cdef extern from "<pthread.h>" nogil:
int pthread_create(pthread_t *, const pthread_attr_t *, void *(*)(void *), void *)
void pthread_exit(void *)
int pthread_join(pthread_t, void **)
int pthread_cancel(pthread_t thread)
int pthread_attr_init(pthread_attr_t *)
int pthread_attr_setdetachstate(pthread_attr_t *, int)
int pthread_attr_destroy(pthread_attr_t *)
int pthread_mutex_init(pthread_mutex_t *, const pthread_mutexattr_t *)
int pthread_mutex_destroy(pthread_mutex_t *)
int pthread_mutex_lock(pthread_mutex_t *)
int pthread_mutex_unlock(pthread_mutex_t *)
int pthread_mutex_trylock(pthread_mutex_t *)
int pthread_barrier_init(pthread_barrier_t *, const pthread_barrierattr_t *, unsigned int)
int pthread_barrier_destroy(pthread_barrier_t *)
int pthread_barrier_wait(pthread_barrier_t *)
int pthread_cond_init(pthread_cond_t * cond, const pthread_condattr_t * attr)
int pthread_cond_destroy(pthread_cond_t *cond)
int pthread_cond_wait(pthread_cond_t * cond, pthread_mutex_t * mutex)
int pthread_cond_broadcast(pthread_cond_t *cond)
int pthread_cond_signal(pthread_cond_t *cond)
enum: PTHREAD_CREATE_JOINABLE
\ No newline at end of file
# distutils: language = c++
from libcpp.deque cimport deque
from libcpp.vector cimport vector
from libcpp.atomic cimport atomic
from libc.stdio cimport printf
from libc.stdlib cimport rand
from posix.unistd cimport sysconf
from runtime.pthreads cimport *
from runtime.semaphore cimport *
cdef extern from "<unistd.h>" nogil:
enum: _SC_NPROCESSORS_ONLN # Seems to not be included in "posix.unistd".
cdef cypclass Scheduler
cdef cypclass Worker
# The 'inline' qualifier on this function is a hack to convince Cython to allow a definition in a .pxd file.
# The C compiler will dismiss it because we pass the function pointer to create a thread which prevents inlining.
cdef inline void * worker_function(void * arg) nogil:
worker = <lock Worker> arg
sch = <Scheduler> <void*> worker.scheduler
# Wait until all the workers are ready.
pthread_barrier_wait(&sch.barrier)
while 1:
# Wait until a queue becomes available.
sem_wait(&sch.num_free_queues)
# If the scheduler is done there is nothing to do anymore.
if sch.is_done:
return <void*> 0
# Pop or steal a queue.
queue = worker.get_queue()
with wlocked queue:
# Do one task on the queue.
queue.activate()
if queue.is_empty():
# Mark the empty queue as not assigned to any worker.
queue.has_worker = False
# Decrement the number of non-completed queues.
if sch.num_pending_queues.fetch_sub(1) == 1:
# Signal that there are no more queues.
sem_post(&sch.done)
# Discard the empty queue and continue the main loop.
continue
# The queue is not empty: reinsert it in this worker's queues.
worker.queues.push_back(queue)
# Signal that the queue is available.
sem_post(&sch.num_free_queues)
cdef cypclass Worker:
deque[lock SequentialMailBox] queues
lock Scheduler scheduler
pthread_t thread
lock Worker __new__(alloc, lock Scheduler scheduler):
instance = consume alloc()
instance.scheduler = scheduler
locked_instance = <lock Worker> consume instance
if not pthread_create(&locked_instance.thread, NULL, worker_function, <void *> locked_instance):
return locked_instance
printf("pthread_create() failed\n")
lock SequentialMailBox get_queue(lock self):
# Get the next queue in the worker's list or steal one.
with wlocked self:
if not self.queues.empty():
queue = self.queues.front()
self.queues.pop_front()
return queue
return self.steal_queue()
lock SequentialMailBox steal_queue(lock self):
# Steal a queue from another worker:
# - inspect each worker in order starting at a random offset
# - skip any worker with an empty queue list
# - return the last queue of the first worker with a non-empty list
# - continue looping until a queue is found
cdef int i, index, num_workers, random_offset
sch = <Scheduler> <void*> self.scheduler
num_workers = <int> sch.workers.size()
index = rand() % num_workers
while True:
victim = sch.workers[index]
with wlocked victim:
if not victim.queues.empty():
stolen_queue = victim.queues.back()
victim.queues.pop_back()
return stolen_queue
index += 1
if index >= num_workers:
index = 0
int join(self):
# Join the worker thread.
return pthread_join(self.thread, NULL)
cdef cypclass Scheduler:
vector[lock Worker] workers
pthread_barrier_t barrier
sem_t num_free_queues
atomic[int] num_pending_queues
sem_t done
volatile bint is_done
int num_workers
lock Scheduler __new__(alloc, int num_workers=0):
self = <lock Scheduler> consume alloc()
if num_workers == 0: num_workers = sysconf(_SC_NPROCESSORS_ONLN)
self.num_workers = num_workers
sem_init(&self.num_free_queues, 0, 0)
sem_init(&self.done, 0, 0)
self.num_pending_queues.store(0)
if pthread_barrier_init(&self.barrier, NULL, num_workers + 1):
printf("Could not allocate memory for the thread barrier\n")
# Signal that no work will be done.
sem_post(&self.done)
return self
self.is_done = False
self.workers.reserve(num_workers)
for i in range(num_workers):
worker = Worker(self)
if worker is NULL:
# Signal that no work will be done.
sem_post(&self.done)
return self
self.workers.push_back(worker)
# Wait until all the worker threads are ready.
pthread_barrier_wait(&self.barrier)
return self
__dealloc__(self):
pthread_barrier_destroy(&self.barrier)
sem_destroy(&self.num_free_queues)
sem_destroy(&self.done)
void post_queue(lock self, lock SequentialMailBox queue):
cdef int num_workers, random_offset
sch = <Scheduler> <void*> self
# Add a queue to a random worker.
num_workers = <int> sch.workers.size()
random_offset = rand() % num_workers
receiver = sch.workers[random_offset]
with wlocked receiver:
queue.has_worker = True
receiver.queues.push_back(queue)
# Increment the number of non-completed queues.
sch.num_pending_queues.fetch_add(1)
# Signal that a queue is available.
sem_post(&sch.num_free_queues)
void finish(lock self):
# Wait until there is no more work.
done = &self.done
sem_wait(done)
# Signal the worker threads that there is no more work.
self.is_done = True
# Pretend that there are new queues to wake up the workers.
num_free_queues = &self.num_free_queues
for worker in self.workers:
sem_post(num_free_queues)
# Clear the workers to break reference cycles.
self.workers.clear()
cdef cypclass SequentialMailBox(ActhonQueueInterface):
deque[ActhonMessageInterface] messages
lock Scheduler scheduler
bint has_worker
__init__(self, lock Scheduler scheduler):
self.scheduler = scheduler
self.has_worker = False
bint is_empty(const self):
return self.messages.empty()
void push(locked self, ActhonMessageInterface message):
# Add a task to the queue.
self.messages.push_back(message)
if message._sync_method is not NULL:
message._sync_method.insertActivity()
# If no worker is already assigned this queue
# register it with the scheduler.
if not self.has_worker:
self.scheduler.post_queue(self)
bint activate(self):
# Try to process the first message in the queue.
cdef bint one_message_processed
if self.messages.empty():
return False
next_message = self.messages.front()
self.messages.pop_front()
one_message_processed = next_message.activate()
if one_message_processed:
if next_message._sync_method is not NULL:
next_message._sync_method.removeActivity()
else:
printf("Pushed front message to back :/\n")
self.messages.push_back(next_message)
return one_message_processed
cdef cypclass BatchMailBox(SequentialMailBox):
bint activate(self):
# Process as many messages as possible.
while not self.messages.empty():
next_message = self.messages.front()
self.messages.pop_front()
if not next_message.activate():
printf("Pushed front message to back :/\n")
self.messages.push_back(next_message)
return False
if next_message._sync_method is not NULL:
next_message._sync_method.removeActivity()
return True
cdef inline ActhonResultInterface NullResult() nogil:
return NULL
cdef extern from "<semaphore.h>" nogil:
ctypedef struct sem_t:
pass
int sem_init(sem_t *sem, int pshared, unsigned int value)
int sem_wait(sem_t *sem)
int sem_post(sem_t *sem)
int sem_getvalue(sem_t *, int *)
int sem_destroy(sem_t* sem)
from stdlib.string cimport string
cdef extern from "<fmt/printf.h>" namespace "fmt" nogil:
ctypedef struct FILE
int printf (const char* template, ...)
int fprintf (FILE *stream, const char* template, ...)
string sprintf (const char* template, ...)
# Differences with libcpp.string:
#
# - declarations for operator+= have been added.
cdef extern from "<string>" namespace "std" nogil:
size_t npos = -1
cdef cppclass string:
cppclass iterator:
iterator()
char& operator*()
iterator(iterator &)
iterator operator++()
iterator operator--()
bint operator==(iterator)
bint operator!=(iterator)
cppclass reverse_iterator:
char& operator*()
iterator operator++()
iterator operator--()
iterator operator+(size_t)
iterator operator-(size_t)
bint operator==(reverse_iterator)
bint operator!=(reverse_iterator)
bint operator<(reverse_iterator)
bint operator>(reverse_iterator)
bint operator<=(reverse_iterator)
bint operator>=(reverse_iterator)
cppclass const_iterator(iterator):
pass
cppclass const_reverse_iterator(reverse_iterator):
pass
string() except +
string(const char *) except +
string(const char *, size_t) except +
string(const string&) except +
# as a string formed by a repetition of character c, n times.
string(size_t, char) except +
# from a pair of iterators
string(iterator first, iterator last) except +
iterator begin()
const_iterator const_begin "begin"()
iterator end()
const_iterator const_end "end"()
reverse_iterator rbegin()
const_reverse_iterator const_rbegin "rbegin"()
reverse_iterator rend()
const_reverse_iterator const_rend "rend"()
const char* c_str()
const char* data()
size_t size()
size_t max_size()
size_t length()
void resize(size_t)
void resize(size_t, char c)
size_t capacity()
void reserve(size_t)
void clear()
bint empty()
iterator erase(iterator position)
iterator erase(const_iterator position)
iterator erase(iterator first, iterator last)
iterator erase(const_iterator first, const_iterator last)
char& at(size_t)
char& operator[](size_t)
char& front() # C++11
char& back() # C++11
int compare(const string&)
string& operator+=(const string&)
string& operator+=(char)
string& operator+=(const char *)
string& append(const string&)
string& append(const string&, size_t, size_t)
string& append(const char *)
string& append(const char *, size_t)
string& append(size_t, char)
void push_back(char c)
string& assign (const string&)
string& assign (const string&, size_t, size_t)
string& assign (const char *, size_t)
string& assign (const char *)
string& assign (size_t n, char c)
string& insert(size_t, const string&)
string& insert(size_t, const string&, size_t, size_t)
string& insert(size_t, const char* s, size_t)
string& insert(size_t, const char* s)
string& insert(size_t, size_t, char c)
size_t copy(char *, size_t, size_t)
size_t find(const string&)
size_t find(const string&, size_t)
size_t find(const char*, size_t pos, size_t)
size_t find(const char*, size_t pos)
size_t find(char, size_t pos)
size_t rfind(const string&, size_t)
size_t rfind(const char* s, size_t, size_t)
size_t rfind(const char*, size_t pos)
size_t rfind(char c, size_t)
size_t rfind(char c)
size_t find_first_of(const string&, size_t)
size_t find_first_of(const char* s, size_t, size_t)
size_t find_first_of(const char*, size_t pos)
size_t find_first_of(char c, size_t)
size_t find_first_of(char c)
size_t find_first_not_of(const string&, size_t)
size_t find_first_not_of(const char* s, size_t, size_t)
size_t find_first_not_of(const char*, size_t pos)
size_t find_first_not_of(char c, size_t)
size_t find_first_not_of(char c)
size_t find_last_of(const string&, size_t)
size_t find_last_of(const char* s, size_t, size_t)
size_t find_last_of(const char*, size_t pos)
size_t find_last_of(char c, size_t)
size_t find_last_of(char c)
size_t find_last_not_of(const string&, size_t)
size_t find_last_not_of(const char* s, size_t, size_t)
size_t find_last_not_of(const char*, size_t pos)
string substr(size_t, size_t)
string substr()
string substr(size_t)
size_t find_last_not_of(char c, size_t)
size_t find_last_not_of(char c)
#string& operator= (const string&)
#string& operator= (const char*)
#string& operator= (char)
string operator+ (const string& rhs)
string operator+ (const char* rhs)
bint operator==(const string&)
bint operator==(const char*)
bint operator!= (const string& rhs )
bint operator!= (const char* )
bint operator< (const string&)
bint operator< (const char*)
bint operator> (const string&)
bint operator> (const char*)
bint operator<= (const string&)
bint operator<= (const char*)
bint operator>= (const string&)
bint operator>= (const char*)
# distutils: language = c++
from stdlib.string cimport string
cdef extern from "<openssl/evp.h>" nogil:
ctypedef struct EVP_MD_CTX:
pass
......@@ -44,33 +39,3 @@ cdef extern from * nogil:
'''
cdef const char hexdigits[]
cdef cypclass MessageDigest:
EVP_MD_CTX * md_ctx
MessageDigest __new__(alloc, const EVP_MD * algo):
md_ctx = EVP_MD_CTX_create()
if md_ctx is NULL:
return NULL
if EVP_DigestInit_ex(md_ctx, algo, NULL) != 1:
return NULL
instance = alloc()
instance.md_ctx = md_ctx
return instance
__dealloc__(self):
EVP_MD_CTX_destroy(md_ctx)
int update(self, unsigned char * message, size_t size):
return EVP_DigestUpdate(self.md_ctx, message, size) - 1
string hexdigest(self):
cdef char result[EVP_MAX_MD_SIZE*2]
cdef unsigned char hashbuffer[EVP_MAX_MD_SIZE]
cdef unsigned int size
cdef unsigned int i
if EVP_DigestFinal_ex(self.md_ctx, hashbuffer, &size) == 1:
for i in range(size):
result[2*i] = hexdigits[hashbuffer[i] >> 4]
result[2*i+1] = hexdigits[hashbuffer[i] & 15]
return string(result, 2*size)
# distutils: language = c++
# Differences with posix.stat:
#
# - the declaration for the non-standard field st_birthtime was removed
......@@ -12,11 +10,18 @@
# were replaced by the fields st_atim, st_mtim, st_ctim
# of type struct timespec.
from stdlib.string cimport string
from stdlib.fmt cimport sprintf
from posix.types cimport (blkcnt_t, blksize_t, dev_t, gid_t, ino_t, mode_t,
nlink_t, off_t, time_t, uid_t)
from posix.types cimport (blkcnt_t,
blksize_t,
dev_t,
gid_t,
ino_t,
mode_t,
nlink_t,
off_t,
time_t,
uid_t)
cdef extern from "<sys/time.h>" nogil:
cdef struct struct_timespec "timespec":
......@@ -40,7 +45,8 @@ cdef extern from "<sys/stat.h>" nogil:
struct_timespec st_mtim
struct_timespec st_ctim
# POSIX prescribes including both <sys/stat.h> and <unistd.h> for these
# POSIX dictates including both <sys/stat.h> and <unistd.h> for these
cdef extern from "<unistd.h>" nogil:
int fchmod(int, mode_t)
int chmod(const char *, mode_t)
......@@ -87,59 +93,3 @@ cdef extern from "<unistd.h>" nogil:
mode_t S_IWOTH
mode_t S_IXOTH
# Cypclass to expose minimal stat support.
cdef cypclass Stat:
struct_stat st_data
Stat __new__(alloc, string path):
instance = alloc()
if not lstat(path.c_str(), &instance.st_data):
return instance
bint is_regular(self):
return S_ISREG(self.st_data.st_mode)
bint is_symlink(self):
return S_ISLNK(self.st_data.st_mode)
bint is_dir(self):
return S_ISDIR(self.st_data.st_mode)
string to_json(self):
return sprintf("""{
"st_dev": %lu,
"st_ino": %lu,
"st_mode": %lu,
"st_nlink": %lu,
"st_uid": %d,
"st_gid": %d,
"st_rdev": %lu,
"st_size": %ld,
"st_blksize": %ld,
"st_blocks": %ld,
"st_atime": %ld,
"st_mtime": %ld,
"st_ctime": %ld,
"st_atime_ns": %ld,
"st_mtime_ns": %ld,
"st_ctime_ns": %ld
}""",
self.st_data.st_dev,
self.st_data.st_ino,
self.st_data.st_mode,
self.st_data.st_nlink,
self.st_data.st_uid,
self.st_data.st_gid,
self.st_data.st_rdev,
self.st_data.st_size,
self.st_data.st_blksize,
self.st_data.st_blocks,
self.st_data.st_atim.tv_sec,
self.st_data.st_mtim.tv_sec,
self.st_data.st_ctim.tv_sec,
self.st_data.st_atim.tv_nsec,
self.st_data.st_mtim.tv_nsec,
self.st_data.st_ctim.tv_nsec,
)
from posix.types cimport ino_t
cdef extern from "<sys/types.h>" nogil:
ctypedef struct DIR
cdef extern from "<dirent.h>" nogil:
cdef struct struct_dirent "dirent":
ino_t d_ino
......@@ -12,3 +14,4 @@ cdef extern from "<dirent.h>" nogil:
struct_dirent *readdir(DIR *dirp)
int readdir_r(DIR *dirp, struct_dirent *entry, struct_dirent **result)
int closedir(DIR *dirp)
from stdlib.string cimport Str
from ._hashlib cimport *
cdef cypclass MessageDigest:
EVP_MD_CTX * md_ctx
MessageDigest __new__(alloc, const EVP_MD * algo):
md_ctx = EVP_MD_CTX_create()
if md_ctx is NULL:
return NULL
if EVP_DigestInit_ex(md_ctx, algo, NULL) != 1:
return NULL
instance = alloc()
instance.md_ctx = md_ctx
return instance
__dealloc__(self):
EVP_MD_CTX_destroy(md_ctx)
int update(self, Str message):
bytes = <unsigned char * > message.bytes()
return EVP_DigestUpdate(self.md_ctx, bytes, message._str.size()) - 1
Str hexdigest(self):
cdef char result[EVP_MAX_MD_SIZE*2 + 1]
cdef unsigned char hashbuffer[EVP_MAX_MD_SIZE]
cdef unsigned int size
cdef unsigned int i
if EVP_DigestFinal_ex(self.md_ctx, hashbuffer, &size) == 1:
for i in range(size):
result[2*i] = hexdigits[hashbuffer[i] >> 4]
result[2*i+1] = hexdigits[hashbuffer[i] & 15]
result[size] = 0
return Str(result)
# distutils: language = c++
from stdlib.string cimport Str
from stdlib.format cimport format
from ._stat cimport *
cdef cypclass Stat:
struct_stat st_data
Stat __new__(alloc, Str path):
instance = alloc()
if not lstat(path.bytes(), &instance.st_data):
return instance
bint is_regular(self):
return S_ISREG(self.st_data.st_mode)
bint is_symlink(self):
return S_ISLNK(self.st_data.st_mode)
bint is_dir(self):
return S_ISDIR(self.st_data.st_mode)
Str to_json(self):
return format("""{{
"st_dev": {},
"st_ino": {},
"st_mode": {},
"st_nlink": {},
"st_uid": {},
"st_gid": {},
"st_rdev": {},
"st_size": {},
"st_blksize": {},
"st_blocks": {},
"st_atime": {},
"st_mtime": {},
"st_ctime": {},
"st_atime_ns": {},
"st_mtime_ns": {},
"st_ctime_ns": {}
}}""",
self.st_data.st_dev,
self.st_data.st_ino,
self.st_data.st_mode,
self.st_data.st_nlink,
self.st_data.st_uid,
self.st_data.st_gid,
self.st_data.st_rdev,
self.st_data.st_size,
self.st_data.st_blksize,
self.st_data.st_blocks,
self.st_data.st_atim.tv_sec,
self.st_data.st_mtim.tv_sec,
self.st_data.st_ctim.tv_sec,
self.st_data.st_atim.tv_nsec,
self.st_data.st_mtim.tv_nsec,
self.st_data.st_ctim.tv_nsec,
)
from stdlib.string cimport Str
from stdlib.list cimport List
from libc.errno cimport errno
from libc.stdio cimport FILE, fopen, fclose, fread, fwrite, ferror
from libc.stdio cimport stdin, stdout, stderr
from posix cimport unistd
from ._sys cimport DIR, struct_dirent, opendir, readdir, closedir
cdef enum:
_BUFSIZE = 64 * 1024
cdef inline FILE * open(Str path, char * mode) nogil:
return fopen(path.bytes(), mode)
cdef inline Str read(FILE * file, int nbytes) nogil:
s = Str()
s._str.append(_BUFSIZE, 0)
cdef int size
size = fread(s._str.data(), 1, nbytes, file)
if size == nbytes or not ferror(file):
s._str.resize(size)
return s
cdef inline int close(FILE * file) nogil:
return fclose(file)
cdef inline int write(Str data, FILE * file) nogil:
return fwrite(data.bytes(), 1, data._str.size(), file)
cdef inline List[Str] listdir(Str path) nogil:
d = opendir(path.bytes())
if d is NULL:
return NULL
entries = List[Str]()
while True:
errno = 0
entry = readdir(d)
if entry is NULL:
break
entries.append(Str(entry.d_name))
read_error = errno
closedir(d)
if read_error:
return NULL
return entries
cdef inline Str readlink(Str path, int max_size) nogil:
s = Str()
s._str.resize(max_size)
size = unistd.readlink(path.bytes(), <char*> s._str.data(), max_size)
if size == -1:
return NULL
s._str.resize(size)
return s
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment