Commit 05d5030a authored by Xavier Thompson

Improve and simplify scan-filesystem

parent 939de4ea
...@@ -8,11 +8,11 @@ from stdlib.format cimport format ...@@ -8,11 +8,11 @@ from stdlib.format cimport format
from runtime.runtime cimport Scheduler, BatchMailBox from runtime.runtime cimport Scheduler, BatchMailBox
from util.hashlib cimport MessageDigest, md5sum, sha1sum, sha256sum, sha512sum cimport util.hashlib as hashlib
from util.stat cimport Stat, dev_t from util.hashlib cimport Hash
cimport util.sys as sys cimport util.os as os
from util.sys cimport FILE, DIR from util.os cimport FILE, DIR, Stat
cdef Str curdir = Str(".") cdef Str curdir = Str(".")
...@@ -36,36 +36,50 @@ cdef cypclass Node activable: ...@@ -36,36 +36,50 @@ cdef cypclass Node activable:
void build_node(self): pass void build_node(self): pass
void format_node(self): void format_node(self):
self.json = format("""\ self.json = format("{}\n {}\n",
{{
"{}": {{
"stat": {}
}}
}},
""",
self.path, self.path,
self.stat.to_json(), self.format_stat(),
) )
void write_node(self, FILE * stream): pass Str format_stat(self):
return format("{} {} {} {} {} {} {} {} {} {} {} {} {} {} {} {}",
self.stat.st_dev,
self.stat.st_ino,
self.stat.st_mode,
self.stat.st_nlink,
self.stat.st_uid,
self.stat.st_gid,
self.stat.st_rdev,
self.stat.st_size,
self.stat.st_blksize,
self.stat.st_blocks,
self.stat.st_atim.tv_sec,
self.stat.st_mtim.tv_sec,
self.stat.st_ctim.tv_sec,
self.stat.st_atim.tv_nsec,
self.stat.st_mtim.tv_nsec,
self.stat.st_ctim.tv_nsec,
)
void write_node(self, FILE * stream): pass
cdef iso Node make_node(iso Str path) nogil: @staticmethod
cdef Node node iso Node create(iso Str path):
p = <Str> consume path cdef Node node
s = Stat(p) p = <Str> consume path
if s is NULL: s = os.stat(p)
node = NULL if s is NULL:
elif s.is_symlink(): node = NULL
node = SymlinkNode(p, s) elif s.is_symlink():
elif s.is_dir(): node = SymlinkNode(p, s)
node = DirNode(p, s) elif s.is_dir():
elif s.is_regular(): node = DirNode(p, s)
node = FileNode(p, s) elif s.is_regular():
else: node = FileNode(p, s)
node = NULL else:
del p, s node = NULL
return consume node del p, s
return consume node
cdef cypclass DirNode(Node): cdef cypclass DirNode(Node):
...@@ -77,7 +91,7 @@ cdef cypclass DirNode(Node): ...@@ -77,7 +91,7 @@ cdef cypclass DirNode(Node):
self.children = Children() self.children = Children()
void build_node(self): void build_node(self):
entries = sys.listdir(self.path) entries = os.listdir(self.path)
if entries is not NULL: if entries is not NULL:
for name in entries: for name in entries:
if name == curdir or name == pardir: if name == curdir or name == pardir:
...@@ -86,10 +100,9 @@ cdef cypclass DirNode(Node): ...@@ -86,10 +100,9 @@ cdef cypclass DirNode(Node):
if Str(path[-1]) != sep: if Str(path[-1]) != sep:
path = path + sep path = path + sep
path = path + name path = path + name
child = make_node(consume path) child = Node.create(consume path)
if child is NULL: if child is not NULL:
continue self.children.append(activate(consume child))
self.children.append(activate(consume child))
self.format_node() self.format_node()
...@@ -97,7 +110,7 @@ cdef cypclass DirNode(Node): ...@@ -97,7 +110,7 @@ cdef cypclass DirNode(Node):
active_child.build_node(NULL) active_child.build_node(NULL)
void write_node(self, FILE * stream): void write_node(self, FILE * stream):
sys.write(self.json, stream) os.write(self.json, stream)
while self.children.__len__() > 0: while self.children.__len__() > 0:
active_child = self.children.pop() active_child = self.children.pop()
child = consume active_child child = consume active_child
...@@ -109,10 +122,8 @@ cdef enum: ...@@ -109,10 +122,8 @@ cdef enum:
cdef cypclass FileNode(Node): cdef cypclass FileNode(Node):
Str md5_data Str sha256
Str sha1_data Str sha512
Str sha256_data
Str sha512_data
bint error bint error
__init__(self, Str path, Stat stat): __init__(self, Str path, Stat stat):
...@@ -120,49 +131,41 @@ cdef cypclass FileNode(Node): ...@@ -120,49 +131,41 @@ cdef cypclass FileNode(Node):
self.error = False self.error = False
void build_node(self): void build_node(self):
cdef bint md5_ok
cdef bint sha1_ok
cdef bint sha256_ok cdef bint sha256_ok
cdef bint sha512_ok cdef bint sha512_ok
cdef FILE * file = sys.open(self.path, 'rb') cdef FILE * file = os.open(self.path, 'rb')
if file is NULL: if file is NULL:
self.error = True self.error = True
self.format_node() self.format_node()
return return
md5 = MessageDigest(md5sum()) sha256 = Hash(hashlib.sha256())
sha1 = MessageDigest(sha1sum()) sha512 = Hash(hashlib.sha512())
sha256 = MessageDigest(sha256sum())
sha512 = MessageDigest(sha512sum())
md5_ok = md5 is not NULL
sha1_ok = sha1 is not NULL
sha256_ok = sha256 is not NULL sha256_ok = sha256 is not NULL
sha512_ok = sha512 is not NULL sha512_ok = sha512 is not NULL
while (md5_ok or sha1_ok or sha256_ok or sha512_ok): while (sha256_ok or sha512_ok):
s = sys.read(file, CHUNK) s = os.read(file, CHUNK)
if s is NULL: if s is NULL:
self.error = True self.error = True
break break
if md5_ok: md5_ok = md5.update(s) == 0 if sha256_ok:
if sha1_ok: sha1_ok = sha1.update(s) == 0 sha256_ok = sha256.update(s) == 0
if sha256_ok: sha256_ok = sha256.update(s) == 0 if sha512_ok:
if sha512_ok: sha512_ok = sha512.update(s) == 0 sha512_ok = sha512.update(s) == 0
if s.__len__() != CHUNK: if s.__len__() != CHUNK:
break break
sys.close(file) os.close(file)
if not self.error: if not self.error:
if md5_ok: self.md5_data = md5.hexdigest() self.sha256 = sha256.hexdigest() if sha256_ok else Str("<errror>")
if sha1_ok: self.sha1_data = sha1.hexdigest() self.sha512 = sha512.hexdigest() if sha512_ok else Str("<errror>")
if sha256_ok: self.sha256_data = sha256.hexdigest()
if sha512_ok: self.sha512_data = sha512.hexdigest()
self.format_node() self.format_node()
...@@ -170,79 +173,56 @@ cdef cypclass FileNode(Node): ...@@ -170,79 +173,56 @@ cdef cypclass FileNode(Node):
if self.error: if self.error:
Node.format_node(self) Node.format_node(self)
else: else:
self.json = format("""\ self.json = format("{}\n {}\n {}\n {}\n",
{{
"{}": {{
"stat": {},
"digests": {{
"md5": "{}",
"sha1": "{}",
"sha256": "{}",
"sha512": "{}"
}}
}}
}},
""",
self.path, self.path,
self.stat.to_json(), self.sha256,
self.md5_data, self.sha512,
self.sha1_data, self.format_stat(),
self.sha256_data,
self.sha512_data,
) )
void write_node(self, FILE * stream): void write_node(self, FILE * stream):
sys.write(self.json, stream) os.write(self.json, stream)
cdef cypclass SymlinkNode(Node): cdef cypclass SymlinkNode(Node):
Str target Str target
void build_node(self): void build_node(self):
self.target = sys.readlink(self.path, self.stat.st_data.st_size) self.target = os.readlink(self.path, self.stat.st_size)
self.format_node() self.format_node()
void format_node(self): void format_node(self):
if self.target is NULL: if self.target is NULL:
Node.format_node(self) Node.format_node(self)
else: else:
self.json = format("""\ self.json = format("{} -> {}\n {}\n",
{{ self.path,
"{}": {{ self.target,
"stat": {}, self.format_stat(),
"target": {}" )
}}
}},
""",
self.path,
self.stat.to_json(),
self.target,
)
void write_node(self, FILE * stream): void write_node(self, FILE * stream):
sys.write(self.json, stream) os.write(self.json, stream)
cdef int scan(iso Str root) nogil: cdef int scan(iso Str root) nogil:
node = make_node(consume root) node = Node.create(consume root)
if node is NULL: if node is NULL:
return -1 return -1
active_node = activate(consume node) active_node = activate(consume node)
active_node.build_node(NULL) active_node.build_node(NULL)
scheduler.join() scheduler.join()
node = consume active_node node = consume active_node
node.write_node(os.stdout)
sys.write(Str("[\n"), sys.stdout)
node.write_node(sys.stdout)
sys.write(Str(" {}\n]\n"), sys.stdout)
return 0 return 0
def main():
def main(s = b'.'):
cdef char * root = s
with nogil: with nogil:
scan(consume Str(".")) scan(consume(Str(root)))
...@@ -25,10 +25,14 @@ cdef extern from "<openssl/evp.h>" nogil: ...@@ -25,10 +25,14 @@ cdef extern from "<openssl/evp.h>" nogil:
const int EVP_MAX_MD_SIZE const int EVP_MAX_MD_SIZE
# Algorithms # Algorithms
const EVP_MD *md5sum "EVP_md5"() const EVP_MD *md5 "EVP_md5"()
const EVP_MD *sha1sum "EVP_sha1"() const EVP_MD *blake2b "EVP_blake2b512"
const EVP_MD *sha256sum "EVP_sha256"() const EVP_MD *blake2s "EVP_blake2b256"
const EVP_MD *sha512sum "EVP_sha512"() const EVP_MD *sha1 "EVP_sha1"()
const EVP_MD *sha224 "EVP_sha224"()
const EVP_MD *sha256 "EVP_sha256"()
const EVP_MD *sha384 "EVP_sha384"()
const EVP_MD *sha512 "EVP_sha512"()
const EVP_MD *EVP_get_digestbyname(const char *name) const EVP_MD *EVP_get_digestbyname(const char *name)
......
# Differences with posix.stat:
#
# - the declaration for the non-standard field st_birthtime was removed
# because cypclass wrapping triggers the generation of a conversion
# function for the stat structure which references this field.
#
# - a declaration of struct timespec, which is absent from posix.time, was added.
#
# - the declarations for the time_t fields st_atime, st_mtime, st_ctime
# were replaced by the fields st_atim, st_mtim, st_ctim
# of type struct timespec.
from posix.types cimport (blkcnt_t, from posix.types cimport (blkcnt_t,
blksize_t, blksize_t,
dev_t, dev_t,
...@@ -23,6 +10,18 @@ from posix.types cimport (blkcnt_t, ...@@ -23,6 +10,18 @@ from posix.types cimport (blkcnt_t,
uid_t) uid_t)
# Differences with posix.stat:
#
# - the declaration for the non-standard field st_birthtime was removed
# because cypclass wrapping triggers the generation of a conversion
# function for the stat structure which references this field.
#
# - a declaration of struct timespec, which is absent from posix.time, was added.
#
# - the declarations for the time_t fields st_atime, st_mtime, st_ctime
# were replaced by the fields st_atim, st_mtim, st_ctim
# of type struct timespec.
cdef extern from "<sys/time.h>" nogil: cdef extern from "<sys/time.h>" nogil:
cdef struct struct_timespec "timespec": cdef struct struct_timespec "timespec":
time_t tv_sec time_t tv_sec
...@@ -30,7 +29,7 @@ cdef extern from "<sys/time.h>" nogil: ...@@ -30,7 +29,7 @@ cdef extern from "<sys/time.h>" nogil:
cdef extern from "<sys/stat.h>" nogil: cdef extern from "<sys/stat.h>" nogil:
cdef struct struct_stat "stat": cdef cppclass struct_stat "stat":
dev_t st_dev dev_t st_dev
ino_t st_ino ino_t st_ino
mode_t st_mode mode_t st_mode
...@@ -93,3 +92,18 @@ cdef extern from "<unistd.h>" nogil: ...@@ -93,3 +92,18 @@ cdef extern from "<unistd.h>" nogil:
mode_t S_IWOTH mode_t S_IWOTH
mode_t S_IXOTH mode_t S_IXOTH
# Directory-stream handle type, taken from <sys/types.h>.
# It is declared without any fields, so Cython code can only pass DIR
# pointers around (see opendir/readdir/closedir), never look inside.
cdef extern from "<sys/types.h>" nogil:
ctypedef struct DIR
# Directory-entry record and the directory-stream functions from <dirent.h>.
# Everything in this extern block is declared nogil.
cdef extern from "<dirent.h>" nogil:
# Exposed to Cython as `struct_dirent`; the quoted name "dirent" is the
# C-level name. Only the two fields listed below are declared here.
cdef struct struct_dirent "dirent":
ino_t d_ino
# Entry name buffer, declared as a fixed 256-char array.
char d_name[256]
# Open, iterate over and close a directory stream.
DIR *opendir(const char *name)
struct_dirent *readdir(DIR *dirp)
# NOTE(review): readdir_r is declared but the cimport line visible in this
# diff only pulls in opendir, readdir and closedir — confirm it is still needed.
int readdir_r(DIR *dirp, struct_dirent *entry, struct_dirent **result)
int closedir(DIR *dirp)
from posix.types cimport ino_t
# Directory-stream handle type, taken from <sys/types.h>.
# It is declared without any fields, so it is only usable through pointers.
cdef extern from "<sys/types.h>" nogil:
ctypedef struct DIR
# Directory-entry record and the directory-stream functions from <dirent.h>.
# Everything in this extern block is declared nogil.
cdef extern from "<dirent.h>" nogil:
# Exposed to Cython as `struct_dirent`; the quoted name "dirent" is the
# C-level name. Only the two fields listed below are declared here.
cdef struct struct_dirent "dirent":
ino_t d_ino
# Entry name buffer, declared as a fixed 256-char array.
char d_name[256]
# Open, iterate over and close a directory stream.
DIR *opendir(const char *name)
struct_dirent *readdir(DIR *dirp)
int readdir_r(DIR *dirp, struct_dirent *entry, struct_dirent **result)
int closedir(DIR *dirp)
...@@ -3,10 +3,10 @@ from stdlib.string cimport Str ...@@ -3,10 +3,10 @@ from stdlib.string cimport Str
from ._hashlib cimport * from ._hashlib cimport *
cdef cypclass MessageDigest: cdef cypclass Hash:
EVP_MD_CTX * md_ctx EVP_MD_CTX * md_ctx
MessageDigest __new__(alloc, const EVP_MD * algo): Hash __new__(alloc, const EVP_MD * algo):
md_ctx = EVP_MD_CTX_create() md_ctx = EVP_MD_CTX_create()
if md_ctx is NULL: if md_ctx is NULL:
return NULL return NULL
......
...@@ -8,7 +8,8 @@ from libc.stdio cimport stdin, stdout, stderr ...@@ -8,7 +8,8 @@ from libc.stdio cimport stdin, stdout, stderr
from posix cimport unistd from posix cimport unistd
from ._sys cimport DIR, struct_dirent, opendir, readdir, closedir from ._os cimport DIR, struct_dirent, opendir, readdir, closedir
from ._os cimport struct_stat, lstat, S_ISREG, S_ISLNK, S_ISDIR
cdef enum: cdef enum:
...@@ -24,9 +25,11 @@ cdef inline Str read(FILE * file, int nbytes) nogil: ...@@ -24,9 +25,11 @@ cdef inline Str read(FILE * file, int nbytes) nogil:
s._str.append(_BUFSIZE, 0) s._str.append(_BUFSIZE, 0)
cdef int size cdef int size
size = fread(s._str.data(), 1, nbytes, file) size = fread(s._str.data(), 1, nbytes, file)
if size == nbytes or not ferror(file): if size != nbytes:
s._str.resize(size) if ferror(file):
return s return NULL
s._str.resize(size)
return s
cdef inline int close(FILE * file) nogil: cdef inline int close(FILE * file) nogil:
...@@ -64,3 +67,25 @@ cdef inline Str readlink(Str path, int max_size) nogil: ...@@ -64,3 +67,25 @@ cdef inline Str readlink(Str path, int max_size) nogil:
s._str.resize(size) s._str.resize(size)
return s return s
cdef cypclass Stat(struct_stat):
    # File status record for one path.
    #
    # The class derives from struct_stat, so the st_* fields filled in by
    # lstat() are read directly on the instance (e.g. self.st_mode).

    Stat __new__(alloc, Str path):
        # Allocate an instance and fill it with lstat(path).
        #
        # Returns the new instance when lstat() returns 0, and NULL when
        # allocation or lstat() fails. Callers test the result against NULL.
        instance = alloc()
        # View the instance as its struct_stat base so lstat() can write into it.
        s = <struct_stat *> instance
        if s is not NULL:
            if not lstat(path.bytes(), s):
                return instance
        # Explicit failure result instead of falling off the end of __new__.
        return NULL

    bint is_regular(self):
        # True when st_mode satisfies S_ISREG.
        return S_ISREG(self.st_mode)

    bint is_symlink(self):
        # True when st_mode satisfies S_ISLNK.
        return S_ISLNK(self.st_mode)

    bint is_dir(self):
        # True when st_mode satisfies S_ISDIR.
        return S_ISDIR(self.st_mode)
# Function-style wrapper around the Stat constructor: returns Stat(path) unchanged.
# NOTE(review): presumably NULL when the path cannot be stat'ed, since the
# caller in Node.create tests the result with `is NULL` — confirm.
cdef inline Stat stat(Str path) nogil:
return Stat(path)
# distutils: language = c++
from stdlib.string cimport Str
from stdlib.format cimport format
from ._stat cimport *
# File status record for one path: holds a struct_stat in `st_data`,
# filled by lstat(), plus file-type predicates and a JSON-style formatter.
cdef cypclass Stat:
# Raw result of lstat() for the path given to __new__.
struct_stat st_data
# Allocates an instance and runs lstat() on `path`, writing into st_data.
# The instance is returned only when lstat() returns 0; there is no explicit
# return on the failure path.
# NOTE(review): presumably the failure path yields NULL, since make_node
# checks `s is NULL` — confirm.
Stat __new__(alloc, Str path):
instance = alloc()
if not lstat(path.bytes(), &instance.st_data):
return instance
# File-type predicates; each applies the matching S_IS* test to st_mode.
bint is_regular(self):
return S_ISREG(self.st_data.st_mode)
bint is_symlink(self):
return S_ISLNK(self.st_data.st_mode)
bint is_dir(self):
return S_ISDIR(self.st_data.st_mode)
# Renders sixteen stat fields as a JSON-object-shaped string via format().
# The "st_atime"/"st_mtime"/"st_ctime" keys receive the tv_sec parts and the
# "*_ns" keys receive the tv_nsec parts of st_atim/st_mtim/st_ctim.
# NOTE(review): the doubled braces presumably escape literal { and } for
# format() — confirm against stdlib.format.
Str to_json(self):
return format("""{{
"st_dev": {},
"st_ino": {},
"st_mode": {},
"st_nlink": {},
"st_uid": {},
"st_gid": {},
"st_rdev": {},
"st_size": {},
"st_blksize": {},
"st_blocks": {},
"st_atime": {},
"st_mtime": {},
"st_ctime": {},
"st_atime_ns": {},
"st_mtime_ns": {},
"st_ctime_ns": {}
}}""",
self.st_data.st_dev,
self.st_data.st_ino,
self.st_data.st_mode,
self.st_data.st_nlink,
self.st_data.st_uid,
self.st_data.st_gid,
self.st_data.st_rdev,
self.st_data.st_size,
self.st_data.st_blksize,
self.st_data.st_blocks,
self.st_data.st_atim.tv_sec,
self.st_data.st_mtim.tv_sec,
self.st_data.st_ctim.tv_sec,
self.st_data.st_atim.tv_nsec,
self.st_data.st_mtim.tv_nsec,
self.st_data.st_ctim.tv_nsec,
)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment