Commit 05d5030a authored by Xavier Thompson's avatar Xavier Thompson

Improve and simplify scan-filesystem

parent 939de4ea
......@@ -8,11 +8,11 @@ from stdlib.format cimport format
from runtime.runtime cimport Scheduler, BatchMailBox
from util.hashlib cimport MessageDigest, md5sum, sha1sum, sha256sum, sha512sum
from util.stat cimport Stat, dev_t
cimport util.hashlib as hashlib
from util.hashlib cimport Hash
cimport util.sys as sys
from util.sys cimport FILE, DIR
cimport util.os as os
from util.os cimport FILE, DIR, Stat
cdef Str curdir = Str(".")
......@@ -36,24 +36,38 @@ cdef cypclass Node activable:
void build_node(self): pass
void format_node(self):
self.json = format("""\
{{
"{}": {{
"stat": {}
}}
}},
""",
self.json = format("{}\n {}\n",
self.path,
self.stat.to_json(),
self.format_stat(),
)
void write_node(self, FILE * stream): pass
# Render this node's stat record as one line of 16 space-separated values.
# Field order: st_dev, st_ino, st_mode, st_nlink, st_uid, st_gid, st_rdev,
# st_size, st_blksize, st_blocks, then the three timestamps' seconds
# (atim, mtim, ctim) followed by the three timestamps' nanoseconds.
Str format_stat(self):
return format("{} {} {} {} {} {} {} {} {} {} {} {} {} {} {} {}",
self.stat.st_dev,
self.stat.st_ino,
self.stat.st_mode,
self.stat.st_nlink,
self.stat.st_uid,
self.stat.st_gid,
self.stat.st_rdev,
self.stat.st_size,
self.stat.st_blksize,
self.stat.st_blocks,
# Seconds part of the access / modification / status-change times.
self.stat.st_atim.tv_sec,
self.stat.st_mtim.tv_sec,
self.stat.st_ctim.tv_sec,
# Nanoseconds part of the same three timestamps, in the same order.
self.stat.st_atim.tv_nsec,
self.stat.st_mtim.tv_nsec,
self.stat.st_ctim.tv_nsec,
)
void write_node(self, FILE * stream): pass
cdef iso Node make_node(iso Str path) nogil:
@staticmethod
iso Node create(iso Str path):
cdef Node node
p = <Str> consume path
s = Stat(p)
s = os.stat(p)
if s is NULL:
node = NULL
elif s.is_symlink():
......@@ -77,7 +91,7 @@ cdef cypclass DirNode(Node):
self.children = Children()
void build_node(self):
entries = sys.listdir(self.path)
entries = os.listdir(self.path)
if entries is not NULL:
for name in entries:
if name == curdir or name == pardir:
......@@ -86,9 +100,8 @@ cdef cypclass DirNode(Node):
if Str(path[-1]) != sep:
path = path + sep
path = path + name
child = make_node(consume path)
if child is NULL:
continue
child = Node.create(consume path)
if child is not NULL:
self.children.append(activate(consume child))
self.format_node()
......@@ -97,7 +110,7 @@ cdef cypclass DirNode(Node):
active_child.build_node(NULL)
void write_node(self, FILE * stream):
sys.write(self.json, stream)
os.write(self.json, stream)
while self.children.__len__() > 0:
active_child = self.children.pop()
child = consume active_child
......@@ -109,10 +122,8 @@ cdef enum:
cdef cypclass FileNode(Node):
Str md5_data
Str sha1_data
Str sha256_data
Str sha512_data
Str sha256
Str sha512
bint error
__init__(self, Str path, Stat stat):
......@@ -120,49 +131,41 @@ cdef cypclass FileNode(Node):
self.error = False
void build_node(self):
cdef bint md5_ok
cdef bint sha1_ok
cdef bint sha256_ok
cdef bint sha512_ok
cdef FILE * file = sys.open(self.path, 'rb')
cdef FILE * file = os.open(self.path, 'rb')
if file is NULL:
self.error = True
self.format_node()
return
md5 = MessageDigest(md5sum())
sha1 = MessageDigest(sha1sum())
sha256 = MessageDigest(sha256sum())
sha512 = MessageDigest(sha512sum())
sha256 = Hash(hashlib.sha256())
sha512 = Hash(hashlib.sha512())
md5_ok = md5 is not NULL
sha1_ok = sha1 is not NULL
sha256_ok = sha256 is not NULL
sha512_ok = sha512 is not NULL
while (md5_ok or sha1_ok or sha256_ok or sha512_ok):
s = sys.read(file, CHUNK)
while (sha256_ok or sha512_ok):
s = os.read(file, CHUNK)
if s is NULL:
self.error = True
break
if md5_ok: md5_ok = md5.update(s) == 0
if sha1_ok: sha1_ok = sha1.update(s) == 0
if sha256_ok: sha256_ok = sha256.update(s) == 0
if sha512_ok: sha512_ok = sha512.update(s) == 0
if sha256_ok:
sha256_ok = sha256.update(s) == 0
if sha512_ok:
sha512_ok = sha512.update(s) == 0
if s.__len__() != CHUNK:
break
sys.close(file)
os.close(file)
if not self.error:
if md5_ok: self.md5_data = md5.hexdigest()
if sha1_ok: self.sha1_data = sha1.hexdigest()
if sha256_ok: self.sha256_data = sha256.hexdigest()
if sha512_ok: self.sha512_data = sha512.hexdigest()
# Store each hex digest, or an "<error>" placeholder when that hash failed
# (its *_ok flag was cleared during creation or update).
self.sha256 = sha256.hexdigest() if sha256_ok else Str("<error>")
self.sha512 = sha512.hexdigest() if sha512_ok else Str("<error>")
self.format_node()
......@@ -170,79 +173,56 @@ cdef cypclass FileNode(Node):
if self.error:
Node.format_node(self)
else:
self.json = format("""\
{{
"{}": {{
"stat": {},
"digests": {{
"md5": "{}",
"sha1": "{}",
"sha256": "{}",
"sha512": "{}"
}}
}}
}},
""",
self.json = format("{}\n {}\n {}\n {}\n",
self.path,
self.stat.to_json(),
self.md5_data,
self.sha1_data,
self.sha256_data,
self.sha512_data,
self.sha256,
self.sha512,
self.format_stat(),
)
void write_node(self, FILE * stream):
sys.write(self.json, stream)
os.write(self.json, stream)
cdef cypclass SymlinkNode(Node):
Str target
void build_node(self):
self.target = sys.readlink(self.path, self.stat.st_data.st_size)
self.target = os.readlink(self.path, self.stat.st_size)
self.format_node()
void format_node(self):
if self.target is NULL:
Node.format_node(self)
else:
self.json = format("""\
{{
"{}": {{
"stat": {},
"target": {}"
}}
}},
""",
self.json = format("{} -> {}\n {}\n",
self.path,
self.stat.to_json(),
self.target,
self.format_stat(),
)
void write_node(self, FILE * stream):
sys.write(self.json, stream)
os.write(self.json, stream)
cdef int scan(iso Str root) nogil:
node = make_node(consume root)
node = Node.create(consume root)
if node is NULL:
return -1
active_node = activate(consume node)
active_node.build_node(NULL)
scheduler.join()
node = consume active_node
sys.write(Str("[\n"), sys.stdout)
node.write_node(sys.stdout)
sys.write(Str(" {}\n]\n"), sys.stdout)
node.write_node(os.stdout)
return 0
def main():
def main(s = b'.'):
cdef char * root = s
with nogil:
scan(consume Str("."))
scan(consume(Str(root)))
......@@ -25,10 +25,14 @@ cdef extern from "<openssl/evp.h>" nogil:
const int EVP_MAX_MD_SIZE
# Algorithms
const EVP_MD *md5sum "EVP_md5"()
const EVP_MD *sha1sum "EVP_sha1"()
const EVP_MD *sha256sum "EVP_sha256"()
const EVP_MD *sha512sum "EVP_sha512"()
const EVP_MD *md5 "EVP_md5"()
# The BLAKE2 getters are functions like every other EVP_* algorithm declared
# here, so they need the trailing "()". The BLAKE2s symbol in OpenSSL is
# EVP_blake2s256 (there is no EVP_blake2b256).
const EVP_MD *blake2b "EVP_blake2b512"()
const EVP_MD *blake2s "EVP_blake2s256"()
const EVP_MD *sha1 "EVP_sha1"()
const EVP_MD *sha224 "EVP_sha224"()
const EVP_MD *sha256 "EVP_sha256"()
const EVP_MD *sha384 "EVP_sha384"()
const EVP_MD *sha512 "EVP_sha512"()
const EVP_MD *EVP_get_digestbyname(const char *name)
......
# Differences with posix.stat:
#
# - the declaration for the non-standard field st_birthtime was removed
# because cypclass wrapping triggers the generation of a conversion
# function for the stat structure which references this field.
#
# - the absent declaration in posix.time of struct timespec was added.
#
# - the declarations for the time_t fields st_atime, st_mtime, st_ctime
# were replaced by the fields st_atim, st_mtim, st_ctim
# of type struct timespec.
from posix.types cimport (blkcnt_t,
blksize_t,
dev_t,
......@@ -23,6 +10,18 @@ from posix.types cimport (blkcnt_t,
uid_t)
# Differences with posix.stat:
#
# - the declaration for the non-standard field st_birthtime was removed
# because cypclass wrapping triggers the generation of a conversion
# function for the stat structure which references this field.
#
# - the absent declaration in posix.time of struct timespec was added.
#
# - the declarations for the time_t fields st_atime, st_mtime, st_ctime
# were replaced by the fields st_atim, st_mtim, st_ctim
# of type struct timespec.
cdef extern from "<sys/time.h>" nogil:
cdef struct struct_timespec "timespec":
time_t tv_sec
......@@ -30,7 +29,7 @@ cdef extern from "<sys/time.h>" nogil:
cdef extern from "<sys/stat.h>" nogil:
cdef struct struct_stat "stat":
cdef cppclass struct_stat "stat":
dev_t st_dev
ino_t st_ino
mode_t st_mode
......@@ -93,3 +92,18 @@ cdef extern from "<unistd.h>" nogil:
mode_t S_IWOTH
mode_t S_IXOTH
# Opaque directory-stream type; it is only used through DIR pointers below.
cdef extern from "<sys/types.h>" nogil:
ctypedef struct DIR
# Directory-iteration declarations from <dirent.h>.
cdef extern from "<dirent.h>" nogil:
# Only the inode number and the entry name of `struct dirent` are exposed.
cdef struct struct_dirent "dirent":
ino_t d_ino
char d_name[256]
DIR *opendir(const char *name)
struct_dirent *readdir(DIR *dirp)
# NOTE(review): readdir_r is deprecated in recent glibc -- confirm it is
# still needed before relying on it.
int readdir_r(DIR *dirp, struct_dirent *entry, struct_dirent **result)
int closedir(DIR *dirp)
from posix.types cimport ino_t
cdef extern from "<sys/types.h>" nogil:
ctypedef struct DIR
cdef extern from "<dirent.h>" nogil:
cdef struct struct_dirent "dirent":
ino_t d_ino
char d_name[256]
DIR *opendir(const char *name)
struct_dirent *readdir(DIR *dirp)
int readdir_r(DIR *dirp, struct_dirent *entry, struct_dirent **result)
int closedir(DIR *dirp)
......@@ -3,10 +3,10 @@ from stdlib.string cimport Str
from ._hashlib cimport *
cdef cypclass MessageDigest:
cdef cypclass Hash:
EVP_MD_CTX * md_ctx
MessageDigest __new__(alloc, const EVP_MD * algo):
Hash __new__(alloc, const EVP_MD * algo):
md_ctx = EVP_MD_CTX_create()
if md_ctx is NULL:
return NULL
......
......@@ -8,7 +8,8 @@ from libc.stdio cimport stdin, stdout, stderr
from posix cimport unistd
from ._sys cimport DIR, struct_dirent, opendir, readdir, closedir
from ._os cimport DIR, struct_dirent, opendir, readdir, closedir
from ._os cimport struct_stat, lstat, S_ISREG, S_ISLNK, S_ISDIR
cdef enum:
......@@ -24,7 +25,9 @@ cdef inline Str read(FILE * file, int nbytes) nogil:
s._str.append(_BUFSIZE, 0)
cdef int size
size = fread(s._str.data(), 1, nbytes, file)
if size == nbytes or not ferror(file):
if size != nbytes:
if ferror(file):
return NULL
s._str.resize(size)
return s
......@@ -64,3 +67,25 @@ cdef inline Str readlink(Str path, int max_size) nogil:
s._str.resize(size)
return s
# Cypclass that extends the C `struct stat` (declared as struct_stat) with
# file-type predicates, so the st_* fields are read directly on the object.
cdef cypclass Stat(struct_stat):
# Allocate an instance and fill it in place with lstat(path); lstat
# describes a symbolic link itself rather than its target.
# The instance is returned only when lstat returns 0. On every other path
# there is no explicit return -- presumably that yields NULL, which callers
# test for (TODO confirm).
Stat __new__(alloc, Str path):
instance = alloc()
# View the new object as the underlying struct so lstat can write into it.
s = <struct_stat *> instance
if s is not NULL:
if not lstat(path.bytes(), s):
return instance
# True when st_mode satisfies S_ISREG.
bint is_regular(self):
return S_ISREG(self.st_mode)
# True when st_mode satisfies S_ISLNK.
bint is_symlink(self):
return S_ISLNK(self.st_mode)
# True when st_mode satisfies S_ISDIR.
bint is_dir(self):
return S_ISDIR(self.st_mode)
# Module-level convenience wrapper: construct a Stat for `path`.
# The result is whatever Stat(path) produces, including its failure value.
cdef inline Stat stat(Str path) nogil:
return Stat(path)
# distutils: language = c++
from stdlib.string cimport Str
from stdlib.format cimport format
from ._stat cimport *
cdef cypclass Stat:
struct_stat st_data
Stat __new__(alloc, Str path):
instance = alloc()
if not lstat(path.bytes(), &instance.st_data):
return instance
bint is_regular(self):
return S_ISREG(self.st_data.st_mode)
bint is_symlink(self):
return S_ISLNK(self.st_data.st_mode)
bint is_dir(self):
return S_ISDIR(self.st_data.st_mode)
Str to_json(self):
return format("""{{
"st_dev": {},
"st_ino": {},
"st_mode": {},
"st_nlink": {},
"st_uid": {},
"st_gid": {},
"st_rdev": {},
"st_size": {},
"st_blksize": {},
"st_blocks": {},
"st_atime": {},
"st_mtime": {},
"st_ctime": {},
"st_atime_ns": {},
"st_mtime_ns": {},
"st_ctime_ns": {}
}}""",
self.st_data.st_dev,
self.st_data.st_ino,
self.st_data.st_mode,
self.st_data.st_nlink,
self.st_data.st_uid,
self.st_data.st_gid,
self.st_data.st_rdev,
self.st_data.st_size,
self.st_data.st_blksize,
self.st_data.st_blocks,
self.st_data.st_atim.tv_sec,
self.st_data.st_mtim.tv_sec,
self.st_data.st_ctim.tv_sec,
self.st_data.st_atim.tv_nsec,
self.st_data.st_mtim.tv_nsec,
self.st_data.st_ctim.tv_nsec,
)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment