Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
scan-filesystem
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xavier Thompson
scan-filesystem
Commits
05d5030a
Commit
05d5030a
authored
Oct 11, 2022
by
Xavier Thompson
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Improve and simplify scan-filesystem
parent
939de4ea
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
147 additions
and
203 deletions
+147
-203
cython/main.pyx
cython/main.pyx
+80
-100
cython/util/__init__.pxd
cython/util/__init__.pxd
+0
-0
cython/util/_hashlib.pxd
cython/util/_hashlib.pxd
+8
-4
cython/util/_os.pxd
cython/util/_os.pxd
+28
-14
cython/util/_sys.pxd
cython/util/_sys.pxd
+0
-17
cython/util/hashlib.pxd
cython/util/hashlib.pxd
+2
-2
cython/util/os.pxd
cython/util/os.pxd
+29
-4
cython/util/stat.pxd
cython/util/stat.pxd
+0
-62
No files found.
cython/main.pyx
View file @
05d5030a
...
...
@@ -8,11 +8,11 @@ from stdlib.format cimport format
from
runtime.runtime
cimport
Scheduler
,
BatchMailBox
from
util.hashlib
cimport
MessageDigest
,
md5sum
,
sha1sum
,
sha256sum
,
sha512sum
from
util.
stat
cimport
Stat
,
dev_t
cimport
util.hashlib
as
hashlib
from
util.
hashlib
cimport
Hash
cimport
util.
sys
as
sy
s
from
util.
sys
cimport
FILE
,
DIR
cimport
util.
os
as
o
s
from
util.
os
cimport
FILE
,
DIR
,
Stat
cdef
Str
curdir
=
Str
(
"."
)
...
...
@@ -36,24 +36,38 @@ cdef cypclass Node activable:
void
build_node
(
self
):
pass
void
format_node
(
self
):
self
.
json
=
format
(
"""
\
{{
"{}": {{
"stat": {}
}}
}},
"""
,
self
.
json
=
format
(
"{}
\
n
{}
\
n
"
,
self
.
path
,
self
.
stat
.
to_json
(),
self
.
format_stat
(),
)
void
write_node
(
self
,
FILE
*
stream
):
pass
# Render the fields of this node's stat record as a single line of
# space-separated values, in this order: st_dev, st_ino, st_mode, st_nlink,
# st_uid, st_gid, st_rdev, st_size, st_blksize, st_blocks, then the seconds
# of st_atim/st_mtim/st_ctim followed by their nanoseconds.
Str format_stat(self):
    return format(
        "{} {} {} {} {} {} {} {} {} {} {} {} {} {} {} {}",
        self.stat.st_dev,
        self.stat.st_ino,
        self.stat.st_mode,
        self.stat.st_nlink,
        self.stat.st_uid,
        self.stat.st_gid,
        self.stat.st_rdev,
        self.stat.st_size,
        self.stat.st_blksize,
        self.stat.st_blocks,
        self.stat.st_atim.tv_sec,
        self.stat.st_mtim.tv_sec,
        self.stat.st_ctim.tv_sec,
        self.stat.st_atim.tv_nsec,
        self.stat.st_mtim.tv_nsec,
        self.stat.st_ctim.tv_nsec,
    )
void
write_node
(
self
,
FILE
*
stream
):
pass
cdef
iso
Node
make_node
(
iso
Str
path
)
nogil
:
@
staticmethod
iso
Node
create
(
iso
Str
path
):
cdef
Node
node
p
=
<
Str
>
consume
path
s
=
S
tat
(
p
)
s
=
os
.
s
tat
(
p
)
if
s
is
NULL
:
node
=
NULL
elif
s
.
is_symlink
():
...
...
@@ -77,7 +91,7 @@ cdef cypclass DirNode(Node):
self
.
children
=
Children
()
void
build_node
(
self
):
entries
=
sy
s
.
listdir
(
self
.
path
)
entries
=
o
s
.
listdir
(
self
.
path
)
if
entries
is
not
NULL
:
for
name
in
entries
:
if
name
==
curdir
or
name
==
pardir
:
...
...
@@ -86,9 +100,8 @@ cdef cypclass DirNode(Node):
if
Str
(
path
[
-
1
])
!=
sep
:
path
=
path
+
sep
path
=
path
+
name
child
=
make_node
(
consume
path
)
if
child
is
NULL
:
continue
child
=
Node
.
create
(
consume
path
)
if
child
is
not
NULL
:
self
.
children
.
append
(
activate
(
consume
child
))
self
.
format_node
()
...
...
@@ -97,7 +110,7 @@ cdef cypclass DirNode(Node):
active_child
.
build_node
(
NULL
)
void
write_node
(
self
,
FILE
*
stream
):
sy
s
.
write
(
self
.
json
,
stream
)
o
s
.
write
(
self
.
json
,
stream
)
while
self
.
children
.
__len__
()
>
0
:
active_child
=
self
.
children
.
pop
()
child
=
consume
active_child
...
...
@@ -109,10 +122,8 @@ cdef enum:
cdef
cypclass
FileNode
(
Node
):
Str
md5_data
Str
sha1_data
Str
sha256_data
Str
sha512_data
Str
sha256
Str
sha512
bint
error
__init__
(
self
,
Str
path
,
Stat
stat
):
...
...
@@ -120,49 +131,41 @@ cdef cypclass FileNode(Node):
self
.
error
=
False
void
build_node
(
self
):
cdef
bint
md5_ok
cdef
bint
sha1_ok
cdef
bint
sha256_ok
cdef
bint
sha512_ok
cdef
FILE
*
file
=
sy
s
.
open
(
self
.
path
,
'rb'
)
cdef
FILE
*
file
=
o
s
.
open
(
self
.
path
,
'rb'
)
if
file
is
NULL
:
self
.
error
=
True
self
.
format_node
()
return
md5
=
MessageDigest
(
md5sum
())
sha1
=
MessageDigest
(
sha1sum
())
sha256
=
MessageDigest
(
sha256sum
())
sha512
=
MessageDigest
(
sha512sum
())
sha256
=
Hash
(
hashlib
.
sha256
())
sha512
=
Hash
(
hashlib
.
sha512
())
md5_ok
=
md5
is
not
NULL
sha1_ok
=
sha1
is
not
NULL
sha256_ok
=
sha256
is
not
NULL
sha512_ok
=
sha512
is
not
NULL
while
(
md5_ok
or
sha1_ok
or
sha256_ok
or
sha512_ok
):
s
=
sy
s
.
read
(
file
,
CHUNK
)
while
(
sha256_ok
or
sha512_ok
):
s
=
o
s
.
read
(
file
,
CHUNK
)
if
s
is
NULL
:
self
.
error
=
True
break
if
md5_ok
:
md5_ok
=
md5
.
update
(
s
)
==
0
if
sha1_ok
:
sha1_ok
=
sha1
.
update
(
s
)
==
0
if
sha
256_ok
:
sha256_ok
=
sha256
.
update
(
s
)
==
0
if
sha512_ok
:
sha512_ok
=
sha512
.
update
(
s
)
==
0
if
sha256_ok
:
sha256_ok
=
sha256
.
update
(
s
)
==
0
if
sha
512_ok
:
sha512_ok
=
sha512
.
update
(
s
)
==
0
if
s
.
__len__
()
!=
CHUNK
:
break
sy
s
.
close
(
file
)
o
s
.
close
(
file
)
if
not
self
.
error
:
if
md5_ok
:
self
.
md5_data
=
md5
.
hexdigest
()
if
sha1_ok
:
self
.
sha1_data
=
sha1
.
hexdigest
()
if
sha256_ok
:
self
.
sha256_data
=
sha256
.
hexdigest
()
if
sha512_ok
:
self
.
sha512_data
=
sha512
.
hexdigest
()
self
.
sha256
=
sha256
.
hexdigest
()
if
sha256_ok
else
Str
(
"<errror>"
)
self
.
sha512
=
sha512
.
hexdigest
()
if
sha512_ok
else
Str
(
"<errror>"
)
self
.
format_node
()
...
...
@@ -170,79 +173,56 @@ cdef cypclass FileNode(Node):
if
self
.
error
:
Node
.
format_node
(
self
)
else
:
self
.
json
=
format
(
"""
\
{{
"{}": {{
"stat": {},
"digests": {{
"md5": "{}",
"sha1": "{}",
"sha256": "{}",
"sha512": "{}"
}}
}}
}},
"""
,
self
.
json
=
format
(
"{}
\
n
{}
\
n
{}
\
n
{}
\
n
"
,
self
.
path
,
self
.
stat
.
to_json
(),
self
.
md5_data
,
self
.
sha1_data
,
self
.
sha256_data
,
self
.
sha512_data
,
self
.
sha256
,
self
.
sha512
,
self
.
format_stat
(),
)
void
write_node
(
self
,
FILE
*
stream
):
sy
s
.
write
(
self
.
json
,
stream
)
o
s
.
write
(
self
.
json
,
stream
)
cdef
cypclass
SymlinkNode
(
Node
):
Str
target
void
build_node
(
self
):
self
.
target
=
sys
.
readlink
(
self
.
path
,
self
.
stat
.
st_data
.
st_size
)
self
.
target
=
os
.
readlink
(
self
.
path
,
self
.
stat
.
st_size
)
self
.
format_node
()
void
format_node
(
self
):
if
self
.
target
is
NULL
:
Node
.
format_node
(
self
)
else
:
self
.
json
=
format
(
"""
\
{{
"{}": {{
"stat": {},
"target": {}"
}}
}},
"""
,
self
.
json
=
format
(
"{} -> {}
\
n
{}
\
n
"
,
self
.
path
,
self
.
stat
.
to_json
(),
self
.
target
,
self
.
format_stat
(),
)
void
write_node
(
self
,
FILE
*
stream
):
sy
s
.
write
(
self
.
json
,
stream
)
o
s
.
write
(
self
.
json
,
stream
)
cdef
int
scan
(
iso
Str
root
)
nogil
:
node
=
make_nod
e
(
consume
root
)
node
=
Node
.
creat
e
(
consume
root
)
if
node
is
NULL
:
return
-
1
active_node
=
activate
(
consume
node
)
active_node
.
build_node
(
NULL
)
scheduler
.
join
()
node
=
consume
active_node
sys
.
write
(
Str
(
"[
\
n
"
),
sys
.
stdout
)
node
.
write_node
(
sys
.
stdout
)
sys
.
write
(
Str
(
" {}
\
n
]
\
n
"
),
sys
.
stdout
)
node
.
write_node
(
os
.
stdout
)
return
0
def
main
():
def
main
(
s
=
b'.'
):
cdef
char
*
root
=
s
with
nogil
:
scan
(
consume
Str
(
"."
))
scan
(
consume
(
Str
(
root
)
))
cython/util/__init__.p
y
→
cython/util/__init__.p
xd
View file @
05d5030a
File moved
cython/util/_hashlib.pxd
View file @
05d5030a
...
...
@@ -25,10 +25,14 @@ cdef extern from "<openssl/evp.h>" nogil:
const
int
EVP_MAX_MD_SIZE
# Algorithms
const
EVP_MD
*
md5sum
"EVP_md5"
()
const
EVP_MD
*
sha1sum
"EVP_sha1"
()
const
EVP_MD
*
sha256sum
"EVP_sha256"
()
const
EVP_MD
*
sha512sum
"EVP_sha512"
()
# BLAKE2 digests. Like the other algorithm accessors declared alongside
# them, these are OpenSSL functions returning a `const EVP_MD *`, so they
# need a call signature. BLAKE2s is exposed by OpenSSL as EVP_blake2s256;
# there is no EVP_blake2b256 symbol.
const EVP_MD *blake2b "EVP_blake2b512" ()
const EVP_MD *blake2s "EVP_blake2s256" ()
const
EVP_MD
*
sha1
"EVP_sha1"
()
const
EVP_MD
*
sha224
"EVP_sha224"
()
const
EVP_MD
*
sha256
"EVP_sha256"
()
const
EVP_MD
*
sha384
"EVP_sha384"
()
const
EVP_MD
*
sha512
"EVP_sha512"
()
const
EVP_MD
*
EVP_get_digestbyname
(
const
char
*
name
)
...
...
cython/util/_
stat
.pxd
→
cython/util/_
os
.pxd
View file @
05d5030a
# Differences with posix.stat:
#
# - the declaration for the non-standard field st_birthtime was removed
# because cypclass wrapping triggers the generation of a conversion
# function for the stat structure which references this field.
#
# - the absent declaration in posix.time of struct timespec was added.
#
# - the declarations for the time_t fields st_atime, st_mtime, st_ctime
# were replaced by the fields st_atim, st_mtim, st_ctim
# of type struct timespec.
from
posix.types
cimport
(
blkcnt_t
,
blksize_t
,
dev_t
,
...
...
@@ -23,6 +10,18 @@ from posix.types cimport (blkcnt_t,
uid_t
)
# Differences with posix.stat:
#
# - the declaration for the non-standard field st_birthtime was removed
# because cypclass wrapping triggers the generation of a conversion
# function for the stat structure which references this field.
#
# - the absent declaration in posix.time of struct timespec was added.
#
# - the declarations for the time_t fields st_atime, st_mtime, st_ctime
# were replaced by the fields st_atim, st_mtim, st_ctim
# of type struct timespec.
cdef
extern
from
"<sys/time.h>"
nogil
:
cdef
struct
struct_timespec
"timespec"
:
time_t
tv_sec
...
...
@@ -30,7 +29,7 @@ cdef extern from "<sys/time.h>" nogil:
cdef
extern
from
"<sys/stat.h>"
nogil
:
cdef
struct
struct_stat
"stat"
:
cdef
cppclass
struct_stat
"stat"
:
dev_t
st_dev
ino_t
st_ino
mode_t
st_mode
...
...
@@ -93,3 +92,18 @@ cdef extern from "<unistd.h>" nogil:
mode_t
S_IWOTH
mode_t
S_IXOTH
# Directory-stream handle type, declared here without any fields.
cdef extern from "<sys/types.h>" nogil:
    ctypedef struct DIR

# Directory-entry record and the functions used to open, iterate and close
# a directory stream.
cdef extern from "<dirent.h>" nogil:
    cdef struct struct_dirent "dirent":
        ino_t d_ino
        # NOTE(review): the 256-byte name buffer is hard-coded here —
        # confirm it matches the platform's <dirent.h>.
        char d_name[256]

    DIR *opendir(const char *name)
    struct_dirent *readdir(DIR *dirp)
    int readdir_r(DIR *dirp, struct_dirent *entry, struct_dirent **result)
    int closedir(DIR *dirp)
cython/util/_sys.pxd
deleted
100644 → 0
View file @
939de4ea
from
posix.types
cimport
ino_t
cdef
extern
from
"<sys/types.h>"
nogil
:
ctypedef
struct
DIR
cdef
extern
from
"<dirent.h>"
nogil
:
cdef
struct
struct_dirent
"dirent"
:
ino_t
d_ino
char
d_name
[
256
]
DIR
*
opendir
(
const
char
*
name
)
struct_dirent
*
readdir
(
DIR
*
dirp
)
int
readdir_r
(
DIR
*
dirp
,
struct_dirent
*
entry
,
struct_dirent
**
result
)
int
closedir
(
DIR
*
dirp
)
cython/util/hashlib.pxd
View file @
05d5030a
...
...
@@ -3,10 +3,10 @@ from stdlib.string cimport Str
from
._hashlib
cimport
*
cdef
cypclass
MessageDigest
:
cdef
cypclass
Hash
:
EVP_MD_CTX
*
md_ctx
MessageDigest
__new__
(
alloc
,
const
EVP_MD
*
algo
):
Hash
__new__
(
alloc
,
const
EVP_MD
*
algo
):
md_ctx
=
EVP_MD_CTX_create
()
if
md_ctx
is
NULL
:
return
NULL
...
...
cython/util/
sy
s.pxd
→
cython/util/
o
s.pxd
View file @
05d5030a
...
...
@@ -8,7 +8,8 @@ from libc.stdio cimport stdin, stdout, stderr
from
posix
cimport
unistd
from
._sys
cimport
DIR
,
struct_dirent
,
opendir
,
readdir
,
closedir
from
._os
cimport
DIR
,
struct_dirent
,
opendir
,
readdir
,
closedir
from
._os
cimport
struct_stat
,
lstat
,
S_ISREG
,
S_ISLNK
,
S_ISDIR
cdef
enum
:
...
...
@@ -24,7 +25,9 @@ cdef inline Str read(FILE * file, int nbytes) nogil:
s
.
_str
.
append
(
_BUFSIZE
,
0
)
cdef
int
size
size
=
fread
(
s
.
_str
.
data
(),
1
,
nbytes
,
file
)
if
size
==
nbytes
or
not
ferror
(
file
):
if
size
!=
nbytes
:
if
ferror
(
file
):
return
NULL
s
.
_str
.
resize
(
size
)
return
s
...
...
@@ -64,3 +67,25 @@ cdef inline Str readlink(Str path, int max_size) nogil:
s
.
_str
.
resize
(
size
)
return
s
# Cypclass wrapper deriving from `struct stat`, filled in by lstat() at
# construction time, with helpers to classify the file type.
cdef cypclass Stat(struct_stat):
    # Allocate a Stat and populate it by calling lstat() on `path`.
    #
    # Returns NULL when allocation fails or when lstat() reports an error,
    # so callers must compare the result against NULL before using it.
    Stat __new__(alloc, Str path):
        instance = alloc()
        s = <struct_stat*> instance
        if s is NULL:
            return NULL
        # lstat() returns 0 on success; any other value is a failure.
        if lstat(path.bytes(), s):
            return NULL
        return instance

    # True when st_mode describes a regular file.
    bint is_regular(self):
        return S_ISREG(self.st_mode)

    # True when st_mode describes a symbolic link.
    bint is_symlink(self):
        return S_ISLNK(self.st_mode)

    # True when st_mode describes a directory.
    bint is_dir(self):
        return S_ISDIR(self.st_mode)
# Function-style shorthand for constructing a Stat from `path`; returns
# whatever Stat(path) produces.
cdef inline Stat stat(Str path) nogil:
    return Stat(path)
cython/util/stat.pxd
deleted
100644 → 0
View file @
939de4ea
# distutils: language = c++
from
stdlib.string
cimport
Str
from
stdlib.format
cimport
format
from
._stat
cimport
*
cdef
cypclass
Stat
:
struct_stat
st_data
Stat
__new__
(
alloc
,
Str
path
):
instance
=
alloc
()
if
not
lstat
(
path
.
bytes
(),
&
instance
.
st_data
):
return
instance
bint
is_regular
(
self
):
return
S_ISREG
(
self
.
st_data
.
st_mode
)
bint
is_symlink
(
self
):
return
S_ISLNK
(
self
.
st_data
.
st_mode
)
bint
is_dir
(
self
):
return
S_ISDIR
(
self
.
st_data
.
st_mode
)
Str
to_json
(
self
):
return
format
(
"""{{
"st_dev": {},
"st_ino": {},
"st_mode": {},
"st_nlink": {},
"st_uid": {},
"st_gid": {},
"st_rdev": {},
"st_size": {},
"st_blksize": {},
"st_blocks": {},
"st_atime": {},
"st_mtime": {},
"st_ctime": {},
"st_atime_ns": {},
"st_mtime_ns": {},
"st_ctime_ns": {}
}}"""
,
self
.
st_data
.
st_dev
,
self
.
st_data
.
st_ino
,
self
.
st_data
.
st_mode
,
self
.
st_data
.
st_nlink
,
self
.
st_data
.
st_uid
,
self
.
st_data
.
st_gid
,
self
.
st_data
.
st_rdev
,
self
.
st_data
.
st_size
,
self
.
st_data
.
st_blksize
,
self
.
st_data
.
st_blocks
,
self
.
st_data
.
st_atim
.
tv_sec
,
self
.
st_data
.
st_mtim
.
tv_sec
,
self
.
st_data
.
st_ctim
.
tv_sec
,
self
.
st_data
.
st_atim
.
tv_nsec
,
self
.
st_data
.
st_mtim
.
tv_nsec
,
self
.
st_data
.
st_ctim
.
tv_nsec
,
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment