Commit c7339e68 authored by Julien Jerphanion's avatar Julien Jerphanion

WIP: Add minimal interface

parent 319a7d4b
......@@ -6,7 +6,6 @@ __pycache__/
*$py.class
**/*.c
**/*.cpp
**/main
**/kdtree
**/*.h
......
# distutils: language = c++
# cython: language_level = 3
import numpy as np
cimport numpy as cnp
cimport numpy as np
cnp.import_array()
np.import_array()
from cython.view cimport array as cvarray
import numpy as np
from libcythonplus.list cimport cyplist
from runtime.runtime cimport BatchMailBox, NullResult, Scheduler
from libc.stdio cimport (
fprintf, fopen, fclose, fread,
fwrite, FILE, stdout, printf, ferror
)
from libc.stdio cimport printf
from libc.stdlib cimport malloc, free
from stdlib.stat cimport Stat, dev_t
from stdlib.fmt cimport sprintf
from stdlib.string cimport string
from posix.unistd cimport readlink
## Types declaration
ctypedef int I_t
ctypedef double D_t
......@@ -121,8 +111,6 @@ cdef I_t partition_node_indices(
n_points)
return 0
cdef cypclass Node activable:
"""A KDTree Node"""
......@@ -141,7 +129,6 @@ cdef cypclass Node activable:
self,
D_t * points,
I_t * indices,
I_t n,
I_t depth,
I_t n_dims,
I_t dim,
......@@ -149,6 +136,7 @@ cdef cypclass Node activable:
I_t end,
):
cdef I_t i
cdef I_t next_dim = (dim + 1) % n_dims
cdef I_t nn = end - start
cdef I_t split_index = (start + end) // 2
......@@ -157,7 +145,7 @@ cdef cypclass Node activable:
if (depth < 0) or (nn <= 1):
return
printf("Depth %d on dim %d: [%d, %d) med: %d\n\n\n", depth,
printf("Depth %d on dim %d: [%d, %d) med: %d\n\n", depth,
dim, start, end, split_index)
partition_node_indices(points + start,
......@@ -168,59 +156,120 @@ cdef cypclass Node activable:
self.left = activate(consume Node())
self.right = activate(consume Node())
next_dim = (dim + 1) % n_dims
self.left.build_node(NULL, points, indices,
n, depth - 1, n_dims, next_dim, start, split_index)
self.right.build_node(NULL, points, indices,
n, depth - 1, n_dims, next_dim, split_index, end)
cdef I_t start() nogil:
global scheduler
scheduler = Scheduler()
cdef I_t i
cdef I_t n = 12
cdef I_t d = 2
cdef I_t depth = 10
# TODO: use memory view for convenience
# cdef D_t p[12][2]
# cdef D_t [:, ::1] points_views = p
# Use Golden Spiral for the layout
cdef D_t golden_ratio = (1 + 5**0.5)/2
cdef D_t * points = <D_t *> malloc(n * d * sizeof(D_t))
cdef I_t * indices = <I_t *> malloc(n* sizeof(I_t))
for i in range(n):
indices[i] = i
points[i * d] = i # (i / golden_ratio) % 1
points[i * d + 1] = i # i / n
self.left.build_node(NULL,
points, indices,
depth = depth - 1,
n_dims=n_dims,
dim=next_dim,
start=start, end=split_index)
self.right.build_node(NULL,
points, indices,
depth=depth - 1,
n_dims=n_dims,
dim=next_dim,
start=split_index, end=end)
cdef cypclass KDTree:
"""A KDTree based on asynchronous and parallel computations.
It is similar to :class:`sklearn.neighbors.KDTree` but it is
using Cython+ features for asynchronous computations.
Asynchronous and parallel computations can be used when
constructing the nodes (see above) and when querying (WIP).
This relies on a Cython+ runtime using actors.
"""
printf("Before\n")
node = consume Node()
if node is NULL:
return -1
I_t n # number of points
I_t d # number of dimensions / features
I_t depth # max_depth of the tree (to be unified with leaf_size)
root = activate(consume node)
root.build_node(NULL, points, indices, n,
depth, d, dim=0, start=0, end=n)
np.ndarray data_arr
np.ndarray idx_array_arr
scheduler.finish()
del scheduler
# TODO: use memoryview from the user-provided numpy array
# and pointers for backend implementation.
# D_t[:, ::1] data
# I_t[::1] idx_array
for i in range(n):
printf("indices[%d] = %d\n", i, indices[i])
active Node root
free(points)
free(indices)
D_t *points
I_t *indices
return 0
__init__(self,
np.ndarray data,
I_t depth,
):
cdef I_t i
cdef I_t n = data.shape[0]
cdef I_t d = data.shape[1]
self.n = n
self.d = d
self.depth = depth
# TODO: define it on the front-end
# Use Golden Spiral for the layout
cdef D_t golden_ratio = (1 + 5**0.5)/2
self.points = <D_t *> malloc(n * d * sizeof(D_t))
self.indices = <I_t *> malloc(n * sizeof(I_t))
for i in range(n):
self.indices[i] = i
self.points[i * d] = (i / golden_ratio) % 1
self.points[i * d + 1] = i / n
# TODO: end
self._recursive_build()
void _recursive_build(self):
# TODO: introducing a context for the runtime
# would be nice here:
# ```
# with scheduler:
# self.root = ...
# ```
global scheduler
scheduler = Scheduler()
self.root = activate(consume Node())
if self.root is NULL:
printf("Error consuming node\n")
# When objects are activated (set as Actors), their methods
# are reified. When calling those reified methods,
# a new argument is prepended for the Promise,
# which we aren't using here, hence the extra NULL.
self.root.build_node(NULL,
self.points,
self.indices,
depth, d, dim=0, start=0, end=n)
scheduler.finish()
del scheduler
for i in range(n):
printf("indices[%d] = %d\n", i, self.indices[i])
void __dealloc__(self):
printf("Deallocating KDTree datastructures\n")
free(self.points)
free(self.indices)
printf("Done deallocating KDTree datastructures\n")
cdef public int main() nogil:
return start()
# Entry point for the compiled binary file
tree = KDTree()
printf("Done\n")
# XXX a segfault is thrown when exiting
# this function, but not others
return 0
def python_main():
start()
tree = KDTree()
import numpy as np
import kdtree
if __name__ == "__main__":
X = np.random.randint(0, 100, size=(12, 2))
tree = kdtree.KDTree(X, depth=10)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment