Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython_plus_experiments
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Julien Jerphanion
cython_plus_experiments
Commits
c952315e
Commit
c952315e
authored
Apr 19, 2021
by
Julien Jerphanion
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[WIP] KDTree implementation
Add some types definitions and node partitionning in.
parent
1a848e92
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
132 additions
and
26 deletions
+132
-26
kdtree/main.pyx
kdtree/main.pyx
+132
-26
No files found.
kdtree/main.pyx
View file @
c952315e
# distutils: language = c++
# distutils: language = c++
import
numpy
as
np
cimport
numpy
as
cnp
cnp
.
import_array
()
from
cython.view
cimport
array
as
cvarray
from
cython.view
cimport
array
as
cvarray
import
numpy
as
np
import
numpy
as
np
...
@@ -17,9 +22,106 @@ from stdlib.string cimport string
...
@@ -17,9 +22,106 @@ from stdlib.string cimport string
from
posix.unistd
cimport
readlink
from
posix.unistd
cimport
readlink
## Types declaration
ctypedef
int
I_t
ctypedef
double
D_t
cdef
lock
Scheduler
scheduler
cdef
lock
Scheduler
scheduler
cdef
extern
from
*
:
"""
#include <algorithm>
template<class D, class I>
class IndexComparator {
private:
const D *data;
I split_dim, n_features;
public:
IndexComparator(const D *data, const I &split_dim, const I &n_features):
data(data), split_dim(split_dim), n_features(n_features) {}
bool operator()(const I &a, const I &b) const {
D a_value = data[a * n_features + split_dim];
D b_value = data[b * n_features + split_dim];
return a_value == b_value ? a < b : a_value < b_value;
}
};
template<class D, class I>
void partition_node_indices_inner(
const D *data,
I *node_indices,
const I &split_dim,
const I &split_index,
const I &n_features,
const I &n_points) {
IndexComparator<D, I> index_comparator(data, split_dim, n_features);
std::nth_element(
node_indices,
node_indices + split_index,
node_indices + n_points,
index_comparator);
}
"""
void
partition_node_indices_inner
[
D
,
I
](
D
*
data
,
I
*
node_indices
,
I
split_dim
,
I
split_index
,
I
n_features
,
I
n_points
)
except
+
cdef
I_t
partition_node_indices
(
D_t
*
data
,
I_t
*
node_indices
,
I_t
split_dim
,
I_t
split_index
,
I_t
n_features
,
I_t
n_points
)
except
-
1
:
"""Partition points in the node into two equal-sized groups.
Upon return, the values in node_indices will be rearranged such that
(assuming numpy-style indexing):
data[node_indices[0:split_index], split_dim]
<= data[node_indices[split_index], split_dim]
and
data[node_indices[split_index], split_dim]
<= data[node_indices[split_index:n_points], split_dim]
The algorithm is essentially a partial in-place quicksort around a
set pivot.
Parameters
----------
data : D_t pointer
Pointer to a 2D array of the training data, of shape [N, n_features].
N must be greater than any of the values in node_indices.
node_indices : I_t pointer
Pointer to a 1D array of length n_points. This lists the indices of
each of the points within the current node. This will be modified
in-place.
split_dim : int
the dimension on which to split. This will usually be computed via
the routine ``find_node_split_dim``.
split_index : int
the index within node_indices around which to split the points.
n_features: int
the number of features (i.e columns) in the 2D array pointed by data.
n_points : int
the length of node_indices. This is also the number of points in
the original dataset.
Returns
-------
status : int
integer exit status. On return, the contents of node_indices are
modified as noted above.
"""
partition_node_indices_inner
(
data
,
node_indices
,
split_dim
,
split_index
,
n_features
,
n_points
)
return
0
cdef
cypclass
Node
activable
:
cdef
cypclass
Node
activable
:
"""A KDTree Node"""
"""A KDTree Node"""
...
@@ -36,22 +138,24 @@ cdef cypclass Node activable:
...
@@ -36,22 +138,24 @@ cdef cypclass Node activable:
void
build_node
(
void
build_node
(
self
,
self
,
double
*
points
,
D_t
*
points
,
int
n
,
I_t
*
indices
,
int
depth
,
I_t
n
,
int
dims
,
I_t
depth
,
int
dim_for_split
I_t
n_dims
,
I_t
dim
):
):
cdef
in
t
i
cdef
I_
t
i
if
(
depth
<
0
):
if
(
depth
<
0
):
return
return
printf
(
"Depth %d
\
n
"
,
depth
)
printf
(
"Depth %d
\
n
"
,
depth
)
printf
(
"Dim %d
\
n
"
,
dim_for_split
)
printf
(
"Number of dimensions %d
\
n
"
,
n_dims
)
printf
(
"Splitting dimension %d
\
n
"
,
dim
)
for
i
in
range
(
n
):
for
i
in
range
(
n
):
printf
(
"X[%d, %d] = %f
\
n
"
,
printf
(
"X[%d, %d] = %f
\
n
"
,
i
,
dim
_for_split
,
i
,
dim
,
points
[
i
*
dims
+
dim_for_split
])
points
[
i
*
n_dims
+
dim
])
printf
(
"
\
n
"
)
printf
(
"
\
n
"
)
# TODO: find a way to partitions indices here
# TODO: find a way to partitions indices here
...
@@ -59,32 +163,33 @@ cdef cypclass Node activable:
...
@@ -59,32 +163,33 @@ cdef cypclass Node activable:
self
.
left
=
activate
(
consume
Node
())
self
.
left
=
activate
(
consume
Node
())
self
.
right
=
activate
(
consume
Node
())
self
.
right
=
activate
(
consume
Node
())
self
.
left
.
build_node
(
NULL
,
points
,
depth
-
1
,
n
,
dims
,
(
dim_for_split
+
self
.
left
.
build_node
(
NULL
,
points
,
indices
,
1
)
n
,
depth
-
1
,
n_dims
,
(
dim
+
1
)
%
n_dims
)
%
dims
)
self
.
right
.
build_node
(
NULL
,
points
,
indices
,
self
.
right
.
build_node
(
NULL
,
points
,
depth
-
1
,
n
,
dims
,
(
dim_for_split
n
,
depth
-
1
,
n_dims
,
(
dim
+
1
)
%
n_dims
)
+
1
%
dims
))
cdef
in
t
start
()
nogil
:
cdef
I_
t
start
()
nogil
:
global
scheduler
global
scheduler
scheduler
=
Scheduler
()
scheduler
=
Scheduler
()
cdef
int
i
cdef
I_t
i
cdef
int
n
=
12
cdef
I_t
n
=
12
cdef
int
d
=
2
cdef
I_t
d
=
2
cdef
I_t
depth
=
5
# TODO: use memory view for convenience
# TODO: use memory view for convenience
# cdef
double
p[12][2]
# cdef
D_t
p[12][2]
# cdef
double
[:, ::1] points_views = p
# cdef
D_t
[:, ::1] points_views = p
# Use Golden Spiral for the layout
# Use Golden Spiral for the layout
cdef
double
golden_ratio
=
(
1
+
5
**
0.5
)
/
2
cdef
D_t
golden_ratio
=
(
1
+
5
**
0.5
)
/
2
cdef
double
*
points
=
<
double
*>
malloc
(
n
*
d
*
sizeof
(
double
))
cdef
D_t
*
points
=
<
D_t
*>
malloc
(
n
*
d
*
sizeof
(
D_t
))
cdef
I_t
*
indices
=
<
I_t
*>
malloc
(
n
*
sizeof
(
I_t
))
for
i
in
range
(
n
):
for
i
in
range
(
n
):
indices
[
i
]
=
i
points
[
i
*
d
]
=
(
i
/
golden_ratio
)
%
1
points
[
i
*
d
]
=
(
i
/
golden_ratio
)
%
1
points
[
i
*
d
+
1
]
=
i
/
n
points
[
i
*
d
+
1
]
=
i
/
n
printf
(
"Before
\
n
"
)
printf
(
"Before
\
n
"
)
node
=
consume
Node
()
node
=
consume
Node
()
...
@@ -92,11 +197,12 @@ cdef int start() nogil:
...
@@ -92,11 +197,12 @@ cdef int start() nogil:
return
-
1
return
-
1
root
=
activate
(
consume
node
)
root
=
activate
(
consume
node
)
root
.
build_node
(
NULL
,
points
,
n
,
5
,
d
,
0
)
root
.
build_node
(
NULL
,
points
,
indices
,
n
,
depth
,
d
,
0
)
scheduler
.
finish
()
scheduler
.
finish
()
del
scheduler
del
scheduler
free
(
points
)
free
(
points
)
free
(
indices
)
return
0
return
0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment