Commit d49871ec authored by Julien Jerphanion's avatar Julien Jerphanion

[WIP] Adapt querying logic

This change the logic to query the tree for nearest neighbors.
Start with a simple sequential query for each point.
parent 2b26f7d8
......@@ -254,7 +254,7 @@ cdef void _simultaneous_sort(
size - pivot_idx - 1)
cdef cypclass NeighborsHeap activable:
cdef cypclass NeighborsHeaps activable:
"""A max-heap structure to keep track of distances/indices of neighbors
This implements an efficient pre-allocated set of fixed-size heaps
......@@ -392,8 +392,6 @@ cdef cypclass Node activable:
Leafs are terminal Nodes and do not have children.
"""
# KDTree pointer reference
# TODO: can those be made class attributes?
NodeData_t *_node_data_ptr
D_t * _node_bounds_ptr
......@@ -462,39 +460,6 @@ cdef cypclass Node activable:
leaf_size, n_dims, next_dim,
mid, end, counter)
void query(self,
D_t * query_points,
I_t i,
active NeighborsHeap heaps,
):
cdef:
I_t j, k
D_t dist
D_t tmp
if True: #self._is_leaf:
# Computing all the euclideans distances here.
# for j in range(self._start, self._end):
# dist = 0
# for k in range(self._n_dims):
# tmp = (
# query_points[i * self._n_dims + k] -
# self._data_ptr[self._indices_ptr[j] * self._n_dims + k]
# )
# dist += tmp * tmp
#
#
# # The heap is responsible for keeping the closest
# # points for each query point i.
# heaps.push(NULL, i, dist, self._indices_ptr[j])
#
# return
return
# TODO: one can implement a pruning strategy here.
self._left.query(NULL, query_points, i, heaps)
self._right.query(NULL, query_points, i, heaps)
cdef cypclass KDTree:
"""A KDTree based on asynchronous and parallel computations.
......@@ -574,12 +539,18 @@ cdef cypclass KDTree:
#
# Also using this separate method allowing using actors
# because __init__ can't be reified.
self._root.build_node(NULL, 0,
self._data_ptr,
self._indices_ptr,
self._leaf_size, n_dims=self._d,
dim=0, start=0, end=self._n,
counter=counter)
self._root.build_node(
NULL,
0,
self._data_ptr,
self._indices_ptr,
self._leaf_size,
n_dims=self._d,
dim=0,
start=0,
end=self._n,
counter=counter,
)
# Waiting for the tree construction to end
# Somewhat similar to a thread barrier
......@@ -596,28 +567,81 @@ cdef cypclass KDTree:
free(self._node_bounds_ptr)
cdef int _query_single_depthfirst(self,
ITYPE_t i_node,
DTYPE_t* pt,
ITYPE_t i_pt,
NeighborsHeaps heaps,
DTYPE_t reduced_dist_LB,
) nogil except -1:
"""Recursive Single-tree k-neighbors query, depth-first approach"""
cdef NodeData_t node_info = self._node_data_ptr[i_node]
cdef DTYPE_t dist_pt, reduced_dist_LB_1, reduced_dist_LB_2
cdef ITYPE_t i, i1, i2
cdef DTYPE_t* data = &self.data[0, 0]
#------------------------------------------------------------
# Case 1: query point is outside node radius:
# trim it from the query
if reduced_dist_LB > heaps.largest(i_pt):
pass
#------------------------------------------------------------
# Case 2: this is a leaf node. Update set of nearby points
elif node_info.is_leaf:
for i in range(node_info.idx_start, node_info.idx_end):
dist_pt = sqeuclidean_dist(
pt,
&self.data[self.idx_array[i], 0],
self.data.shape[1]
)
heaps._push(i_pt, dist_pt, self.idx_array[i])
#------------------------------------------------------------
# Case 3: Node is not a leaf. Recursively query subnodes
# starting with the closest
else:
i1 = 2 * i_node + 1
i2 = i1 + 1
reduced_dist_LB_1 = min_rdist(self, i1, pt)
reduced_dist_LB_2 = min_rdist(self, i2, pt)
# recursively query subnodes
if reduced_dist_LB_1 <= reduced_dist_LB_2:
self._query_single_depthfirst(i1, pt, i_pt, heaps, reduced_dist_LB_1)
self._query_single_depthfirst(i2, pt, i_pt, heaps, reduced_dist_LB_2)
else:
self._query_single_depthfirst(i2, pt, i_pt, heaps, reduced_dist_LB_2)
self._query_single_depthfirst(i1, pt, i_pt, heaps, reduced_dist_LB_1)
return 0
void query(self,
np.ndarray query_points, # IN
np.ndarray closests, # OUT
):
np.ndarray query_points, # IN
np.ndarray closests, # OUT
):
cdef:
I_t completed_queries = 0
I_t i
I_t n_query = query_points.shape[0]
I_t n_features = query_points.shape[1]
I_t n_neighbors = closests.shape[1]
I_t total_n_pushes = n_query * self._n
D_t reduced_dist_LB
D_t * query_point = query_points.data
active NeighborsHeap heaps
active NeighborsHeaps heaps
heaps = consume NeighborsHeap(<I_t *> closests.data,
n_query,
n_neighbors)
heaps = consume NeighborsHeaps(<I_t *> closests.data,
n_query,
n_neighbors)
for i in range(n_query):
self._root.query(NULL, <D_t *> query_points.data, i, heaps)
while(completed_queries < total_n_pushes):
completed_queries = heaps.n_pushes(NULL).getIntResult()
for i in range(Xarr.shape[0]):
rdist_lower_bound = min_rdist(self, 0, query_point)
_query_single_depthfirst(0, pt, i, heaps, rdist_lower_bound)
query_point += n_features
heaps.sort(NULL)
......@@ -625,6 +649,30 @@ cdef cypclass KDTree:
pass
cdef inline DTYPE_t min_rdist(
KDTree tree,
ITYPE_t i_node,
DTYPE_t* pt,
) nogil except -1:
"""Compute the minimum reduced-distance between a point and a node"""
cdef ITYPE_t n_features = tree.data.shape[1]
cdef DTYPE_t d, d_lo, d_hi, rdist=0.0
cdef ITYPE_t j
for j in range(n_features):
d_lo = tree._node_bounds_ptr[0, i_node, j] - pt[j]
d_hi = pt[j] - tree._node_bounds_ptr[1, i_node, j]
# We use the following identity:
#
# 0.5 * (x + abs(x) = max(x, 0)
#
# twice.
d = 0.5 (d_lo + fabs(d_lo)) + (d_hi + fabs(d_hi))
rdist += d ** 2
return rdist
cdef public int main() nogil:
# Entry point for the compiled binary file
printf("empty public int main() nogil:")
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment