Commit a5bb04be authored by Julien Jerphanion's avatar Julien Jerphanion

[WIP] Implementing pruning

This won't C++ compile because of problem
with types. I think that we need to work
on the type system first, allowing treating
such cases here.
parent ad5a2d36
......@@ -8,9 +8,10 @@ np.import_array()
from runtime.runtime cimport BatchMailBox, NullResult, Scheduler, WaitResult
from libc.stdio cimport printf
from libc.stdlib cimport malloc, free
from libc.math cimport fmin, fmax, fabs
from libcpp.vector cimport vector
from openmp cimport omp_get_max_threads
from cython.parallel import prange
## Types declaration
......@@ -251,6 +252,9 @@ cdef cypclass NeighborsHeap activable:
void __dealloc__(self):
free(self._distances)
void increment_counter(self, I_t val):
self._n_pushes += val
void push(self, I_t row, D_t val, I_t i_val):
"""push (val, i_val) into the given row"""
cdef:
......@@ -302,7 +306,6 @@ cdef cypclass NeighborsHeap activable:
distances[i] = val
indices[i] = i_val
void sort(self):
# NOTE: Ideally we could sort results in parallel, but
# OpenMP threadpool and this runtime's aren't working
......@@ -322,6 +325,10 @@ cdef cypclass NeighborsHeap activable:
int n_pushes(self):
return self._n_pushes
int is_larger(self, I_t row, D_t val):
# TODO: support for double via promises?
return 1 if val > self._distances[row * self._n_nbrs] else 0
int is_sorted(self):
# TODO: is there a support for returning bool via
# promises? As of now returning a int for making
......@@ -357,9 +364,13 @@ cdef cypclass Node activable:
bint _is_leaf
active Node _top
active Node _left
active Node _right
vector[D_t] _lower_bounds
vector[D_t] _upper_bounds
__init__(self):
self._active_result_class = WaitResult.construct
self._active_queue_class = consume BatchMailBox(scheduler)
......@@ -372,6 +383,7 @@ cdef cypclass Node activable:
# because __init__ can't be reified.
void build_node(
self,
active Node top,
D_t * data_ptr,
I_t * indices_ptr,
I_t leaf_size,
......@@ -381,9 +393,13 @@ cdef cypclass Node activable:
I_t end,
active Counter counter,
):
cdef I_t i
cdef I_t next_dim = (dim + 1) % n_dims
cdef I_t mid = (start + end) // 2
cdef:
I_t k
I_t next_dim = (dim + 1) % n_dims
I_t mid = (start + end) // 2
D_t * point
self._top = top
self._data_ptr = data_ptr
self._indices_ptr = indices_ptr
......@@ -393,8 +409,25 @@ cdef cypclass Node activable:
self._start = start
self._end = end
self._upper_bounds.assign(self._n_dims, -INF)
self._lower_bounds.assign(self._n_dims, INF)
if (end - start <= leaf_size):
self._is_leaf = True
# Compute the actual data range to be able to prune then
for i in range(self._start + 1, self._end):
point = self._data_ptr + self._indices_ptr[i] * self._n_dims
for k in range(self._n_dims):
self._lower_bounds[k] = fmin(self._lower_bounds[k], point[k])
self._upper_bounds[k] = fmax(self._upper_bounds[k], point[k])
if self._top is not NULL and self._top is not self:
# Notifying the top Node of changes
for k in range(self._n_dims):
self._top._notify_lower_bound(NULL, k, self._lower_bounds[k])
self._top._notify_upper_bound(NULL, k, self._upper_bounds[k])
# Adding to the global counter the number
# of samples the leaf is responsible of
counter.add(NULL, end - start)
......@@ -408,44 +441,70 @@ cdef cypclass Node activable:
self._left = consume Node()
self._right = consume Node()
# Recursing on both partition
self._left.build_node(NULL,
# Recursing on both partitions
self._left.build_node(NULL, consume self,
data_ptr, indices_ptr,
leaf_size, n_dims, next_dim,
start, mid, counter)
self._right.build_node(NULL,
self._right.build_node(NULL, consume self,
data_ptr, indices_ptr,
leaf_size, n_dims, next_dim,
mid, end, counter)
void _notify_lower_bound(self, I_t dim, D_t value):
if self._lower_bounds[dim] > value:
self._lower_bounds[dim] = value
if self._top is not NULL and self._top is not self:
self._top._notify_lower_bound(NULL, dim, value)
void _notify_upper_bound(self, I_t dim, D_t value):
if self._upper_bounds[dim] < value:
self._upper_bounds[dim] = value
if self._top is not NULL and self._top is not self:
self._top._notify_upper_bound(NULL, dim, value)
void query(self,
D_t * query_points,
I_t i,
active NeighborsHeap heaps,
D_t * query_points,
I_t i,
active NeighborsHeap heaps,
):
cdef:
I_t j, k
D_t dist
D_t tmp
I_t k
D_t dist = 0.0
D_t * point = query_points + i * self._n_dims
D_t d, d_lo, d_hi
# Pruning strategy: we don't explore the node
# if it is outside the bounds
for k in range(self._n_dims):
d_lo = self._lower_bounds[k] - point[k]
d_hi = point[k] - self._upper_bounds[k]
# Here we use the fact that:
# x + abs(x) = 2 * max(x, 0)
d = (d_lo + fabs(d_lo)) + (d_hi + fabs(d_hi))
dist += 0.5 * d * d
if heaps.is_larger(NULL, i, dist).getIntResult():
# We dont't explore this node here
heaps.increment_counter(NULL, self._end - self._start)
return
if self._is_leaf:
# Computing all the euclideans distances here
for j in range(self._start, self._end):
dist = 0
for k in range(self._n_dims):
tmp = (
query_points[i * self._n_dims + k] -
d = (
point[k] -
self._data_ptr[self._indices_ptr[j] * self._n_dims + k]
)
dist += tmp * tmp
dist += d * d
# The heap is doing the smart work of keeping
# the closest points for each query point i
heaps.push(NULL, i, dist, self._indices_ptr[j])
return
# TODO: one can implement a pruning strategy here
self._left.query(NULL, query_points, i, heaps)
self._right.query(NULL, query_points, i, heaps)
......@@ -512,8 +571,11 @@ cdef cypclass KDTree:
# which we aren't using using here, hence the extra NULL.
#
# Also using this separate method allowing using actors
# because __init__ can't be reified.
self._root.build_node(NULL,
# because __init__ can't be reified.
self._root.build_node(NULL, consume self._root,
# TODO: there's a problem of types here (^) at C++
# compile time. Probably, we need the new type system
# before pursuing.
self._data_ptr,
self._indices_ptr,
self._leaf_size, n_dims=self._d,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment