Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython_plus_experiments
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Julien Jerphanion
cython_plus_experiments
Commits
32e855bc
Commit
32e855bc
authored
Jun 11, 2021
by
Julien Jerphanion
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Update benchmark
With some ugly duplication because we just want a quick test.
parent
e8d90ccd
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
80 additions
and
18 deletions
+80
-18
kdtree/benchmarks/bench.py
kdtree/benchmarks/bench.py
+69
-5
kdtree/benchmarks/config.yml
kdtree/benchmarks/config.yml
+11
-13
No files found.
kdtree/benchmarks/bench.py
View file @
32e855bc
...
...
@@ -27,6 +27,7 @@ def benchmark(config, results_folder, bench_name):
datasets
=
config
[
"datasets"
]
estimators
=
config
[
"estimators"
]
leaf_sizes
=
config
[
"leaf_sizes"
]
n_neighbors
=
config
[
"n_neighbors"
]
n_trials
=
config
.
get
(
"n_trials"
,
3
)
return_distance
=
config
.
get
(
"return_distance"
,
False
)
...
...
@@ -57,23 +58,28 @@ def benchmark(config, results_folder, bench_name):
for
leaf_size
in
leaf_sizes
:
for
trial
in
range
(
n_trials
):
dataset
=
{
k
:
int
(
float
(
v
))
for
k
,
v
in
dataset
.
items
()}
ns_train
,
n_features
=
dataset
.
values
()
ns_train
,
n
s_test
,
n
_features
=
dataset
.
values
()
X_train
=
np
.
random
.
rand
(
ns_train
,
n_features
)
bytes_processed_data
=
X_train
.
nbytes
X_test
=
np
.
random
.
rand
(
ns_test
,
n_features
)
bytes_processed_data_init
=
X_train
.
nbytes
bytes_processed_data_query
=
X_test
.
nbytes
t0_
=
time
.
perf_counter
()
tree
=
KDTree
(
X_train
,
leaf_size
=
256
)
sk_
tree
=
KDTree
(
X_train
,
leaf_size
=
256
)
t1_
=
time
.
perf_counter
()
time_elapsed
=
round
(
t1_
-
t0_
,
5
)
row
=
dict
(
trial
=
trial
,
func
=
"init"
,
implementation
=
"sklearn"
,
leaf_size
=
leaf_size
,
n_samples_train
=
ns_train
,
n_samples_test
=
ns_test
,
n_features
=
n_features
,
n_neighbors
=
np
.
nan
,
time_elapsed
=
time_elapsed
,
throughput
=
bytes_processed_data
/
time_elapsed
/
one_GiB
,
throughput
=
bytes_processed_data
_init
/
time_elapsed
/
one_GiB
,
)
benchmarks
=
benchmarks
.
append
(
row
,
ignore_index
=
True
)
...
...
@@ -87,12 +93,15 @@ def benchmark(config, results_folder, bench_name):
row
=
dict
(
trial
=
trial
,
func
=
"init"
,
implementation
=
"kdtree"
,
leaf_size
=
leaf_size
,
n_samples_train
=
ns_train
,
n_samples_test
=
ns_test
,
n_features
=
n_features
,
n_neighbors
=
np
.
nan
,
time_elapsed
=
time_elapsed
,
throughput
=
bytes_processed_data
/
time_elapsed
/
one_GiB
,
throughput
=
bytes_processed_data
_init
/
time_elapsed
/
one_GiB
,
)
benchmarks
=
benchmarks
.
append
(
row
,
ignore_index
=
True
)
...
...
@@ -104,6 +113,61 @@ def benchmark(config, results_folder, bench_name):
mode
=
"w+"
,
index
=
False
,
)
for
k
in
n_neighbors
:
t0_
=
time
.
perf_counter
()
sk_tree
.
query
(
X_test
,
k
=
k
,
return_distance
=
False
)
t1_
=
time
.
perf_counter
()
time_elapsed
=
round
(
t1_
-
t0_
,
5
)
row
=
dict
(
trial
=
trial
,
func
=
"query"
,
implementation
=
"sklearn"
,
leaf_size
=
leaf_size
,
n_samples_train
=
ns_train
,
n_samples_test
=
ns_test
,
n_features
=
n_features
,
n_neighbors
=
k
,
time_elapsed
=
time_elapsed
,
throughput
=
bytes_processed_data_query
/
time_elapsed
/
one_GiB
,
)
benchmarks
=
benchmarks
.
append
(
row
,
ignore_index
=
True
)
pprint
(
row
)
print
(
"---"
)
closests
=
np
.
zeros
((
ns_test
,
k
),
dtype
=
np
.
int32
)
t0_
=
time
.
perf_counter
()
tree
.
query
(
X_test
,
closests
)
t1_
=
time
.
perf_counter
()
time_elapsed
=
round
(
t1_
-
t0_
,
5
)
row
=
dict
(
trial
=
trial
,
func
=
"query"
,
implementation
=
"kdtree"
,
leaf_size
=
leaf_size
,
n_samples_train
=
ns_train
,
n_samples_test
=
ns_test
,
n_features
=
n_features
,
n_neighbors
=
k
,
time_elapsed
=
time_elapsed
,
throughput
=
bytes_processed_data_query
/
time_elapsed
/
one_GiB
,
)
benchmarks
=
benchmarks
.
append
(
row
,
ignore_index
=
True
)
pprint
(
row
)
print
(
"---"
)
benchmarks
.
to_csv
(
f"
{
results_folder
}
/
{
bench_name
}
.csv"
,
mode
=
"w+"
,
index
=
False
,
)
# Overriding again now that all the dyn. lib. have been loaded
env_specs
[
"threadpool_info"
]
=
threadpoolctl
.
threadpool_info
()
...
...
kdtree/benchmarks/config.yml
View file @
32e855bc
...
...
@@ -7,22 +7,15 @@ estimators:
n_trials
:
1
datasets
:
-
n_samples_train
:
1e3
n_features
:
10
-
n_samples_train
:
1e3
n_features
:
50
-
n_samples_train
:
1e3
n_features
:
100
-
n_samples_train
:
1e3
n_features
:
1000
-
n_samples_train
:
1e6
n_samples_test
:
1e4
n_features
:
5
-
n_samples_train
:
1e6
n_samples_test
:
1e4
n_features
:
10
-
n_samples_train
:
1e6
n_samples_test
:
1e4
n_features
:
50
-
n_samples_train
:
1e6
n_features
:
100
leaf_sizes
:
-
64
...
...
@@ -30,4 +23,9 @@ leaf_sizes:
-
256
-
512
-
1024
-
2048
\ No newline at end of file
-
2048
n_neighbors
:
-
1
-
10
-
100
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment