Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython_plus_experiments
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Julien Jerphanion
cython_plus_experiments
Commits
b14cba17
Commit
b14cba17
authored
Jun 28, 2021
by
Julien Jerphanion
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add setup for reporting results
parent
4d309ddd
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
93 additions
and
63 deletions
+93
-63
kdtree/Makefile
kdtree/Makefile
+6
-0
kdtree/benchmarks/benchmark.py
kdtree/benchmarks/benchmark.py
+8
-63
kdtree/benchmarks/report.py
kdtree/benchmarks/report.py
+79
-0
No files found.
kdtree/Makefile
View file @
b14cba17
...
...
@@ -78,6 +78,12 @@ benchmark:
for
i
in
{
0..5
}
;
do
\
taskset
-c
0-
$$
((
2
**
i-1
))
${PYTHON_EXECUTABLE}
benchmarks/benchmark.py
`
git rev-parse
--short
HEAD
`
_
$$
((
2
**
i-1
))
_thread
;
\
done
${PYTHON_EXECUTABLE}
benchmarks/report.py
`
git rev-parse
--short
HEAD
`
## report: Report benchmark results
.PHONY
:
report
report
:
${PYTHON_EXECUTABLE}
benchmarks/report.py
`
git rev-parse
--short
HEAD
`
## test: Launch all the test.
.PHONY
:
test
...
...
kdtree/benchmarks/benchmark.py
View file @
b14cba17
import
argparse
import
glob
import
importlib
import
json
import
os
import
sys
import
subprocess
import
time
import
kdtree
...
...
@@ -17,6 +17,7 @@ from matplotlib import pyplot as plt
from
memory_profiler
import
memory_usage
from
sklearn
import
set_config
from
sklearn.neighbors
import
KDTree
from
sklearn.utils._openmp_helpers
import
_openmp_effective_n_threads
# Be gentle with eyes
...
...
@@ -34,6 +35,8 @@ def benchmark(config, results_folder, bench_name):
one_GiB
=
1e9
benchmarks
=
pd
.
DataFrame
()
n_threads
=
_openmp_effective_n_threads
()
env_specs_file
=
f"
{
results_folder
}
/
{
bench_name
}
.json"
# TODO: This is ugly, but I haven't found something better.
...
...
@@ -47,6 +50,7 @@ def benchmark(config, results_folder, bench_name):
threadpool_info
=
threadpoolctl
.
threadpool_info
(),
commit
=
commit
,
config
=
config
,
n_threads
=
n_threads
,
)
set_config
(
assume_finite
=
True
)
...
...
@@ -73,6 +77,7 @@ def benchmark(config, results_folder, bench_name):
trial
=
trial
,
func
=
"init"
,
implementation
=
"sklearn"
,
n_threads
=
n_threads
,
leaf_size
=
leaf_size
,
n_samples_train
=
ns_train
,
n_samples_test
=
ns_test
,
...
...
@@ -95,6 +100,7 @@ def benchmark(config, results_folder, bench_name):
trial
=
trial
,
func
=
"init"
,
implementation
=
"kdtree"
,
n_threads
=
n_threads
,
leaf_size
=
leaf_size
,
n_samples_train
=
ns_train
,
n_samples_test
=
ns_test
,
...
...
@@ -176,63 +182,6 @@ def benchmark(config, results_folder, bench_name):
json
.
dump
(
env_specs
,
outfile
)
def report(results_folder, bench_name):
    """Render one bar-plot page per benchmark configuration and merge them.

    Reads the first ``*.csv`` and the first ``*.json`` found in
    ``results_folder``, groups the CSV rows by
    ``(n_samples_train, n_features, leaf_size)``, saves one PDF page per
    group as ``{results_folder}/{bench_name}_{i}.pdf`` and finally merges
    the pages into ``{results_folder}/{bench_name}.pdf`` with the external
    ``pdfunite`` command.

    Parameters
    ----------
    results_folder : str
        Folder holding the benchmark outputs; the PDFs are written there too.
    bench_name : str
        Name used as prefix of the generated PDF files and shown in titles.

    Raises
    ------
    IndexError
        If ``results_folder`` contains no ``*.csv`` or no ``*.json`` file.
    subprocess.CalledProcessError
        If ``pdfunite`` exits with a non-zero status.
    """
    # NOTE(review): glob order is unspecified, so with several CSV/JSON
    # files in the folder the one picked here is arbitrary -- confirm the
    # folder is expected to hold exactly one of each.
    df = pd.read_csv(glob.glob(f"{results_folder}/*.csv")[0])
    with open(glob.glob(f"{results_folder}/*.json")[0], "r") as json_file:
        env_specs = json.load(json_file)

    cols = [
        "n_samples_train",
        "n_features",
        "leaf_size",
    ]

    df[cols] = df[cols].astype(np.uint32)
    # String versions of the numeric columns, used as categorical axes.
    df["d"] = df.n_features.apply(str)
    df["leaf"] = df.leaf_size.apply(str)

    # Pages are remembered in creation order so the merge below neither
    # depends on lexicographic file-name sorting ("_10" < "_2") nor picks up
    # the merged output or stale pages left by a previous run.
    page_paths = []

    for i, (vals, group_df) in enumerate(df.groupby(cols)):
        # 16:9 ratio
        fig = plt.figure(figsize=(24, 13.5))
        ax = plt.gca()
        splot = sns.barplot(
            y="leaf", x="throughput", hue="implementation", data=group_df, ax=ax
        )
        ax.set_xlabel("Throughput (in GB/s)")
        ax.set_ylabel("Leaf Size")
        ax.tick_params(labelrotation=45)

        # Adding the numerical values of "x" to bar
        for p in splot.patches:
            splot.annotate(
                f"{p.get_width():.4e}",
                (p.get_width(), p.get_y() + p.get_height() / 2),
                ha="center",
                va="center",
                size=10,
                xytext=(0, -12),
                textcoords="offset points",
            )

        title = (
            f"KDTree@{env_specs['commit']} - "
            f"Euclidean Distance, dtype=np.float64, "
            f"{group_df.trial.max() + 1} trials - Bench. Name: {bench_name}\n"
        )
        title += "n_samples_train=%s - n_features=%s - leaf_size=%s" % vals
        fig.suptitle(title, fontsize=16)

        page_path = f"{results_folder}/{bench_name}_{i}.pdf"
        fig.savefig(page_path, bbox_inches="tight")
        page_paths.append(page_path)
        # Release the figure; otherwise every page stays alive in pyplot's
        # figure registry until the process exits.
        plt.close(fig)

    # Unifying pdf files into one
    subprocess.check_output(
        ["pdfunite", *page_paths, f"{results_folder}/{bench_name}.pdf"]
    )
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
(
"benchmark"
)
...
...
@@ -249,8 +198,4 @@ if __name__ == "__main__":
print
(
f"Benchmarking
{
bench_name
}
"
)
benchmark
(
config
,
results_folder
,
bench_name
)
print
(
f"Benchmark results wrote in
{
results_folder
}
"
)
print
(
f"Reporting results for
{
bench_name
}
"
)
report
(
results_folder
,
bench_name
)
print
(
f"Reporting results wrote in
{
results_folder
}
"
)
\ No newline at end of file
print
(
f"Benchmark results wrote in
{
results_folder
}
"
)
\ No newline at end of file
kdtree/benchmarks/report.py
0 → 100644
View file @
b14cba17
import
os
import
argparse
import
numpy
as
np
import
glob
import
subprocess
import
seaborn
as
sns
import
pandas
as
pd
import
matplotlib.pyplot
as
plt
def n_threads(filename):
    """Return the integer encoded in a benchmark result file name.

    The file name is expected to look like ``<prefix>_<n>_thread.csv``
    (for instance ``2742685_0_thread.csv``); ``<n>`` is returned as ``int``.
    The name is split from the right so that a prefix which itself contains
    underscores does not shift the field being read.

    Raises ``IndexError`` or ``ValueError`` when the name does not follow
    that pattern.
    """
    basename = os.path.basename(filename)
    return int(basename.rsplit("_", 2)[-2])


def main():
    """Plot throughput against thread count for every benchmark setting.

    Takes one positional command-line argument, ``commit``. All CSV files
    matching ``results/{commit}*/*.csv`` are concatenated, grouped by
    ``(n_samples_train, n_samples_test, n_features, leaf_size)`` and one
    bar plot per group is saved as ``{i}.pdf`` in a ``results/<commit>``
    folder located next to this script.

    Raises ``RuntimeError`` when no CSV file matches the commit.
    """
    parser = argparse.ArgumentParser("report")
    parser.add_argument("commit")
    args = parser.parse_args()
    commit = args.commit

    results_folder = os.path.abspath(
        os.path.join(__file__, os.pardir, "results")
    )

    # NOTE(review): this pattern is resolved against the current working
    # directory, whereas ``results_folder`` above is resolved against the
    # location of this file -- confirm both are meant to point at the same
    # place for the way the script is launched.
    csv_bench_results = sorted(
        glob.glob(f"results/{commit}*/*.csv"), key=n_threads
    )
    if not csv_bench_results:
        raise RuntimeError(f"No results for commit {commit}")

    commit_result_folder = f"{results_folder}/{commit}"
    os.makedirs(commit_result_folder, exist_ok=True)

    # Per-file row labels carry no meaning once the files are stacked.
    df = pd.concat(map(pd.read_csv, csv_bench_results), ignore_index=True)
    df = df.drop(columns=["n_neighbors", "func"])

    cols = [
        "n_samples_train",
        "n_samples_test",
        "n_features",
        "leaf_size",
    ]

    # This creates a category used for grouping
    df["t"] = df.n_threads.apply(str)

    for i, (vals, df_g) in enumerate(df.groupby(cols)):
        # 16:9 ratio
        fig = plt.figure(figsize=(24, 13.5))
        ax = plt.gca()
        splot = sns.barplot(
            y="t", x="throughput", hue="implementation", data=df_g, ax=ax
        )
        ax.set_xlabel("Throughput (in GB/s)")
        ax.set_ylabel("Number of threads")
        ax.tick_params(labelrotation=45)

        # Adding the numerical values of "x" to bar
        for p in splot.patches:
            splot.annotate(
                f"{p.get_width():.4e}",
                (p.get_width(), p.get_y() + p.get_height() / 2),
                ha="center",
                va="center",
                size=10,
                xytext=(0, -12),
                textcoords="offset points",
            )

        title = (
            f"KDTree.__init__@{commit} - "
            f"Euclidean Distance, dtype=np.float64, "
            f"{df_g.trial.max() + 1} trials\n"
        )
        title += (
            "n_samples_train=%s - n_samples_test=%s - "
            "n_features=%s - leaf_size=%s" % vals
        )
        fig.suptitle(title, fontsize=16)

        fig.savefig(f"{commit_result_folder}/{i}.pdf", bbox_inches="tight")
        # Release the figure; otherwise one figure per group stays alive in
        # pyplot's figure registry until the process exits.
        plt.close(fig)


if __name__ == "__main__":
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment