Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython_plus_experiments
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Julien Jerphanion
cython_plus_experiments
Commits
b14cba17
Commit
b14cba17
authored
Jun 28, 2021
by
Julien Jerphanion
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add setup for reporting results
parent
4d309ddd
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
93 additions
and
63 deletions
+93
-63
kdtree/Makefile
kdtree/Makefile
+6
-0
kdtree/benchmarks/benchmark.py
kdtree/benchmarks/benchmark.py
+8
-63
kdtree/benchmarks/report.py
kdtree/benchmarks/report.py
+79
-0
No files found.
kdtree/Makefile
View file @
b14cba17
...
@@ -78,6 +78,12 @@ benchmark:
...
@@ -78,6 +78,12 @@ benchmark:
for
i
in
{
0..5
}
;
do
\
for
i
in
{
0..5
}
;
do
\
taskset
-c
0-
$$
((
2
**
i-1
))
${PYTHON_EXECUTABLE}
benchmarks/benchmark.py
`
git rev-parse
--short
HEAD
`
_
$$
((
2
**
i-1
))
_thread
;
\
taskset
-c
0-
$$
((
2
**
i-1
))
${PYTHON_EXECUTABLE}
benchmarks/benchmark.py
`
git rev-parse
--short
HEAD
`
_
$$
((
2
**
i-1
))
_thread
;
\
done
done
${PYTHON_EXECUTABLE}
benchmarks/report.py
`
git rev-parse
--short
HEAD
`
## report: Report benchmark results
.PHONY
:
report
report
:
${PYTHON_EXECUTABLE}
benchmarks/report.py
`
git rev-parse
--short
HEAD
`
## test: Launch all the test.
## test: Launch all the test.
.PHONY
:
test
.PHONY
:
test
...
...
kdtree/benchmarks/benchmark.py
View file @
b14cba17
import
argparse
import
argparse
import
glob
import
glob
import
importlib
import
json
import
json
import
os
import
os
import
sys
import
subprocess
import
subprocess
import
time
import
time
import
kdtree
import
kdtree
...
@@ -17,6 +17,7 @@ from matplotlib import pyplot as plt
...
@@ -17,6 +17,7 @@ from matplotlib import pyplot as plt
from
memory_profiler
import
memory_usage
from
memory_profiler
import
memory_usage
from
sklearn
import
set_config
from
sklearn
import
set_config
from
sklearn.neighbors
import
KDTree
from
sklearn.neighbors
import
KDTree
from
sklearn.utils._openmp_helpers
import
_openmp_effective_n_threads
# Be gentle with eyes
# Be gentle with eyes
...
@@ -34,6 +35,8 @@ def benchmark(config, results_folder, bench_name):
...
@@ -34,6 +35,8 @@ def benchmark(config, results_folder, bench_name):
one_GiB
=
1e9
one_GiB
=
1e9
benchmarks
=
pd
.
DataFrame
()
benchmarks
=
pd
.
DataFrame
()
n_threads
=
_openmp_effective_n_threads
()
env_specs_file
=
f"
{
results_folder
}
/
{
bench_name
}
.json"
env_specs_file
=
f"
{
results_folder
}
/
{
bench_name
}
.json"
# TODO: This is ugly, but I haven't found something better.
# TODO: This is ugly, but I haven't found something better.
...
@@ -47,6 +50,7 @@ def benchmark(config, results_folder, bench_name):
...
@@ -47,6 +50,7 @@ def benchmark(config, results_folder, bench_name):
threadpool_info
=
threadpoolctl
.
threadpool_info
(),
threadpool_info
=
threadpoolctl
.
threadpool_info
(),
commit
=
commit
,
commit
=
commit
,
config
=
config
,
config
=
config
,
n_threads
=
n_threads
,
)
)
set_config
(
assume_finite
=
True
)
set_config
(
assume_finite
=
True
)
...
@@ -73,6 +77,7 @@ def benchmark(config, results_folder, bench_name):
...
@@ -73,6 +77,7 @@ def benchmark(config, results_folder, bench_name):
trial
=
trial
,
trial
=
trial
,
func
=
"init"
,
func
=
"init"
,
implementation
=
"sklearn"
,
implementation
=
"sklearn"
,
n_threads
=
n_threads
,
leaf_size
=
leaf_size
,
leaf_size
=
leaf_size
,
n_samples_train
=
ns_train
,
n_samples_train
=
ns_train
,
n_samples_test
=
ns_test
,
n_samples_test
=
ns_test
,
...
@@ -95,6 +100,7 @@ def benchmark(config, results_folder, bench_name):
...
@@ -95,6 +100,7 @@ def benchmark(config, results_folder, bench_name):
trial
=
trial
,
trial
=
trial
,
func
=
"init"
,
func
=
"init"
,
implementation
=
"kdtree"
,
implementation
=
"kdtree"
,
n_threads
=
n_threads
,
leaf_size
=
leaf_size
,
leaf_size
=
leaf_size
,
n_samples_train
=
ns_train
,
n_samples_train
=
ns_train
,
n_samples_test
=
ns_test
,
n_samples_test
=
ns_test
,
...
@@ -176,63 +182,6 @@ def benchmark(config, results_folder, bench_name):
...
@@ -176,63 +182,6 @@ def benchmark(config, results_folder, bench_name):
json
.
dump
(
env_specs
,
outfile
)
json
.
dump
(
env_specs
,
outfile
)
def report(results_folder, bench_name):
    """Plot benchmark throughput results and bundle them into one PDF.

    Loads the first CSV (timings) and first JSON (environment specs) found
    in ``results_folder``, draws one bar plot per
    ``(n_samples_train, n_features, leaf_size)`` group comparing the
    benchmarked implementations, writes each figure to
    ``{results_folder}/{bench_name}_{i}.pdf``, then merges every matching
    PDF into ``{results_folder}/{bench_name}.pdf`` via the external
    ``pdfunite`` tool (must be on PATH).

    Parameters
    ----------
    results_folder : str
        Folder holding exactly the CSV/JSON outputs of one benchmark run.
    bench_name : str
        Base name for the generated PDF files.
    """
    # NOTE(review): glob(...)[0] assumes at least one CSV and one JSON file
    # exist in results_folder; an empty folder raises IndexError — confirm
    # callers guarantee this.
    df = pd.read_csv(glob.glob(f"{results_folder}/*.csv")[0])
    with open(glob.glob(f"{results_folder}/*.json")[0], "r") as json_file:
        env_specs = json.load(json_file)

    # Columns used both for grouping and for integer down-casting below.
    cols = [
        "n_samples_train",
        "n_features",
        "leaf_size",
    ]

    df[cols] = df[cols].astype(np.uint32)
    # String copies of numeric columns so seaborn treats them as categories.
    df['d'] = df.n_features.apply(str)
    df['leaf'] = df.leaf_size.apply(str)
    df_grouped = df.groupby(cols)

    # NOTE(review): the loop target reuses (shadows) the outer `df`; harmless
    # here because the full frame is not referenced again after grouping.
    for i, (vals, df) in enumerate(df_grouped):
        # 16:9 ratio
        fig = plt.figure(figsize=(24, 13.5))
        ax = plt.gca()
        splot = sns.barplot(
            y="leaf", x="throughput", hue="implementation", data=df, ax=ax
        )
        _ = ax.set_xlabel("Throughput (in GB/s)")
        _ = ax.set_ylabel("Leaf Size")
        _ = ax.tick_params(labelrotation=45)

        # Adding the numerical values of "x" to bar
        for p in splot.patches:
            _ = splot.annotate(
                f"{p.get_width():.4e}",
                (p.get_width(), p.get_y() + p.get_height() / 2),
                ha="center",
                va="center",
                size=10,
                # Nudge the label 12 points below the bar's vertical center.
                xytext=(0, -12),
                textcoords="offset points",
            )

        title = (
            f"KDTree@{env_specs['commit']} - "
            f"Euclidean Distance, dtype=np.float64, {df.trial.max() + 1} "
            f"trials - Bench. Name: {bench_name}\n"
        )
        # `vals` is the group key tuple, in the same order as `cols`.
        title += "n_samples_train=%s - n_features=%s - leaf_size=%s" % vals
        _ = fig.suptitle(title, fontsize=16)
        plt.savefig(f"{results_folder}/{bench_name}_{i}.pdf", bbox_inches="tight")

    # Unifying pdf files into one
    pdf_files = sorted(glob.glob(f"{results_folder}/{bench_name}*.pdf"))
    subprocess.check_output(
        ["pdfunite", *pdf_files, f"{results_folder}/{bench_name}.pdf"]
    )
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
(
"benchmark"
)
parser
=
argparse
.
ArgumentParser
(
"benchmark"
)
...
@@ -249,8 +198,4 @@ if __name__ == "__main__":
...
@@ -249,8 +198,4 @@ if __name__ == "__main__":
print
(
f"Benchmarking
{
bench_name
}
"
)
print
(
f"Benchmarking
{
bench_name
}
"
)
benchmark
(
config
,
results_folder
,
bench_name
)
benchmark
(
config
,
results_folder
,
bench_name
)
print
(
f"Benchmark results wrote in
{
results_folder
}
"
)
print
(
f"Benchmark results wrote in
{
results_folder
}
"
)
\ No newline at end of file
print
(
f"Reporting results for
{
bench_name
}
"
)
report
(
results_folder
,
bench_name
)
print
(
f"Reporting results wrote in
{
results_folder
}
"
)
\ No newline at end of file
kdtree/benchmarks/report.py
0 → 100644
View file @
b14cba17
"""Aggregate per-thread-count benchmark CSVs for a commit and plot them.

For a given commit hash (CLI argument), collects all
``results/{commit}*/ *.csv`` result files — one per thread count, named like
``{commit}_{n_threads}_thread.csv`` — concatenates them, and renders one
throughput bar plot per parameter combination into
``<this file's dir>/results/{commit}/{i}.pdf``.
"""
import os
import argparse
# NOTE(review): `np` and `subprocess` are not used anywhere in this file as
# shown ("np.float64" below only occurs inside a string) — candidates for
# removal; confirm against the full file.
import numpy as np
import glob
import subprocess
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt


if __name__ == "__main__":
    parser = argparse.ArgumentParser("report")
    parser.add_argument("commit")
    args = parser.parse_args()

    # Output folder is anchored to this file's location: ./results
    results_folder = os.path.abspath(os.path.join(__file__, os.pardir, "results"))
    commit = args.commit

    def n_threads(filename):
        # Extract the thread count from names like '2742685_0_thread.csv'
        # (second underscore-separated field).
        basename = os.path.basename(filename)
        return int(basename.split("_")[1])

    # NOTE(review): this glob is relative to the current working directory,
    # while `results_folder` above is anchored to __file__ — the script only
    # finds inputs when run from the directory containing `results/`.
    # Confirm whether it should read f"{results_folder}/{commit}*/*.csv".
    csv_bench_results = sorted(glob.glob(f"results/{commit}*/*.csv"), key=n_threads)

    if len(csv_bench_results) == 0:
        raise RuntimeError(f"No results for commit {commit}")

    commit_result_folder = f"{results_folder}/{commit}"
    os.makedirs(commit_result_folder, exist_ok=True)

    # One CSV per thread count; stack them into a single frame.
    df = pd.concat(map(pd.read_csv, csv_bench_results))
    df = df.drop(columns=["n_neighbors", "func"])
    # Parameter columns that define one plot each.
    cols = [
        "n_samples_train",
        "n_samples_test",
        "n_features",
        "leaf_size",
    ]

    # This creates a category used for grouping
    # (string-typed thread count so seaborn treats it as categorical).
    df['t'] = df.n_threads.apply(str)
    df_grouped = df.groupby(cols)

    for i, (vals, df_g) in enumerate(df_grouped):
        # 16:9 ratio
        fig = plt.figure(figsize=(24, 13.5))
        ax = plt.gca()
        splot = sns.barplot(
            y="t", x="throughput", hue="implementation", data=df_g, ax=ax
        )
        _ = ax.set_xlabel("Throughput (in GB/s)")
        _ = ax.set_ylabel("Number of threads")
        _ = ax.tick_params(labelrotation=45)

        # Adding the numerical values of "x" to bar
        for p in splot.patches:
            _ = splot.annotate(
                f"{p.get_width():.4e}",
                (p.get_width(), p.get_y() + p.get_height() / 2),
                ha="center",
                va="center",
                size=10,
                # Nudge the label 12 points below the bar's vertical center.
                xytext=(0, -12),
                textcoords="offset points",
            )

        title = (
            f"KDTree.__init__@{commit} - "
            f"Euclidean Distance, dtype=np.float64, {df_g.trial.max() + 1} trials\n"
        )
        # `vals` is the group key tuple, in the same order as `cols`.
        title += (
            "n_samples_train=%s - n_samples_test=%s - "
            "n_features=%s - leaf_size=%s" % vals
        )
        _ = fig.suptitle(title, fontsize=16)
        plt.savefig(f"{commit_result_folder}/{i}.pdf", bbox_inches="tight")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment