Commit b14cba17 authored by Julien Jerphanion

Add setup for reporting results

parent 4d309ddd
Makefile:

```diff
@@ -78,6 +78,12 @@ benchmark:
 	for i in {0..5}; do \
 		taskset -c 0-$$((2**i-1)) ${PYTHON_EXECUTABLE} benchmarks/benchmark.py `git rev-parse --short HEAD`_$$((2**i-1))_thread ;\
 	done
+	${PYTHON_EXECUTABLE} benchmarks/report.py `git rev-parse --short HEAD`
+
+## report: Report benchmark results
+.PHONY: report
+report:
+	${PYTHON_EXECUTABLE} benchmarks/report.py `git rev-parse --short HEAD`
 
 ## test: Launch all the test.
 .PHONY: test
```
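The `benchmark` target runs the suite six times, pinned with `taskset` to the first 2^i cores for i in 0..5 (so 1, 2, 4, 8, 16, 32 cores), and names each result set `<commit>_<2^i - 1>_thread`; `report.py` later parses that suffix to recover the thread count. A rough Python equivalent of the loop, as a sketch under the assumptions baked into the Makefile (a Linux `taskset` binary and the `benchmarks/benchmark.py` CLI shown above):

```python
import subprocess

commit = subprocess.check_output(
    ["git", "rev-parse", "--short", "HEAD"], text=True
).strip()

for i in range(6):
    last_core = 2**i - 1  # pin to cores 0..last_core, i.e. 1, 2, 4, ..., 32 cores
    subprocess.run(
        [
            "taskset", "-c", f"0-{last_core}",
            "python", "benchmarks/benchmark.py",
            f"{commit}_{last_core}_thread",
        ],
        check=True,
    )
```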
benchmarks/benchmark.py:
```diff
 import argparse
 import glob
-import importlib
 import json
 import os
-import sys
 import subprocess
 import time
 
 import kdtree
@@ -17,6 +17,7 @@ from matplotlib import pyplot as plt
 from memory_profiler import memory_usage
 from sklearn import set_config
 from sklearn.neighbors import KDTree
+from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
 
 # Be gentle with eyes
```
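`_openmp_effective_n_threads` is a private helper in scikit-learn that reports how many threads its OpenMP code will actually use, taking `OMP_NUM_THREADS` and the CPUs available to the process into account; this is how the `taskset` pinning from the Makefile becomes observable inside the benchmark. A minimal sketch, assuming a scikit-learn build with OpenMP enabled:

```python
# Sketch: record the effective OpenMP thread count, as benchmark.py now does.
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads

n_threads = _openmp_effective_n_threads()
print(f"OpenMP will use {n_threads} thread(s)")
```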
```diff
@@ -34,6 +35,8 @@ def benchmark(config, results_folder, bench_name):
     one_GiB = 1e9
     benchmarks = pd.DataFrame()
 
+    n_threads = _openmp_effective_n_threads()
+
     env_specs_file = f"{results_folder}/{bench_name}.json"
 
     # TODO: This is ugly, but I haven't found something better.
@@ -47,6 +50,7 @@ def benchmark(config, results_folder, bench_name):
         threadpool_info=threadpoolctl.threadpool_info(),
         commit=commit,
         config=config,
+        n_threads=n_threads,
     )
 
     set_config(assume_finite=True)
@@ -73,6 +77,7 @@ def benchmark(config, results_folder, bench_name):
                     trial=trial,
                     func="init",
                     implementation="sklearn",
+                    n_threads=n_threads,
                     leaf_size=leaf_size,
                     n_samples_train=ns_train,
                     n_samples_test=ns_test,
@@ -95,6 +100,7 @@ def benchmark(config, results_folder, bench_name):
                     trial=trial,
                     func="init",
                     implementation="kdtree",
+                    n_threads=n_threads,
                     leaf_size=leaf_size,
                     n_samples_train=ns_train,
                     n_samples_test=ns_test,
```
```diff
@@ -176,63 +182,6 @@ def benchmark(config, results_folder, bench_name):
         json.dump(env_specs, outfile)
 
 
-def report(results_folder, bench_name):
-    df = pd.read_csv(glob.glob(f"{results_folder}/*.csv")[0])
-
-    with open(glob.glob(f"{results_folder}/*.json")[0], "r") as json_file:
-        env_specs = json.load(json_file)
-
-    cols = [
-        "n_samples_train",
-        "n_features",
-        "leaf_size",
-    ]
-
-    df[cols] = df[cols].astype(np.uint32)
-    df['d'] = df.n_features.apply(str)
-    df['leaf'] = df.leaf_size.apply(str)
-
-    df_grouped = df.groupby(cols)
-
-    for i, (vals, df) in enumerate(df_grouped):
-        # 16:9 ratio
-        fig = plt.figure(figsize=(24, 13.5))
-        ax = plt.gca()
-        splot = sns.barplot(
-            y="leaf", x="throughput", hue="implementation", data=df, ax=ax
-        )
-        _ = ax.set_xlabel("Throughput (in GB/s)")
-        _ = ax.set_ylabel("Leaf Size")
-        _ = ax.tick_params(labelrotation=45)
-
-        # Adding the numerical values of "x" to bar
-        for p in splot.patches:
-            _ = splot.annotate(
-                f"{p.get_width():.4e}",
-                (p.get_width(), p.get_y() + p.get_height() / 2),
-                ha="center",
-                va="center",
-                size=10,
-                xytext=(0, -12),
-                textcoords="offset points",
-            )
-
-        title = (
-            f"KDTree@{env_specs['commit']} - "
-            f"Euclidean Distance, dtype=np.float64, {df.trial.max() + 1} trials - Bench. Name: {bench_name}\n"
-        )
-        title += (
-            "n_samples_train=%s - n_features=%s - leaf_size=%s" % vals
-        )
-        _ = fig.suptitle(title, fontsize=16)
-        plt.savefig(f"{results_folder}/{bench_name}_{i}.pdf", bbox_inches="tight")
-
-    # Unifying pdf files into one
-    pdf_files = sorted(glob.glob(f"{results_folder}/{bench_name}*.pdf"))
-    subprocess.check_output(
-        ["pdfunite", *pdf_files, f"{results_folder}/{bench_name}.pdf"]
-    )
 
 
 if __name__ == "__main__":
 
     parser = argparse.ArgumentParser("benchmark")
```
```diff
@@ -249,8 +198,4 @@ if __name__ == "__main__":
 
     print(f"Benchmarking {bench_name}")
     benchmark(config, results_folder, bench_name)
-    print(f"Benchmark results wrote in {results_folder}")
-
-    print(f"Reporting results for {bench_name}")
-    report(results_folder, bench_name)
-    print(f"Reporting results wrote in {results_folder}")
\ No newline at end of file
+    print(f"Benchmark results wrote in {results_folder}")
\ No newline at end of file
```
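One behavioral difference worth noting: the removed `report()` also merged the per-group PDFs into a single `{bench_name}.pdf` via `pdfunite`, while the new `benchmarks/report.py` below leaves one PDF per parameter combination. A sketch of the dropped merge step, lifted from the removed code and assuming the `pdfunite` CLI (poppler-utils) is installed:

```python
import glob
import subprocess

def merge_pdfs(results_folder, bench_name):
    # Unify the per-group PDF files into one, as the removed report() did.
    pdf_files = sorted(glob.glob(f"{results_folder}/{bench_name}*.pdf"))
    subprocess.check_output(
        ["pdfunite", *pdf_files, f"{results_folder}/{bench_name}.pdf"]
    )
```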
benchmarks/report.py (new file):

```python
import argparse
import glob
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

if __name__ == "__main__":
    parser = argparse.ArgumentParser("report")
    parser.add_argument("commit")
    args = parser.parse_args()

    results_folder = os.path.abspath(os.path.join(__file__, os.pardir, "results"))
    commit = args.commit

    def n_threads(filename):
        # Extract the thread count from a name like '2742685_0_thread.csv'.
        basename = os.path.basename(filename)
        return int(basename.split("_")[1])

    # Glob under results_folder (next to this file) so the lookup matches
    # where the benchmarks write their results, regardless of the current
    # working directory.
    csv_bench_results = sorted(
        glob.glob(f"{results_folder}/{commit}*/*.csv"), key=n_threads
    )

    if len(csv_bench_results) == 0:
        raise RuntimeError(f"No results for commit {commit}")

    commit_result_folder = f"{results_folder}/{commit}"
    os.makedirs(commit_result_folder, exist_ok=True)

    df = pd.concat(map(pd.read_csv, csv_bench_results))
    df = df.drop(columns=["n_neighbors", "func"])

    cols = [
        "n_samples_train",
        "n_samples_test",
        "n_features",
        "leaf_size",
    ]

    # String column used as a categorical y-axis when plotting.
    df["t"] = df.n_threads.apply(str)

    df_grouped = df.groupby(cols)

    for i, (vals, df_g) in enumerate(df_grouped):
        # 16:9 ratio
        fig = plt.figure(figsize=(24, 13.5))
        ax = plt.gca()
        splot = sns.barplot(
            y="t", x="throughput", hue="implementation", data=df_g, ax=ax
        )
        _ = ax.set_xlabel("Throughput (in GB/s)")
        _ = ax.set_ylabel("Number of threads")
        _ = ax.tick_params(labelrotation=45)

        # Annotate each bar with its numerical value (the throughput).
        for p in splot.patches:
            _ = splot.annotate(
                f"{p.get_width():.4e}",
                (p.get_width(), p.get_y() + p.get_height() / 2),
                ha="center",
                va="center",
                size=10,
                xytext=(0, -12),
                textcoords="offset points",
            )

        title = (
            f"KDTree.__init__@{commit} - "
            f"Euclidean Distance, dtype=np.float64, {df_g.trial.max() + 1} trials\n"
        )
        title += (
            "n_samples_train=%s - n_samples_test=%s - "
            "n_features=%s - leaf_size=%s" % vals
        )
        _ = fig.suptitle(title, fontsize=16)
        plt.savefig(f"{commit_result_folder}/{i}.pdf", bbox_inches="tight")
        plt.close(fig)  # free the figure; many groups may be plotted
```
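As wired into the Makefile's `report` target, the script takes the short commit hash, globs `results/<commit>*/*.csv`, and sorts the files by the thread count encoded in their names. A quick sanity check of that naming convention (the file names here are hypothetical; real ones are produced by `make benchmark`):

```python
import os

def n_threads(filename):
    # Same parsing as report.py: '<commit>_<n>_thread.csv' -> n
    return int(os.path.basename(filename).split("_")[1])

assert n_threads("results/2742685_0_thread/2742685_0_thread.csv") == 0
assert n_threads("2742685_31_thread.csv") == 31
```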