Commit b14cba17 authored by Julien Jerphanion

Add setup for reporting results

parent 4d309ddd
Makefile
@@ -78,6 +78,12 @@ benchmark:
for i in {0..5}; do \
taskset -c 0-$$((2**i-1)) ${PYTHON_EXECUTABLE} benchmarks/benchmark.py `git rev-parse --short HEAD`_$$((2**i-1))_thread ;\
done
${PYTHON_EXECUTABLE} benchmarks/report.py `git rev-parse --short HEAD`
## report: Report benchmark results
.PHONY: report
report:
${PYTHON_EXECUTABLE} benchmarks/report.py `git rev-parse --short HEAD`
## test: Launch all the tests.
.PHONY: test
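For reference, the `benchmark` target's loop runs the benchmark pinned to 1, 2, 4, 8, 16, and 32 CPUs before generating the report, encoding the index of the last pinned CPU in the bench name. A small sketch of the commands it expands to (here `python` stands in for ${PYTHON_EXECUTABLE}, and b14cba1 is a hypothetical short hash):

# Sketch: expansion of the Makefile loop `for i in {0..5}`.
for i in range(6):
    last_cpu = 2**i - 1  # taskset pins CPUs 0..last_cpu, i.e. 2**i cores
    print(f"taskset -c 0-{last_cpu} python benchmarks/benchmark.py "
          f"b14cba1_{last_cpu}_thread")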
benchmarks/benchmark.py
import argparse
import glob
import importlib
import json
import os
import sys
import subprocess
import time
import kdtree
@@ -17,6 +17,7 @@ from matplotlib import pyplot as plt
from memory_profiler import memory_usage
from sklearn import set_config
from sklearn.neighbors import KDTree
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
# Be gentle with the eyes
@@ -34,6 +35,8 @@ def benchmark(config, results_folder, bench_name):
    one_GiB = 1e9  # NB: 1e9 bytes is 1 GB; a true GiB is 2**30 bytes
benchmarks = pd.DataFrame()
n_threads = _openmp_effective_n_threads()
env_specs_file = f"{results_folder}/{bench_name}.json"
    # TODO: This is ugly, but I haven't found anything better.
@@ -47,6 +50,7 @@ def benchmark(config, results_folder, bench_name):
threadpool_info=threadpoolctl.threadpool_info(),
commit=commit,
config=config,
n_threads=n_threads,
)
set_config(assume_finite=True)
@@ -73,6 +77,7 @@ def benchmark(config, results_folder, bench_name):
trial=trial,
func="init",
implementation="sklearn",
n_threads=n_threads,
leaf_size=leaf_size,
n_samples_train=ns_train,
n_samples_test=ns_test,
@@ -95,6 +100,7 @@ def benchmark(config, results_folder, bench_name):
trial=trial,
func="init",
implementation="kdtree",
n_threads=n_threads,
leaf_size=leaf_size,
n_samples_train=ns_train,
n_samples_test=ns_test,
@@ -176,63 +182,6 @@ def benchmark(config, results_folder, bench_name):
json.dump(env_specs, outfile)
def report(results_folder, bench_name):
df = pd.read_csv(glob.glob(f"{results_folder}/*.csv")[0])
with open(glob.glob(f"{results_folder}/*.json")[0], "r") as json_file:
env_specs = json.load(json_file)
cols = [
"n_samples_train",
"n_features",
"leaf_size",
]
df[cols] = df[cols].astype(np.uint32)
df['d'] = df.n_features.apply(str)
df['leaf'] = df.leaf_size.apply(str)
df_grouped = df.groupby(cols)
for i, (vals, df) in enumerate(df_grouped):
# 16:9 ratio
fig = plt.figure(figsize=(24, 13.5))
ax = plt.gca()
splot = sns.barplot(
y="leaf", x="throughput", hue="implementation", data=df, ax=ax
)
_ = ax.set_xlabel("Throughput (in GB/s)")
_ = ax.set_ylabel("Leaf Size")
_ = ax.tick_params(labelrotation=45)
        # Annotate each bar with its width (the throughput value)
for p in splot.patches:
_ = splot.annotate(
f"{p.get_width():.4e}",
(p.get_width(), p.get_y() + p.get_height() / 2),
ha="center",
va="center",
size=10,
xytext=(0, -12),
textcoords="offset points",
)
title = (
f"KDTree@{env_specs['commit']} - "
f"Euclidean Distance, dtype=np.float64, {df.trial.max() + 1} trials - Bench. Name: {bench_name}\n"
)
title += (
"n_samples_train=%s - n_features=%s - leaf_size=%s" % vals
)
_ = fig.suptitle(title, fontsize=16)
plt.savefig(f"{results_folder}/{bench_name}_{i}.pdf", bbox_inches="tight")
# Unifying pdf files into one
pdf_files = sorted(glob.glob(f"{results_folder}/{bench_name}*.pdf"))
subprocess.check_output(
["pdfunite", *pdf_files, f"{results_folder}/{bench_name}.pdf"]
)
if __name__ == "__main__":
parser = argparse.ArgumentParser("benchmark")
@@ -249,8 +198,4 @@ if __name__ == "__main__":
print(f"Benchmarking {bench_name}")
benchmark(config, results_folder, bench_name)
print(f"Benchmark results wrote in {results_folder}")
print(f"Reporting results for {bench_name}")
report(results_folder, bench_name)
print(f"Reporting results wrote in {results_folder}")
\ No newline at end of file
print(f"Benchmark results wrote in {results_folder}")
\ No newline at end of file
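With this change, every timing record also carries the effective OpenMP thread count. A minimal sketch of one such record (the sizes here are made up; the column set mirrors what benchmark.py writes after this commit):

import pandas as pd
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads

# One benchmark row, including the new n_threads column.
record = dict(
    trial=0,
    func="init",
    implementation="kdtree",
    n_threads=_openmp_effective_n_threads(),
    leaf_size=256,
    n_samples_train=1_000_000,
    n_samples_test=10,
)
print(pd.DataFrame([record]))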
benchmarks/report.py
import argparse
import glob
import os
import subprocess

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
if __name__ == "__main__":
parser = argparse.ArgumentParser("report")
parser.add_argument("commit")
args = parser.parse_args()
results_folder = os.path.abspath(os.path.join(__file__, os.pardir, "results"))
commit = args.commit
    def n_threads(filename):
        # Extract the thread count from a result filename such as
        # '2742685_0_thread.csv' (i.e. '<commit>_<n_threads>_thread.csv').
        basename = os.path.basename(filename)
        return int(basename.split("_")[1])
    csv_bench_results = sorted(
        glob.glob(f"{results_folder}/{commit}*/*.csv"), key=n_threads
    )
if len(csv_bench_results) == 0:
raise RuntimeError(f"No results for commit {commit}")
commit_result_folder = f"{results_folder}/{commit}"
os.makedirs(commit_result_folder, exist_ok=True)
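    # Concatenate the per-thread-count CSVs into a single frame; the
    # "n_neighbors" and "func" columns are not needed for these plots.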
df = pd.concat(map(pd.read_csv, csv_bench_results))
df = df.drop(columns=["n_neighbors", "func"])
cols = [
"n_samples_train",
"n_samples_test",
"n_features",
"leaf_size",
]
    # Cast the thread count to str so seaborn treats it as a categorical value
    df['t'] = df.n_threads.apply(str)
df_grouped = df.groupby(cols)
for i, (vals, df_g) in enumerate(df_grouped):
# 16:9 ratio
fig = plt.figure(figsize=(24, 13.5))
ax = plt.gca()
splot = sns.barplot(
y="t", x="throughput", hue="implementation", data=df_g, ax=ax
)
_ = ax.set_xlabel("Throughput (in GB/s)")
_ = ax.set_ylabel("Number of threads")
_ = ax.tick_params(labelrotation=45)
        # Annotate each bar with its width (the throughput value)
for p in splot.patches:
_ = splot.annotate(
f"{p.get_width():.4e}",
(p.get_width(), p.get_y() + p.get_height() / 2),
ha="center",
va="center",
size=10,
xytext=(0, -12),
textcoords="offset points",
)
title = (
f"KDTree.__init__@{commit} - "
f"Euclidean Distance, dtype=np.float64, {df_g.trial.max() + 1} trials\n"
)
title += (
"n_samples_train=%s - n_samples_test=%s - "
"n_features=%s - leaf_size=%s"
% vals
)
_ = fig.suptitle(title, fontsize=16)
plt.savefig(f"{commit_result_folder}/{i}.pdf", bbox_inches="tight")
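Unlike the report() removed from benchmarks/benchmark.py, report.py leaves the per-group PDFs as separate files. If a single document is wanted, a sketch mirroring the removed pdfunite call, reusing commit_result_folder and commit from the script above (assumes the `pdfunite` CLI from poppler-utils is installed):

    # Optional: unify the per-group PDFs, as the removed report() did.
    pdf_files = sorted(glob.glob(f"{commit_result_folder}/*.pdf"))
    subprocess.check_output(
        ["pdfunite", *pdf_files, f"{commit_result_folder}/{commit}.pdf"]
    )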