Commit 6ba0d820 authored by Julien Jerphanion

Adapt setup for benchmark

parent 6bd854ca
all: all: mrproper build bench
python setup.py build_ext --inplace
# Remove benchmark fixtures
clean:
-rm -Rf bench_res
-rm -f *.json
-rm -r *.html
-rm perf.data
-rm perf.data.old
mrproper: # Remove all but sources
mrproper: clean
rm -Rf build
rm -f *.cpp rm -f *.cpp
rm -f *.c rm -f *.c
rm -f *.so rm -f *.so
rm -Rf build
build:
python setup.py build_ext --inplace
# Create a full tracing html report via Viztracer
# for reference, see: https://www.maartenbreddels.com/perf/jupyter/python/tracing/gil/2021/01/14/Tracing-the-Python-GIL.html
bench: clean
@ mkdir -p bench_res
@ perf record -e sched:sched_switch -e sched:sched_process_fork -e 'sched:sched_wak*' \
-k CLOCK_MONOTONIC \
--call-graph dwarf -- viztracer -o bench_res/bench.json --ignore_frozen kmeans.py
@ perf script --no-inline | per4m perf2trace sched -o bench_res/perf.json
@ viztracer --combine bench_res/perf.json bench_res/bench.json -o bench.html
@ open bench.html
# cython: profile=True, boundscheck=False, wraparound=False, cdivision=True # cython: profile=True
# # cython: boundscheck=False
# cython: wraparound=False
# cython: cdivision=True
# cython: language_level = 3
# cython: linetrace=True
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
# Licence: BSD 3 clause # Licence: BSD 3 clause
# TODO: We still need to use ndarrays instead of typed memoryviews when using # TODO: We still need to use ndarrays instead of typed memoryviews when using
......
import time
from viztracer import VizTracer
import numpy as np import numpy as np
from threadpoolctl import threadpool_limits from threadpoolctl import threadpool_limits
from sklearn.datasets import make_classification
from kmeans._kmeans import lloyd_iter_chunked_dense, _inertia_dense from _kmeans import lloyd_iter_chunked_dense, _inertia_dense
def _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter=300, def _kmeans_single_lloyd(X, centers_init, sample_weight=None, max_iter=300,
verbose=False, x_squared_norms=None, tol=1e-4, verbose=False, x_squared_norms=None, tol=1e-4,
n_threads=1): n_threads=1):
"""A single run of k-means lloyd, assumes preparation completed prior. """A single run of k-means lloyd, assumes preparation completed prior.
...@@ -58,8 +63,6 @@ def _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter=300, ...@@ -58,8 +63,6 @@ def _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter=300,
# NOTE: We only rely on dense array for those analysis # NOTE: We only rely on dense array for those analysis
if sample_weight is None: if sample_weight is None:
sample_weight = np.ones((X.shape[0],), dtype=X.dtype) sample_weight = np.ones((X.shape[0],), dtype=X.dtype)
lloyd_iter = lloyd_iter_chunked_dense
_inertia = _inertia_dense
strict_convergence = False strict_convergence = False
...@@ -67,11 +70,12 @@ def _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter=300, ...@@ -67,11 +70,12 @@ def _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter=300,
# nested parallelism (i.e. BLAS) to avoid oversubscription. # nested parallelism (i.e. BLAS) to avoid oversubscription.
with threadpool_limits(limits=1, user_api="blas"): with threadpool_limits(limits=1, user_api="blas"):
for i in range(max_iter): for i in range(max_iter):
lloyd_iter(X, sample_weight, x_squared_norms, centers, centers_new, lloyd_iter_chunked_dense(X, sample_weight, x_squared_norms, centers,
weight_in_clusters, labels, center_shift, n_threads) centers_new, weight_in_clusters, labels,
center_shift, n_threads)
if verbose: if verbose:
inertia = _inertia(X, sample_weight, centers, labels) inertia = _inertia_dense(X, sample_weight, centers, labels)
print(f"Iteration {i}, inertia {inertia}.") print(f"Iteration {i}, inertia {inertia}.")
centers, centers_new = centers_new, centers centers, centers_new = centers_new, centers
...@@ -95,10 +99,28 @@ def _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter=300, ...@@ -95,10 +99,28 @@ def _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter=300,
if not strict_convergence: if not strict_convergence:
# rerun E-step so that predicted labels match cluster centers # rerun E-step so that predicted labels match cluster centers
lloyd_iter(X, sample_weight, x_squared_norms, centers, centers, lloyd_iter_chunked_dense(X, sample_weight, x_squared_norms,
weight_in_clusters, labels, center_shift, n_threads, centers, centers, weight_in_clusters,
labels, center_shift, n_threads,
update_centers=False) update_centers=False)
inertia = _inertia(X, sample_weight, centers, labels) inertia = _inertia_dense(X, sample_weight, centers, labels)
return labels, inertia, centers, i + 1 return labels, inertia, centers, i + 1
if __name__ == "__main__":
    # Benchmark driver: build a small synthetic dataset, run a single
    # Lloyd k-means pass on it and print the elapsed time in seconds.

    # Seed NumPy's global RNG.
    # NOTE(review): presumably this makes the generated dataset repeatable
    # because make_classification is called without random_state -- confirm.
    np.random.seed(1337)

    n_classes = 2
    X, y = make_classification(n_samples=1000,
                               n_classes=n_classes,
                               n_clusters_per_class=1,
                               n_informative=10)

    # Use the first `n_classes` samples as the initial cluster centers.
    centers_init = X[:n_classes]

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the system clock is
    # adjusted while the benchmark runs.
    start = time.perf_counter()
    # NOTE(review): n_threads=100 and max_iter=100000 look deliberately
    # large for the tracing benchmark -- confirm before reusing elsewhere.
    labels, inertia, centers, _ = _kmeans_single_lloyd(X,
                                                       centers_init,
                                                       max_iter=100000,
                                                       n_threads=100)
    print(time.perf_counter() - start)
\ No newline at end of file
...@@ -11,7 +11,10 @@ extensions = [ ...@@ -11,7 +11,10 @@ extensions = [
Extension("_kmeans", Extension("_kmeans",
sources=["_kmeans.pyx"], sources=["_kmeans.pyx"],
include_dirs=[numpy.get_include()], include_dirs=[numpy.get_include()],
define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], define_macros=[("NPY_NO_DEPRECATED_API",
"NPY_1_7_API_VERSION"),
# ("CYTHON_TRACE_NOGIL", "1")
],
) )
] ]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment