Commit 9b840986 authored by Julien Jerphanion

No use of Cython+ KMeans

parent 5967987f
# Every target below is a command, not a file to produce. `build` in
# particular collides with the build/ directory created by setup.py: without
# .PHONY, make would consider it up to date and skip the compilation.
.PHONY: all clean mrproper build bench

all: mrproper build bench

# Remove benchmark fixtures
# (-f everywhere so that a missing file is not reported as an error)
clean:
	-rm -Rf bench_res
	-rm -f *.json
	-rm -f *.html
	-rm -f perf.data
	-rm -f perf.data.old

# Remove all but sources
mrproper: clean
	rm -Rf build
	rm -f *.cpp
	rm -f *.c
	rm -f *.so

# Compile the Cython extension next to its sources
build:
	python setup.py build_ext --inplace

# Create a full tracing html report via Viztracer
# for reference, see: https://www.maartenbreddels.com/perf/jupyter/python/tracing/gil/2021/01/14/Tracing-the-Python-GIL.html
bench: clean
	@ mkdir -p bench_res
	@ perf record -e sched:sched_switch -e sched:sched_process_fork -e 'sched:sched_wak*' \
		-k CLOCK_MONOTONIC \
		--call-graph dwarf -- viztracer -o bench_res/bench.json --ignore_frozen kmeans.py
	@ perf script --no-inline | per4m perf2trace sched -o bench_res/perf.json
	@ viztracer --combine bench_res/perf.json bench_res/bench.json -o bench.html
	@ open bench.html
This diff is collapsed.
import time
from viztracer import VizTracer
import numpy as np
from threadpoolctl import threadpool_limits
from sklearn.datasets import make_classification
from _kmeans import lloyd_iter_chunked_dense, _inertia_dense
def _kmeans_single_lloyd(X, centers_init, sample_weight=None, max_iter=300,
                         verbose=False, x_squared_norms=None, tol=1e-4,
                         n_threads=1):
    """A single run of k-means lloyd, assumes preparation completed prior.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        The observations to cluster. Only dense arrays are handled here.

    centers_init : ndarray of shape (n_clusters, n_features)
        The initial centers. The array is copied and never modified.

    sample_weight : ndarray of shape (n_samples,), default=None
        The weights for each observation in X. If None, every observation
        gets a weight of 1.

    max_iter : int, default=300
        Maximum number of iterations of the k-means algorithm to run.

    verbose : bool, default=False
        Verbosity mode

    x_squared_norms : ndarray of shape (n_samples,), default=None
        Precomputed x_squared_norms. If None, they are computed from X.

    tol : float, default=1e-4
        Tolerance on the sum of squared center shifts between two
        consecutive iterations to declare convergence.
        It's not advised to set `tol=0` since convergence might never be
        declared due to rounding errors. Use a very small number instead.

    n_threads : int, default=1
        The number of OpenMP threads to use for the computation. Parallelism is
        sample-wise on the main cython loop which assigns each sample to its
        closest center.

    Returns
    -------
    labels : ndarray of shape (n_samples,)
        labels[i] is the code or index of the centroid the
        i'th observation is closest to.

    inertia : float
        The final value of the inertia criterion (sum of squared distances to
        the closest centroid for all observations in the training set).

    centers : ndarray of shape (n_clusters, n_features)
        Centroids found at the last iteration of k-means.

    n_iter : int
        Number of iterations run.
    """
    n_clusters = centers_init.shape[0]

    # Buffers to avoid new allocations at each iteration.
    # `centers` and `centers_new` are swapped after every iteration, so the
    # array first bound to `centers` is later passed as the output buffer.
    # Working on a copy keeps the caller's `centers_init` (possibly a view on
    # X) from being overwritten.
    centers = centers_init.copy()
    centers_new = np.zeros_like(centers)
    labels = np.full(X.shape[0], -1, dtype=np.int32)
    labels_old = labels.copy()
    weight_in_clusters = np.zeros(n_clusters, dtype=X.dtype)
    center_shift = np.zeros(n_clusters, dtype=X.dtype)

    # NOTE: We only rely on dense array for those analysis
    if sample_weight is None:
        sample_weight = np.ones((X.shape[0],), dtype=X.dtype)

    if x_squared_norms is None:
        # Do not forward None to the compiled routine: build the row-wise
        # squared norms the parameter is documented to hold.
        x_squared_norms = (X ** 2).sum(axis=1)

    strict_convergence = False

    # Tracked separately from the loop variable so that the function still
    # returns (with n_iter == 0) when max_iter is 0 and the loop never runs.
    n_iter = 0

    # Threadpoolctl context to limit the number of threads in second level of
    # nested parallelism (i.e. BLAS) to avoid oversubscription.
    with threadpool_limits(limits=1, user_api="blas"):
        for i in range(max_iter):
            n_iter = i + 1

            lloyd_iter_chunked_dense(X, sample_weight, x_squared_norms,
                                     centers, centers_new,
                                     weight_in_clusters, labels,
                                     center_shift, n_threads)

            if verbose:
                inertia = _inertia_dense(X, sample_weight, centers, labels)
                print(f"Iteration {i}, inertia {inertia}.")

            # The freshly computed centers become the current ones; the old
            # buffer is recycled as output for the next iteration.
            centers, centers_new = centers_new, centers

            if np.array_equal(labels, labels_old):
                # First check the labels for strict convergence.
                if verbose:
                    print(f"Converged at iteration {i}: strict convergence.")
                strict_convergence = True
                break

            # No strict convergence, check for tol based convergence.
            center_shift_tot = (center_shift**2).sum()
            if center_shift_tot <= tol:
                if verbose:
                    print(f"Converged at iteration {i}: center shift "
                          f"{center_shift_tot} within tolerance {tol}.")
                break

            labels_old[:] = labels

        if not strict_convergence:
            # rerun E-step so that predicted labels match cluster centers
            lloyd_iter_chunked_dense(X, sample_weight, x_squared_norms,
                                     centers, centers, weight_in_clusters,
                                     labels, center_shift, n_threads,
                                     update_centers=False)

    inertia = _inertia_dense(X, sample_weight, centers, labels)

    return labels, inertia, centers, n_iter
if __name__ == "__main__":
    # Benchmark driver: cluster a small synthetic dataset and print the
    # elapsed wall-clock time in seconds.
    np.random.seed(1337)
    n_classes = 2
    X, y = make_classification(n_samples=1000,
                               n_classes=n_classes,
                               n_clusters_per_class=1,
                               n_informative=10)

    # Slicing returns a view on X; copy it so that the center buffers used by
    # the solver can never alias (and overwrite) the first rows of the data.
    centers_init = X[:n_classes].copy()

    # perf_counter is monotonic and has the highest available resolution,
    # which makes it the appropriate clock for measuring a duration.
    start = time.perf_counter()
    labels, inertia, centers, _ = _kmeans_single_lloyd(X,
                                                       centers_init,
                                                       max_iter=100000,
                                                       n_threads=100)
    print(time.perf_counter() - start)
\ No newline at end of file
# Cython compile instructions
import numpy
from setuptools import setup, Extension
from Cython.Build import build_ext
# To compile, use
# python setup.py build_ext --inplace
#
# Preprocessor macros handed to the C compiler for the extension.
_kmeans_macros = [
    ("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION"),
    # ("CYTHON_TRACE_NOGIL", "1")
]

# The single Cython extension of this project: `_kmeans`, built from
# `_kmeans.pyx` against the NumPy C headers.
_kmeans_extension = Extension(
    "_kmeans",
    sources=["_kmeans.pyx"],
    include_dirs=[numpy.get_include()],
    define_macros=_kmeans_macros,
)

extensions = [_kmeans_extension]

# Requirements declared for the package.
_requirements = [
    'setuptools>=18.0',
    'cython>=0.27.3',
    'numpy',
]

setup(
    name="kmeans",
    # Use Cython's build_ext command so that .pyx sources get compiled.
    cmdclass={'build_ext': build_ext},
    ext_modules=extensions,
    install_requires=_requirements,
)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment