No use of Cython+ KMeans

9b840986 · Julien Jerphanion · 5967987f · 5967987f · 5967987f · 5967987f
Commit 9b840986 authored Apr 15, 2021 by Julien Jerphanion
5 changed files
--- a/kmeans/Makefile
+++ b/kmeans/Makefile
-all: mrproper build bench
-
-# Remove benchmark fixtures
-clean:
-	-rm -Rf bench_res
-	-rm -f *.json
-	-rm -r *.html
-	-rm perf.data
-	-rm perf.data.old
-
-# Remove all but sources
-mrproper: clean
-	rm -Rf build
-	rm -f *.cpp
-	rm -f *.c
-	rm -f *.so
-
-build:
-	python setup.py build_ext --inplace
-
-# Create a full tracing html report via Viztracer
-# for reference, see: https://www.maartenbreddels.com/perf/jupyter/python/tracing/gil/2021/01/14/Tracing-the-Python-GIL.html
-bench: clean
-	@ mkdir -p bench_res
-	@ perf record -e sched:sched_switch -e sched:sched_process_fork -e 'sched:sched_wak*' \
-	  -k CLOCK_MONOTONIC \
-	  --call-graph dwarf -- viztracer -o bench_res/bench.json --ignore_frozen kmeans.py
-	@ perf script --no-inline | per4m perf2trace sched -o bench_res/perf.json
-	@ viztracer --combine bench_res/perf.json bench_res/bench.json -o bench.html
-	@ open bench.html
--- a/kmeans/__init__.py
+++ b/kmeans/__init__.py
--- a/kmeans/_kmeans.pyx
+++ b/kmeans/_kmeans.pyx
--- a/kmeans/kmeans.py
+++ b/kmeans/kmeans.py
-import time
-
-from viztracer import VizTracer
-
-import numpy as np
-from threadpoolctl import threadpool_limits
-from sklearn.datasets import make_classification
-
-from _kmeans import lloyd_iter_chunked_dense, _inertia_dense
-
-
-def _kmeans_single_lloyd(X, centers_init, sample_weight=None, max_iter=300,
-                         verbose=False, x_squared_norms=None, tol=1e-4,
-                         n_threads=1):
-    """Run Lloyd's k-means once, assuming inputs were already prepared.
-
-    Parameters
-    ----------
-    X : ndarray of shape (n_samples, n_features)
-        The observations to cluster. Must be a dense array: only the dense
-        Cython routines are called here, so sparse (CSR) input is not handled.
-    sample_weight : ndarray of shape (n_samples,), default=None
-        The weights for each observation in X. If None, all observations
-        are given a weight of 1.
-    centers_init : ndarray of shape (n_clusters, n_features)
-        The initial centers.
-    max_iter : int, default=300
-        Maximum number of iterations of the k-means algorithm to run.
-    verbose : bool, default=False
-        Verbosity mode
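-    x_squared_norms : ndarray of shape (n_samples,), default=None
-        Precomputed x_squared_norms. They are not computed here when left
-        to None: the value is forwarded as is to `lloyd_iter_chunked_dense`.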
-    tol : float, default=1e-4
-        Relative tolerance with regards to Frobenius norm of the difference
-        in the cluster centers of two consecutive iterations to declare
-        convergence.
-        It's not advised to set `tol=0` since convergence might never be
-        declared due to rounding errors. Use a very small number instead.
-    n_threads : int, default=1
-        The number of OpenMP threads to use for the computation. Parallelism is
-        sample-wise on the main cython loop which assigns each sample to its
-        closest center.
-    Returns
-    -------
-    labels : ndarray of shape (n_samples,)
-        labels[i] is the code or index of the centroid the
-        i'th observation is closest to.
-    inertia : float
-        The final value of the inertia criterion (sum of squared distances to
-        the closest centroid for all observations in the training set).
-    centers : ndarray of shape (n_clusters, n_features)
-        Centroids found at the last iteration of k-means.
-    n_iter : int
-        Number of iterations run.
-    """
-    n_clusters = centers_init.shape[0]
-
-    # Buffers to avoid new allocations at each iteration.
-    centers = centers_init
-    centers_new = np.zeros_like(centers)
-    labels = np.full(X.shape[0], -1, dtype=np.int32)
-    labels_old = labels.copy()
-    weight_in_clusters = np.zeros(n_clusters, dtype=X.dtype)
-    center_shift = np.zeros(n_clusters, dtype=X.dtype)
-
-    # NOTE: We only rely on dense arrays for this analysis.
-    if sample_weight is None:
-        sample_weight = np.ones((X.shape[0],), dtype=X.dtype)
-
-    strict_convergence = False
-
-    # Threadpoolctl context to limit the number of threads in second level of
-    # nested parallelism (i.e. BLAS) to avoid oversubscription.
-    with threadpool_limits(limits=1, user_api="blas"):
-        for i in range(max_iter):
-            lloyd_iter_chunked_dense(X, sample_weight, x_squared_norms, centers,
-                                     centers_new, weight_in_clusters, labels,
-                                     center_shift, n_threads)
-
-            if verbose:
-                inertia = _inertia_dense(X, sample_weight, centers, labels)
-                print(f"Iteration {i}, inertia {inertia}.")
-
-            centers, centers_new = centers_new, centers
-
-            if np.array_equal(labels, labels_old):
-                # First check the labels for strict convergence.
-                if verbose:
-                    print(f"Converged at iteration {i}: strict convergence.")
-                strict_convergence = True
-                break
-            else:
-                # No strict convergence, check for tol based convergence.
-                center_shift_tot = (center_shift**2).sum()
-                if center_shift_tot <= tol:
-                    if verbose:
-                        print(f"Converged at iteration {i}: center shift "
-                              f"{center_shift_tot} within tolerance {tol}.")
-                    break
-
-            labels_old[:] = labels
-
-        if not strict_convergence:
-            # rerun E-step so that predicted labels match cluster centers
-            lloyd_iter_chunked_dense(X, sample_weight, x_squared_norms,
-                                     centers, centers, weight_in_clusters,
-                                     labels, center_shift, n_threads,
-                                     update_centers=False)
-
-    inertia = _inertia_dense(X, sample_weight, centers, labels)
-
-    return labels, inertia, centers, i + 1
-
-
-if __name__ == "__main__":
-    np.random.seed(1337)
-    n_classes = 2
-    X, y = make_classification(n_samples=1000,
-                               n_classes=n_classes,
-                               n_clusters_per_class=1,
-                               n_informative=10)
-    centers_init = X[:n_classes]
-
-    start = time.time()
-    labels, inertia, centers, _ = _kmeans_single_lloyd(X,
-                                                       centers_init,
-                                                       max_iter=100000,
-                                                       n_threads=100)
-    print(time.time() - start)
\ No newline at end of file
--- a/kmeans/setup.py
+++ b/kmeans/setup.py
-# Cython compile instructions
-import numpy
-from setuptools import setup, Extension
-from Cython.Build import build_ext
-
-# To compile, use
-# python setup.py build_ext --inplace
-#
-
-extensions = [
-    Extension("_kmeans",
-              sources=["_kmeans.pyx"],
-              include_dirs=[numpy.get_include()],
-              define_macros=[("NPY_NO_DEPRECATED_API",
-                              "NPY_1_7_API_VERSION"),
-                             # ("CYTHON_TRACE_NOGIL", "1")
-                             ],
-              )
-]
-
-setup(
-    name="kmeans",
-    cmdclass={'build_ext': build_ext},
-    ext_modules=extensions,
-    install_requires=[
-        'setuptools>=18.0',
-        'cython>=0.27.3',
-        'numpy'
-    ],
-)