Commit 0da9564c authored by Julien Jerphanion

Base Experiments with cython+

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions and Cython-generated sources
**/*.c
**/*.cpp
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
cdef cypclass Base:
int a
int b
__init__(self, int a, int b):
self.a = a
self.b = b
__init__(self, int a):
self.__init__(a, 0)
__init__(self):
self.__init__(0, 0)
cdef cypclass Derived(Base):
int c
__init__(self, int a, int b, int c):
self.c = c
cpdef cypclass AltDerived(Base):
__init__(self, int a, int b):
self.a = a + b
self.b = b - a
cpdef void test():
cdef Base base = Base(4, 2)
cdef Derived derived1 = Derived(4)
cdef Derived derived2 = Derived(4, 2)
cdef Derived derived3 = Derived(4, 2, 1)
cdef AltDerived alt_derived = AltDerived(4, 2)
print(base.a, base.b)
print(derived1.a, derived1.b, derived1.c)
print(derived2.a, derived2.b, derived2.c)
print(derived3.a, derived3.b, derived3.c)
print(alt_derived.a, alt_derived.b)
cdef cypclass Base:
int base
cdef cypclass Derived1(Base):
int derived1
void setBase(self, int arg):
self.base = arg
cdef cypclass Derived2(Base):
int derived2
void setBase(self, int arg):
self.base = arg
cdef cypclass Diamond(Derived1, Derived2):
int diamond
cdef void printD1(Derived1 d1) with gil:
print(d1.derived1, d1.base)
cdef void printD2(Derived2 d2) with gil:
print(d2.derived2, d2.base)
cpdef void test():
o = Diamond()
o.derived1 = 42
o.derived2 = 4242
Derived1.setBase(o, 4)
Derived2.setBase(o, 2)
printD1(o)
printD2(o)
cdef cypclass A:
int a
int getter(self):
return self.a * 6
int __int__(self):
return self.getter()
__init__(self, int a):
self.a = a - 1
cdef cypclass MyClass:
A cypobj
int number
__init__(self, int a = 0):
self.cypobj = A(a)
self.number = a
__init__(self, A obj):
self.cypobj = obj
self.number = obj.a + 1
int get_A_value(self):
if self.cypobj is not NULL:
return self.cypobj.getter()
else:
return 42
cdef int take_value(MyClass o) nogil:
value = o.get_A_value()
return value
def print_values():
method1_specified = MyClass(2)
method1_default = MyClass()
method2 = new MyClass()
print(take_value(method1_specified),
take_value(method1_default),
take_value(method2))
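# Note (editorial, derived from the code above): the NULL check in
# get_A_value covers instances obtained without running a matching __init__
# (e.g. `new MyClass()` above); in that case self.cypobj stays NULL and the
# fallback value 42 is returned.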
cdef cypclass Character:
int health
__init__(self, int health):
self.health = health
int update_health(self, int amount):
if -amount > self.health:
self.health = 0
else:
self.health += amount
return self.health
cdef cypclass Player(Character):
int score
__init__(self, int health):
self.health = health
self.score = 0
Player __iadd__(self, int bonus):
self.score += bonus
return self
void dump(self) with gil:
print("Player Character (health: %d, score: %d)" % (self.health,
self.score))
def main():
cdef Player p
with nogil:
p = Player(10)
p += 2
p.update_health(-1)
p.dump()
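# Expected behaviour (derived from the code above): main() builds a Player
# with 10 health, adds a score bonus of 2 through the overloaded __iadd__,
# and update_health(-1) lowers health to 9, so dump() should print
# "Player Character (health: 9, score: 2)".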
from setuptools import setup, Extension
from Cython.Build import build_ext
from glob import glob
# To compile, use
# python setup.py build_ext --inplace
#
extensions = [
Extension(pyx_file.replace(".pyx", ""),
sources=[pyx_file],
language="c++",
)
for pyx_file in sorted(glob("*.pyx"))
]
setup(
name="c",
author="Julien Jerphanion",
author_email="git@jjerphan.xyz",
version='1',
cmdclass={'build_ext': build_ext},
ext_modules=extensions,
install_requires=[
'setuptools>=18.0',
'cython>=0.27.3',
'numpy'
],
classifiers=[
"Intended Audience :: Science/Research",
"Intended Audience :: Developers",
"License :: OSI Approved",
"Programming Language :: C",
"Programming Language :: Python",
"Topic :: Software Development",
"Topic :: Scientific/Engineering",
"Operating System :: POSIX",
"Operating System :: Unix",
"Operating System :: MacOS",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: " "Implementation :: CPython",
],
python_requires=">=3.6",
)
cdef cypclass Singleton
cdef int allocated = 0
cdef Singleton ptr
cdef cypclass Singleton:
Singleton __new__(alloc):
global allocated
global ptr
if not allocated:
    allocated = 1
    ptr = alloc()
return ptr
cpdef void testSingleton():
o1 = Singleton()
o2 = Singleton()
if o1 is o2 and o1 is ptr:
print("👍")
cpdef cypclass SomeClass:
int a
double b
int __int__(self):
return self.a
double __double__(self):
return self.b
cdef cypclass SomeContainer:
SomeClass some_object
bint __bool__(self):
return self.some_object is not NULL
int __int__(self):
if self:
return <int> self.some_object
else:
return 0
double __double__(self):
if self:
return <double> self.some_object
else:
return 0.0
cpdef void test():
contained = SomeClass()
contained.a = 42
contained.b = 4.2
container = SomeContainer()
container.some_object = contained
print(<bint> container, <int> container, <double> container)
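# Expected behaviour (derived from the code above): each cast dispatches to
# the matching conversion method (__bool__, __int__, __double__), so test()
# should print: True 42 4.2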
from cython cimport floating
cpdef enum BLAS_Order:
RowMajor # C contiguous
ColMajor # Fortran contiguous
cpdef enum BLAS_Trans:
NoTrans = 110 # corresponds to 'n'
Trans = 116 # corresponds to 't'
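# Note: 110 and 116 are ord('n') and ord('t'), so a BLAS_Trans value can be
# passed directly (after a cast to char) as the one-character `trans`
# argument expected by the Fortran BLAS routines.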
# BLAS Level 1 ################################################################
cdef floating _dot(int, floating*, int, floating*, int) nogil
cdef floating _asum(int, floating*, int) nogil
cdef void _axpy(int, floating, floating*, int, floating*, int) nogil
cdef floating _nrm2(int, floating*, int) nogil
cdef void _copy(int, floating*, int, floating*, int) nogil
cdef void _scal(int, floating, floating*, int) nogil
cdef void _rotg(floating*, floating*, floating*, floating*) nogil
cdef void _rot(int, floating*, int, floating*, int, floating, floating) nogil
# BLAS Level 2 ################################################################
cdef void _gemv(BLAS_Order, BLAS_Trans, int, int, floating, floating*, int,
floating*, int, floating, floating*, int) nogil
cdef void _ger(BLAS_Order, int, int, floating, floating*, int, floating*, int,
floating*, int) nogil
# BLAS Level 3 ###############################################################
cdef void _gemm(BLAS_Order, BLAS_Trans, BLAS_Trans, int, int, int, floating,
floating*, int, floating*, int, floating, floating*,
int) nogil
# cython: language_level = 3
from cython cimport floating
from scipy.linalg.cython_blas cimport sdot, ddot
from scipy.linalg.cython_blas cimport sasum, dasum
from scipy.linalg.cython_blas cimport saxpy, daxpy
from scipy.linalg.cython_blas cimport snrm2, dnrm2
from scipy.linalg.cython_blas cimport scopy, dcopy
from scipy.linalg.cython_blas cimport sscal, dscal
from scipy.linalg.cython_blas cimport srotg, drotg
from scipy.linalg.cython_blas cimport srot, drot
from scipy.linalg.cython_blas cimport sgemv, dgemv
from scipy.linalg.cython_blas cimport sger, dger
from scipy.linalg.cython_blas cimport sgemm, dgemm
################
# BLAS Level 1 #
################
cdef floating _dot(int n, floating *x, int incx,
floating *y, int incy) nogil:
"""x.T.y"""
if floating is float:
return sdot(&n, x, &incx, y, &incy)
else:
return ddot(&n, x, &incx, y, &incy)
cpdef _dot_memview(floating[::1] x, floating[::1] y):
return _dot(x.shape[0], &x[0], 1, &y[0], 1)
cdef floating _asum(int n, floating *x, int incx) nogil:
"""sum(|x_i|)"""
if floating is float:
return sasum(&n, x, &incx)
else:
return dasum(&n, x, &incx)
cpdef _asum_memview(floating[::1] x):
return _asum(x.shape[0], &x[0], 1)
cdef void _axpy(int n, floating alpha, floating *x, int incx,
floating *y, int incy) nogil:
"""y := alpha * x + y"""
if floating is float:
saxpy(&n, &alpha, x, &incx, y, &incy)
else:
daxpy(&n, &alpha, x, &incx, y, &incy)
cpdef _axpy_memview(floating alpha, floating[::1] x, floating[::1] y):
_axpy(x.shape[0], alpha, &x[0], 1, &y[0], 1)
cdef floating _nrm2(int n, floating *x, int incx) nogil:
"""sqrt(sum((x_i)^2))"""
if floating is float:
return snrm2(&n, x, &incx)
else:
return dnrm2(&n, x, &incx)
cpdef _nrm2_memview(floating[::1] x):
return _nrm2(x.shape[0], &x[0], 1)
cdef void _copy(int n, floating *x, int incx, floating *y, int incy) nogil:
"""y := x"""
if floating is float:
scopy(&n, x, &incx, y, &incy)
else:
dcopy(&n, x, &incx, y, &incy)
cpdef _copy_memview(floating[::1] x, floating[::1] y):
_copy(x.shape[0], &x[0], 1, &y[0], 1)
cdef void _scal(int n, floating alpha, floating *x, int incx) nogil:
"""x := alpha * x"""
if floating is float:
sscal(&n, &alpha, x, &incx)
else:
dscal(&n, &alpha, x, &incx)
cpdef _scal_memview(floating alpha, floating[::1] x):
_scal(x.shape[0], alpha, &x[0], 1)
cdef void _rotg(floating *a, floating *b, floating *c, floating *s) nogil:
"""Generate plane rotation"""
if floating is float:
srotg(a, b, c, s)
else:
drotg(a, b, c, s)
cpdef _rotg_memview(floating a, floating b, floating c, floating s):
_rotg(&a, &b, &c, &s)
return a, b, c, s
cdef void _rot(int n, floating *x, int incx, floating *y, int incy,
floating c, floating s) nogil:
"""Apply plane rotation"""
if floating is float:
srot(&n, x, &incx, y, &incy, &c, &s)
else:
drot(&n, x, &incx, y, &incy, &c, &s)
cpdef _rot_memview(floating[::1] x, floating[::1] y, floating c, floating s):
_rot(x.shape[0], &x[0], 1, &y[0], 1, c, s)
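# Hypothetical smoke test (a sketch added for illustration, not part of the
# original file): exercise a few of the level 1 wrappers on float64 data and
# compare against NumPy.
def _check_level1_against_numpy():
    import numpy as np
    x = np.array([1.0, -2.0, 3.0])
    y = np.array([4.0, 5.0, 6.0])
    assert np.isclose(_dot_memview(x, y), x @ y)   # x.T.y
    assert np.isclose(_asum_memview(x), np.abs(x).sum())
    assert np.isclose(_nrm2_memview(x), np.sqrt((x ** 2).sum()))
    _axpy_memview(2.0, x, y)                       # y := 2 * x + y
    assert np.allclose(y, [6.0, 1.0, 12.0])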
################
# BLAS Level 2 #
################
cdef void _gemv(BLAS_Order order, BLAS_Trans ta, int m, int n, floating alpha,
floating *A, int lda, floating *x, int incx,
floating beta, floating *y, int incy) nogil:
"""y := alpha * op(A).x + beta * y"""
cdef char ta_ = ta
if order == RowMajor:
ta_ = NoTrans if ta == Trans else Trans
if floating is float:
sgemv(&ta_, &n, &m, &alpha, A, &lda, x, &incx, &beta, y, &incy)
else:
dgemv(&ta_, &n, &m, &alpha, A, &lda, x, &incx, &beta, y, &incy)
else:
if floating is float:
sgemv(&ta_, &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy)
else:
dgemv(&ta_, &m, &n, &alpha, A, &lda, x, &incx, &beta, y, &incy)
cpdef _gemv_memview(BLAS_Trans ta, floating alpha, floating[:, :] A,
floating[::1] x, floating beta, floating[::1] y):
cdef:
int m = A.shape[0]
int n = A.shape[1]
BLAS_Order order = ColMajor if A.strides[0] == A.itemsize else RowMajor
int lda = m if order == ColMajor else n
_gemv(order, ta, m, n, alpha, &A[0, 0], lda, &x[0], 1, beta, &y[0], 1)
cdef void _ger(BLAS_Order order, int m, int n, floating alpha, floating *x,
int incx, floating *y, int incy, floating *A, int lda) nogil:
"""A := alpha * x.y.T + A"""
if order == RowMajor:
if floating is float:
sger(&n, &m, &alpha, y, &incy, x, &incx, A, &lda)
else:
dger(&n, &m, &alpha, y, &incy, x, &incx, A, &lda)
else:
if floating is float:
sger(&m, &n, &alpha, x, &incx, y, &incy, A, &lda)
else:
dger(&m, &n, &alpha, x, &incx, y, &incy, A, &lda)
cpdef _ger_memview(floating alpha, floating[::1] x, floating[::] y,
floating[:, :] A):
cdef:
int m = A.shape[0]
int n = A.shape[1]
BLAS_Order order = ColMajor if A.strides[0] == A.itemsize else RowMajor
int lda = m if order == ColMajor else n
_ger(order, m, n, alpha, &x[0], 1, &y[0], 1, &A[0, 0], lda)
################
# BLAS Level 3 #
################
cdef void _gemm(BLAS_Order order, BLAS_Trans ta, BLAS_Trans tb, int m, int n,
int k, floating alpha, floating *A, int lda, floating *B,
int ldb, floating beta, floating *C, int ldc) nogil:
"""C := alpha * op(A).op(B) + beta * C"""
cdef:
char ta_ = ta
char tb_ = tb
if order == RowMajor:
if floating is float:
sgemm(&tb_, &ta_, &n, &m, &k, &alpha, B,
&ldb, A, &lda, &beta, C, &ldc)
else:
dgemm(&tb_, &ta_, &n, &m, &k, &alpha, B,
&ldb, A, &lda, &beta, C, &ldc)
else:
if floating is float:
sgemm(&ta_, &tb_, &m, &n, &k, &alpha, A,
&lda, B, &ldb, &beta, C, &ldc)
else:
dgemm(&ta_, &tb_, &m, &n, &k, &alpha, A,
&lda, B, &ldb, &beta, C, &ldc)
cpdef _gemm_memview(BLAS_Trans ta, BLAS_Trans tb, floating alpha,
floating[:, :] A, floating[:, :] B, floating beta,
floating[:, :] C):
cdef:
int m = A.shape[0] if ta == NoTrans else A.shape[1]
int n = B.shape[1] if tb == NoTrans else B.shape[0]
int k = A.shape[1] if ta == NoTrans else A.shape[0]
int lda, ldb, ldc
BLAS_Order order = ColMajor if A.strides[0] == A.itemsize else RowMajor
if order == RowMajor:
lda = k if ta == NoTrans else m
ldb = n if tb == NoTrans else k
ldc = n
else:
lda = m if ta == NoTrans else k
ldb = k if tb == NoTrans else n
ldc = m
_gemm(order, ta, tb, m, n, k, alpha, &A[0, 0],
lda, &B[0, 0], ldb, beta, &C[0, 0], ldc)
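# Hypothetical smoke test (a sketch added for illustration, not part of the
# original file): check the _gemm wrapper against NumPy on a small row-major
# float64 problem.
def _check_gemm_against_numpy():
    import numpy as np
    rng = np.random.RandomState(0)
    A = rng.rand(4, 3)
    B = rng.rand(3, 5)
    C = np.zeros((4, 5))
    _gemm_memview(NoTrans, NoTrans, 1.0, A, B, 0.0, C)  # C := A.B
    assert np.allclose(C, A @ B)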
# cython: language_level=3
from cython cimport floating
cimport numpy as np
cdef floating _euclidean_dense_dense(floating*, floating*, int, bint) nogil
cdef floating _euclidean_sparse_dense(floating[::1], int[::1], floating[::1],
floating, bint) nogil
cpdef void _relocate_empty_clusters_dense(
np.ndarray[floating, ndim=2, mode='c'], floating[::1], floating[:, ::1],
floating[:, ::1], floating[::1], int[::1])
cpdef void _relocate_empty_clusters_sparse(
floating[::1], int[::1], int[::1], floating[::1], floating[:, ::1],
floating[:, ::1], floating[::1], int[::1])
cdef void _average_centers(floating[:, ::1], floating[::1])
cdef void _center_shift(floating[:, ::1], floating[:, ::1], floating[::1])
# cython: profile=True, boundscheck=False, wraparound=False, cdivision=True
# Profiling is enabled by default as the overhead does not seem to be
# measurable on this specific use case.
# Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>
# Olivier Grisel <olivier.grisel@ensta.org>
# Lars Buitinck
#
# License: BSD 3 clause
# TODO: We still need to use ndarrays instead of typed memoryviews when using
# fused types and when the array may be read-only (for instance when it's
# provided by the user). This is fixed in Cython >= 3.0.
import numpy as np
cimport numpy as np
from cython cimport floating
from libc.math cimport sqrt
from scipy import sparse
np.import_array()
ctypedef fused integral:
int
long long
ctypedef np.float64_t DOUBLE
ctypedef np.int32_t INT
# Number of samples per data chunk defined as a global constant.
CHUNK_SIZE = 256
def csr_row_norms(X):
"""L2 norm of each row in CSR matrix X."""
if X.dtype not in [np.float32, np.float64]:
X = X.astype(np.float64)
return _csr_row_norms(X.data, X.shape, X.indices, X.indptr)
def _csr_row_norms(np.ndarray[floating, ndim=1, mode="c"] X_data,
shape,
np.ndarray[integral, ndim=1, mode="c"] X_indices,
np.ndarray[integral, ndim=1, mode="c"] X_indptr):
cdef:
unsigned long long n_samples = shape[0]
unsigned long long i
integral j
double sum_
norms = np.empty(n_samples, dtype=X_data.dtype)
cdef floating[::1] norms_view = norms
for i in range(n_samples):
sum_ = 0.0
for j in range(X_indptr[i], X_indptr[i + 1]):
sum_ += X_data[j] * X_data[j]
norms_view[i] = sum_
return norms
def row_norms(X, squared=False):
"""Row-wise (squared) Euclidean norm of X.
Equivalent to np.sqrt((X * X).sum(axis=1)), but also supports sparse
matrices and does not create an X.shape-sized temporary.
Performs no input validation.
Parameters
----------
X : array-like
The input array.
squared : bool, default=False
If True, return squared norms.
Returns
-------
array-like
The row-wise (squared) Euclidean norm of X.
"""
if sparse.issparse(X):
if not isinstance(X, sparse.csr_matrix):
X = sparse.csr_matrix(X)
norms = csr_row_norms(X)
else:
norms = np.einsum('ij,ij->i', X, X)
if not squared:
np.sqrt(norms, norms)
return norms
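# Example (illustrative, not in the original file): for
# X = np.array([[3., 4.], [0., 0.]]), row_norms(X) returns [5., 0.] and
# row_norms(X, squared=True) returns [25., 0.].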
cdef floating _euclidean_dense_dense(
floating* a, # IN
floating* b, # IN
int n_features,
bint squared) nogil:
"""Euclidean distance between a dense and b dense"""
cdef:
int i
int n = n_features // 4
int rem = n_features % 4
floating result = 0
# We manually unroll the loop for better cache optimization.
for i in range(n):
result += ((a[0] - b[0]) * (a[0] - b[0])
+(a[1] - b[1]) * (a[1] - b[1])
+(a[2] - b[2]) * (a[2] - b[2])
+(a[3] - b[3]) * (a[3] - b[3]))
a += 4; b += 4
for i in range(rem):
result += (a[i] - b[i]) * (a[i] - b[i])
return result if squared else sqrt(result)
def _euclidean_dense_dense_wrapper(floating[::1] a, floating[::1] b,
bint squared):
"""Wrapper of _euclidean_dense_dense for testing purpose"""
return _euclidean_dense_dense(&a[0], &b[0], a.shape[0], squared)
cdef floating _euclidean_sparse_dense(
floating[::1] a_data, # IN
int[::1] a_indices, # IN
floating[::1] b, # IN
floating b_squared_norm,
bint squared) nogil:
"""Euclidean distance between a sparse and b dense"""
cdef:
int nnz = a_indices.shape[0]
int i
floating tmp, bi
floating result = 0.0
for i in range(nnz):
bi = b[a_indices[i]]
tmp = a_data[i] - bi
result += tmp * tmp - bi * bi
result += b_squared_norm
if result < 0: result = 0.0
return result if squared else sqrt(result)
def _euclidean_sparse_dense_wrapper(
floating[::1] a_data,
int[::1] a_indices,
floating[::1] b,
floating b_squared_norm,
bint squared):
"""Wrapper of _euclidean_sparse_dense for testing purpose"""
return _euclidean_sparse_dense(
a_data, a_indices, b, b_squared_norm, squared)
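# Hypothetical smoke test (a sketch added for illustration, not part of the
# original file): the sparse-dense path relies on the expansion
# ||a - b||^2 = sum_{i in nnz(a)} ((a_i - b_i)^2 - b_i^2) + ||b||^2,
# so only the nonzero entries of `a` need to be visited.
def _check_euclidean_sparse_dense():
    rng = np.random.RandomState(0)
    a = sparse.random(1, 10, density=0.3, format='csr', random_state=rng)
    b = rng.rand(10)
    expected = ((a.toarray().ravel() - b) ** 2).sum()
    got = _euclidean_sparse_dense_wrapper(
        a.data, a.indices, b, (b ** 2).sum(), True)
    assert np.isclose(got, expected)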
cpdef floating _inertia_dense(
np.ndarray[floating, ndim=2, mode='c'] X, # IN
floating[::1] sample_weight, # IN
floating[:, ::1] centers, # IN
int[::1] labels): # IN
"""Compute inertia for dense input data
Sum of squared distance between each sample and its assigned center.
"""
cdef:
int n_samples = X.shape[0]
int n_features = X.shape[1]
int i, j
floating sq_dist = 0.0
floating inertia = 0.0
for i in range(n_samples):
j = labels[i]
sq_dist = _euclidean_dense_dense(&X[i, 0], &centers[j, 0],
n_features, True)
inertia += sq_dist * sample_weight[i]
return inertia
cpdef floating _inertia_sparse(
X, # IN
floating[::1] sample_weight, # IN
floating[:, ::1] centers, # IN
int[::1] labels): # IN
"""Compute inertia for sparse input data
Sum of squared distance between each sample and its assigned center.
"""
cdef:
floating[::1] X_data = X.data
int[::1] X_indices = X.indices
int[::1] X_indptr = X.indptr
int n_samples = X.shape[0]
int n_features = X.shape[1]
int i, j
floating sq_dist = 0.0
floating inertia = 0.0
floating[::1] centers_squared_norms = row_norms(centers, squared=True)
for i in range(n_samples):
j = labels[i]
sq_dist = _euclidean_sparse_dense(
X_data[X_indptr[i]: X_indptr[i + 1]],
X_indices[X_indptr[i]: X_indptr[i + 1]],
centers[j], centers_squared_norms[j], True)
inertia += sq_dist * sample_weight[i]
return inertia
cpdef void _relocate_empty_clusters_dense(
np.ndarray[floating, ndim=2, mode='c'] X, # IN
floating[::1] sample_weight, # IN
floating[:, ::1] centers_old, # IN
floating[:, ::1] centers_new, # INOUT
floating[::1] weight_in_clusters, # INOUT
int[::1] labels): # IN
"""Relocate centers which have no sample assigned to them."""
cdef:
int[::1] empty_clusters = np.where(np.equal(weight_in_clusters, 0))[0].astype(np.int32)
int n_empty = empty_clusters.shape[0]
if n_empty == 0:
return
cdef:
int n_features = X.shape[1]
floating[::1] distances = ((np.asarray(X) - np.asarray(centers_old)[labels])**2).sum(axis=1)
int[::1] far_from_centers = np.argpartition(distances, -n_empty)[:-n_empty-1:-1].astype(np.int32)
int new_cluster_id, old_cluster_id, far_idx, idx, k
floating weight
for idx in range(n_empty):
new_cluster_id = empty_clusters[idx]
far_idx = far_from_centers[idx]
weight = sample_weight[far_idx]
old_cluster_id = labels[far_idx]
for k in range(n_features):
centers_new[old_cluster_id, k] -= X[far_idx, k] * weight
centers_new[new_cluster_id, k] = X[far_idx, k] * weight
weight_in_clusters[new_cluster_id] = weight
weight_in_clusters[old_cluster_id] -= weight
cpdef void _relocate_empty_clusters_sparse(
floating[::1] X_data, # IN
int[::1] X_indices, # IN
int[::1] X_indptr, # IN
floating[::1] sample_weight, # IN
floating[:, ::1] centers_old, # IN
floating[:, ::1] centers_new, # INOUT
floating[::1] weight_in_clusters, # INOUT
int[::1] labels): # IN
"""Relocate centers which have no sample assigned to them."""
cdef:
int[::1] empty_clusters = np.where(np.equal(weight_in_clusters, 0))[0].astype(np.int32)
int n_empty = empty_clusters.shape[0]
if n_empty == 0:
return
cdef:
int n_samples = X_indptr.shape[0] - 1
int n_features = centers_old.shape[1]
floating x
int i, j, k
floating[::1] distances = np.zeros(n_samples, dtype=X_data.base.dtype)
floating[::1] centers_squared_norms = row_norms(centers_old, squared=True)
for i in range(n_samples):
j = labels[i]
distances[i] = _euclidean_sparse_dense(
X_data[X_indptr[i]: X_indptr[i + 1]],
X_indices[X_indptr[i]: X_indptr[i + 1]],
centers_old[j], centers_squared_norms[j], True)
cdef:
int[::1] far_from_centers = np.argpartition(distances, -n_empty)[:-n_empty-1:-1].astype(np.int32)
int new_cluster_id, old_cluster_id, far_idx, idx
floating weight
for idx in range(n_empty):
new_cluster_id = empty_clusters[idx]
far_idx = far_from_centers[idx]
weight = sample_weight[far_idx]
old_cluster_id = labels[far_idx]
for k in range(X_indptr[far_idx], X_indptr[far_idx + 1]):
centers_new[old_cluster_id, X_indices[k]] -= X_data[k] * weight
centers_new[new_cluster_id, X_indices[k]] = X_data[k] * weight
weight_in_clusters[new_cluster_id] = weight
weight_in_clusters[old_cluster_id] -= weight
cdef void _average_centers(
floating[:, ::1] centers, # INOUT
floating[::1] weight_in_clusters): # IN
"""Average new centers wrt weights."""
cdef:
int n_clusters = centers.shape[0]
int n_features = centers.shape[1]
int j, k
floating alpha
for j in range(n_clusters):
if weight_in_clusters[j] > 0:
alpha = 1.0 / weight_in_clusters[j]
for k in range(n_features):
centers[j, k] *= alpha
cdef void _center_shift(
floating[:, ::1] centers_old, # IN
floating[:, ::1] centers_new, # IN
floating[::1] center_shift): # OUT
"""Compute shift between old and new centers."""
cdef:
int n_clusters = centers_old.shape[0]
int n_features = centers_old.shape[1]
int j
for j in range(n_clusters):
center_shift[j] = _euclidean_dense_dense(
&centers_new[j, 0], &centers_old[j, 0], n_features, False)
def _mini_batch_update_csr(X, np.ndarray[floating, ndim=1] sample_weight,
np.ndarray[floating, ndim=1] x_squared_norms,
np.ndarray[floating, ndim=2] centers,
np.ndarray[floating, ndim=1] weight_sums,
np.ndarray[INT, ndim=1] nearest_center,
np.ndarray[floating, ndim=1] old_center,
int compute_squared_diff):
"""Incremental update of the centers for sparse MiniBatchKMeans.
Parameters
----------
X : CSR matrix, dtype float
The complete (pre allocated) training set as a CSR matrix.
centers : array, shape (n_clusters, n_features)
The cluster centers
weight_sums : array, shape (n_clusters,)
    The vector in which we keep track of the accumulated sample weight in
    each cluster
Returns
-------
squared_diff : float
    The sum of squared update (squared norm of the centers position
    change). If compute_squared_diff is 0, this computation is skipped and
    0.0 is returned instead.

squared_diff is commonly used to monitor the convergence of the algorithm.
"""
cdef:
np.ndarray[floating, ndim=1] X_data = X.data
np.ndarray[int, ndim=1] X_indices = X.indices
np.ndarray[int, ndim=1] X_indptr = X.indptr
unsigned int n_samples = X.shape[0]
unsigned int n_clusters = centers.shape[0]
unsigned int n_features = centers.shape[1]
unsigned int sample_idx, center_idx, feature_idx
unsigned int k
DOUBLE old_weight_sum, new_weight_sum
DOUBLE center_diff
DOUBLE squared_diff = 0.0
# move centers to the mean of both old and newly assigned samples
for center_idx in range(n_clusters):
old_weight_sum = weight_sums[center_idx]
new_weight_sum = old_weight_sum
# count the number of samples assigned to this center
for sample_idx in range(n_samples):
if nearest_center[sample_idx] == center_idx:
new_weight_sum += sample_weight[sample_idx]
if new_weight_sum == old_weight_sum:
# no new sample: leave this center as it stands
continue
# rescale the old center to reflect its previous accumulated weight
# with regard to the new data that will be incrementally contributed
if compute_squared_diff:
old_center[:] = centers[center_idx]
centers[center_idx] *= old_weight_sum
# iterate over samples assigned to this cluster to move the center
# location by inplace summation
for sample_idx in range(n_samples):
if nearest_center[sample_idx] != center_idx:
continue
# inplace sum with new samples that are members of this cluster
# and update of the incremental squared difference update of the
# center position
for k in range(X_indptr[sample_idx], X_indptr[sample_idx + 1]):
centers[center_idx, X_indices[k]] += X_data[k]
# inplace rescale center with updated count
if new_weight_sum > old_weight_sum:
# update the count statistics for this center
weight_sums[center_idx] = new_weight_sum
# re-scale the updated center with the total new counts
centers[center_idx] /= new_weight_sum
# update the incremental computation of the squared total
# centers position change
if compute_squared_diff:
for feature_idx in range(n_features):
squared_diff += (old_center[feature_idx]
- centers[center_idx, feature_idx]) ** 2
return squared_diff
# cython: profile=True, boundscheck=False, wraparound=False, cdivision=True
#
# Licence: BSD 3 clause
# TODO: We still need to use ndarrays instead of typed memoryviews when using
# fused types and when the array may be read-only (for instance when it's
# provided by the user). This is fixed in Cython >= 3.0.
import numpy as np
cimport numpy as np
from cython cimport floating
from cython.parallel import prange, parallel
from libc.stdlib cimport malloc, calloc, free
from libc.string cimport memset
from libc.float cimport DBL_MAX, FLT_MAX
from ._cython_blas cimport _gemm
from ._cython_blas cimport RowMajor, Trans, NoTrans
from ._k_means_fast import CHUNK_SIZE, row_norms
from ._k_means_fast cimport _relocate_empty_clusters_dense
from ._k_means_fast cimport _relocate_empty_clusters_sparse
from ._k_means_fast cimport _average_centers, _center_shift
np.import_array()
def lloyd_iter_chunked_dense(
np.ndarray[floating, ndim=2, mode='c'] X, # IN
floating[::1] sample_weight, # IN
floating[::1] x_squared_norms, # IN
floating[:, ::1] centers_old, # IN
floating[:, ::1] centers_new, # OUT
floating[::1] weight_in_clusters, # OUT
int[::1] labels, # OUT
floating[::1] center_shift, # OUT
int n_threads,
bint update_centers=True):
"""Single iteration of K-means lloyd algorithm with dense input.
Update labels and centers (inplace), for one iteration, distributed
over data chunks.
Parameters
----------
X : ndarray of shape (n_samples, n_features), dtype=floating
The observations to cluster.
sample_weight : ndarray of shape (n_samples,), dtype=floating
The weights for each observation in X.
x_squared_norms : ndarray of shape (n_samples,), dtype=floating
Squared L2 norm of X.
centers_old : ndarray of shape (n_clusters, n_features), dtype=floating
Centers before previous iteration, placeholder for the centers after
previous iteration.
centers_new : ndarray of shape (n_clusters, n_features), dtype=floating
Centers after previous iteration, placeholder for the new centers
computed during this iteration.
weight_in_clusters : ndarray of shape (n_clusters,), dtype=floating
Placeholder for the sums of the weights of every observation assigned
to each center.
labels : ndarray of shape (n_samples,), dtype=int
labels assignment.
center_shift : ndarray of shape (n_clusters,), dtype=floating
Distance between old and new centers.
n_threads : int
The number of threads to be used by openmp.
update_centers : bool
- If True, the labels and the new centers will be computed, i.e. runs
the E-step and the M-step of the algorithm.
- If False, only the labels will be computed, i.e. runs the E-step of
the algorithm. This is useful especially when calling predict on a
fitted model.
"""
cdef:
int n_samples = X.shape[0]
int n_features = X.shape[1]
int n_clusters = centers_new.shape[0]
# hard-coded number of samples per chunk. Appeared to be close to
# optimal in all situations.
int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples
int n_chunks = n_samples // n_samples_chunk
int n_samples_rem = n_samples % n_samples_chunk
int chunk_idx, n_samples_chunk_eff
int start, end
int j, k
floating[::1] centers_squared_norms = row_norms(centers_old, squared=True)
floating *centers_new_chunk
floating *weight_in_clusters_chunk
floating *pairwise_distances_chunk
# count remainder chunk in total number of chunks
n_chunks += n_samples != n_chunks * n_samples_chunk
# number of threads should not be bigger than number of chunks
n_threads = min(n_threads, n_chunks)
if update_centers:
memset(&centers_new[0, 0], 0, n_clusters * n_features * sizeof(floating))
memset(&weight_in_clusters[0], 0, n_clusters * sizeof(floating))
with nogil, parallel(num_threads=n_threads):
# thread local buffers
centers_new_chunk = <floating*> calloc(n_clusters * n_features, sizeof(floating))
weight_in_clusters_chunk = <floating*> calloc(n_clusters, sizeof(floating))
pairwise_distances_chunk = <floating*> malloc(n_samples_chunk * n_clusters * sizeof(floating))
for chunk_idx in prange(n_chunks, schedule='static'):
start = chunk_idx * n_samples_chunk
if chunk_idx == n_chunks - 1 and n_samples_rem > 0:
end = start + n_samples_rem
else:
end = start + n_samples_chunk
_update_chunk_dense(
&X[start, 0],
sample_weight[start: end],
x_squared_norms[start: end],
centers_old,
centers_squared_norms,
labels[start: end],
centers_new_chunk,
weight_in_clusters_chunk,
pairwise_distances_chunk,
update_centers)
# reduction from local buffers. The gil is necessary for that to avoid
# race conditions.
if update_centers:
with gil:
for j in range(n_clusters):
weight_in_clusters[j] += weight_in_clusters_chunk[j]
for k in range(n_features):
centers_new[j, k] += centers_new_chunk[j * n_features + k]
free(centers_new_chunk)
free(weight_in_clusters_chunk)
free(pairwise_distances_chunk)
if update_centers:
_relocate_empty_clusters_dense(X, sample_weight, centers_old,
centers_new, weight_in_clusters, labels)
_average_centers(centers_new, weight_in_clusters)
_center_shift(centers_old, centers_new, center_shift)
cdef void _update_chunk_dense(
floating *X, # IN
# expecting C aligned 2D array. XXX: Can be
# replaced by const memoryview when Cython min
# version is >= 3.0
floating[::1] sample_weight, # IN
floating[::1] x_squared_norms, # IN
floating[:, ::1] centers_old, # IN
floating[::1] centers_squared_norms, # IN
int[::1] labels, # OUT
floating *centers_new, # OUT
floating *weight_in_clusters, # OUT
floating *pairwise_distances, # OUT
bint update_centers) nogil:
"""K-means combined EM step for one dense data chunk.
Compute the partial contribution of a single data chunk to the labels and
centers.
"""
cdef:
int n_samples = labels.shape[0]
int n_clusters = centers_old.shape[0]
int n_features = centers_old.shape[1]
floating sq_dist, min_sq_dist
int i, j, k, label
# Instead of computing the full pairwise squared distances matrix,
# ||X - C||² = ||X||² - 2 X.C^T + ||C||², we only need to store
# the - 2 X.C^T + ||C||² term since the argmin for a given sample only
# depends on the centers.
# pairwise_distances = ||C||²
for i in range(n_samples):
for j in range(n_clusters):
pairwise_distances[i * n_clusters + j] = centers_squared_norms[j]
# pairwise_distances += -2 * X.dot(C.T)
_gemm(RowMajor, NoTrans, Trans, n_samples, n_clusters, n_features,
-2.0, X, n_features, &centers_old[0, 0], n_features,
1.0, pairwise_distances, n_clusters)
for i in range(n_samples):
min_sq_dist = pairwise_distances[i * n_clusters]
label = 0
for j in range(1, n_clusters):
sq_dist = pairwise_distances[i * n_clusters + j]
if sq_dist < min_sq_dist:
min_sq_dist = sq_dist
label = j
labels[i] = label
if update_centers:
weight_in_clusters[label] += sample_weight[i]
for k in range(n_features):
centers_new[label * n_features + k] += X[i * n_features + k] * sample_weight[i]
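# Illustrative NumPy check of the trick above (a sketch, not part of the
# original file): labels computed from the partial term -2 X.C^T + ||C||^2
# match labels computed from the full squared distances, since ||X||^2 is
# constant for a given sample.
def _check_partial_distance_trick():
    rng = np.random.RandomState(0)
    X = rng.rand(6, 3)
    C = rng.rand(2, 3)
    full = ((X[:, None, :] - C[None, :, :]) ** 2).sum(axis=2)
    partial = -2 * X @ C.T + (C ** 2).sum(axis=1)
    assert (full.argmin(axis=1) == partial.argmin(axis=1)).all()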
def lloyd_iter_chunked_sparse(
X, # IN
floating[::1] sample_weight, # IN
floating[::1] x_squared_norms, # IN
floating[:, ::1] centers_old, # IN
floating[:, ::1] centers_new, # OUT
floating[::1] weight_in_clusters, # OUT
int[::1] labels, # OUT
floating[::1] center_shift, # OUT
int n_threads,
bint update_centers=True):
"""Single iteration of K-means lloyd algorithm with sparse input.
Update labels and centers (inplace), for one iteration, distributed
over data chunks.
Parameters
----------
X : sparse matrix of shape (n_samples, n_features), dtype=floating
The observations to cluster. Must be in CSR format.
sample_weight : ndarray of shape (n_samples,), dtype=floating
The weights for each observation in X.
x_squared_norms : ndarray of shape (n_samples,), dtype=floating
Squared L2 norm of X.
centers_old : ndarray of shape (n_clusters, n_features), dtype=floating
Centers before previous iteration, placeholder for the centers after
previous iteration.
centers_new : ndarray of shape (n_clusters, n_features), dtype=floating
Centers after previous iteration, placeholder for the new centers
computed during this iteration.
weight_in_clusters : ndarray of shape (n_clusters,), dtype=floating
Placeholder for the sums of the weights of every observation assigned
to each center.
labels : ndarray of shape (n_samples,), dtype=int
labels assignment.
center_shift : ndarray of shape (n_clusters,), dtype=floating
Distance between old and new centers.
n_threads : int
The number of threads to be used by openmp.
update_centers : bool
- If True, the labels and the new centers will be computed, i.e. runs
the E-step and the M-step of the algorithm.
- If False, only the labels will be computed, i.e. runs the E-step of
the algorithm. This is useful especially when calling predict on a
fitted model.
"""
cdef:
int n_samples = X.shape[0]
int n_features = X.shape[1]
int n_clusters = centers_new.shape[0]
# Chosen to be the same as for dense. It does not have the same impact
# since with sparse data the pairwise distances matrix is not precomputed.
# However, splitting in chunks is necessary to get parallelism.
int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples
int n_chunks = n_samples // n_samples_chunk
int n_samples_rem = n_samples % n_samples_chunk
int chunk_idx, n_samples_chunk_eff = 0
int start = 0, end = 0
int j, k
floating[::1] X_data = X.data
int[::1] X_indices = X.indices
int[::1] X_indptr = X.indptr
floating[::1] centers_squared_norms = row_norms(centers_old, squared=True)
floating *centers_new_chunk
floating *weight_in_clusters_chunk
# count remainder chunk in total number of chunks
n_chunks += n_samples != n_chunks * n_samples_chunk
# number of threads should not be bigger than number of chunks
n_threads = min(n_threads, n_chunks)
if update_centers:
memset(&centers_new[0, 0], 0, n_clusters * n_features * sizeof(floating))
memset(&weight_in_clusters[0], 0, n_clusters * sizeof(floating))
with nogil, parallel(num_threads=n_threads):
# thread local buffers
centers_new_chunk = <floating*> calloc(n_clusters * n_features, sizeof(floating))
weight_in_clusters_chunk = <floating*> calloc(n_clusters, sizeof(floating))
for chunk_idx in prange(n_chunks, schedule='static'):
start = chunk_idx * n_samples_chunk
if chunk_idx == n_chunks - 1 and n_samples_rem > 0:
end = start + n_samples_rem
else:
end = start + n_samples_chunk
_update_chunk_sparse(
X_data[X_indptr[start]: X_indptr[end]],
X_indices[X_indptr[start]: X_indptr[end]],
X_indptr[start: end],
sample_weight[start: end],
x_squared_norms[start: end],
centers_old,
centers_squared_norms,
labels[start: end],
centers_new_chunk,
weight_in_clusters_chunk,
update_centers)
# reduction from local buffers. The gil is necessary for that to avoid
# race conditions.
if update_centers:
with gil:
for j in range(n_clusters):
weight_in_clusters[j] += weight_in_clusters_chunk[j]
for k in range(n_features):
centers_new[j, k] += centers_new_chunk[j * n_features + k]
free(centers_new_chunk)
free(weight_in_clusters_chunk)
if update_centers:
_relocate_empty_clusters_sparse(
X_data, X_indices, X_indptr, sample_weight,
centers_old, centers_new, weight_in_clusters, labels)
_average_centers(centers_new, weight_in_clusters)
_center_shift(centers_old, centers_new, center_shift)
cdef void _update_chunk_sparse(
floating[::1] X_data, # IN
int[::1] X_indices, # IN
int[::1] X_indptr, # IN
floating[::1] sample_weight, # IN
floating[::1] x_squared_norms, # IN
floating[:, ::1] centers_old, # IN
floating[::1] centers_squared_norms, # IN
int[::1] labels, # OUT
floating *centers_new, # OUT
floating *weight_in_clusters, # OUT
bint update_centers) nogil:
"""K-means combined EM step for one sparse data chunk.
Compute the partial contribution of a single data chunk to the labels and
centers.
"""
cdef:
int n_samples = labels.shape[0]
int n_clusters = centers_old.shape[0]
int n_features = centers_old.shape[1]
floating sq_dist, min_sq_dist
int i, j, k, label
floating max_floating = FLT_MAX if floating is float else DBL_MAX
int s = X_indptr[0]
# XXX Precomputing the pairwise distances matrix is currently not worthwhile
# for sparse data. It should be revisited when BLAS (sparse x dense) matrix
# multiplication is available.
for i in range(n_samples):
min_sq_dist = max_floating
label = 0
for j in range(n_clusters):
sq_dist = 0.0
for k in range(X_indptr[i] - s, X_indptr[i + 1] - s):
sq_dist += centers_old[j, X_indices[k]] * X_data[k]
# Instead of computing the full squared distance with each cluster,
# ||X - C||² = ||X||² - 2 X.C^T + ||C||², we only need to compute
# the - 2 X.C^T + ||C||² term since the argmin for a given sample
# only depends on the centers C.
sq_dist = centers_squared_norms[j] - 2 * sq_dist
if sq_dist < min_sq_dist:
min_sq_dist = sq_dist
label = j
labels[i] = label
if update_centers:
weight_in_clusters[label] += sample_weight[i]
for k in range(X_indptr[i] - s, X_indptr[i + 1] - s):
centers_new[label * n_features + X_indices[k]] += X_data[k] * sample_weight[i]
# Cython compile instructions
import numpy
from setuptools import setup, Extension
from Cython.Build import build_ext
# To compile, use
# python setup.py build_ext --inplace
#
extensions = [
Extension("kmeans",
sources=["_k_means_fast.pyx"],
include_dirs=[numpy.get_include()],
)
]
setup(
name="kmeans",
cmdclass={'build_ext': build_ext},
ext_modules=extensions,
install_requires=[
'setuptools>=18.0',
'cython>=0.27.3',
'numpy'
],
)
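# Note (an assumption, not stated in the original files): _k_means_lloyd.pyx
# imports from ._k_means_fast at runtime, so if both modules are built, the
# extension compiled from _k_means_fast.pyx must keep the module name
# "_k_means_fast" (rather than "kmeans") for those imports to resolve.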