Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython_plus_experiments
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Julien Jerphanion
cython_plus_experiments
Commits
6ba0d820
Commit
6ba0d820
authored
Apr 07, 2021
by
Julien Jerphanion
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Adapt setup for benchmark
parent
6bd854ca
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
71 additions
and
19 deletions
+71
-19
kmeans/Makefile
kmeans/Makefile
+26
-4
kmeans/_kmeans.pyx
kmeans/_kmeans.pyx
+7
-2
kmeans/kmeans.py
kmeans/kmeans.py
+34
-12
kmeans/setup.py
kmeans/setup.py
+4
-1
No files found.
kmeans/Makefile
View file @
6ba0d820
all
:
all
:
mrproper build bench
python setup.py build_ext
--inplace
# Remove benchmark fixtures
clean
:
-
rm
-Rf
bench_res
-
rm
-f
*
.json
-
rm
-r
*
.html
-
rm
perf.data
-
rm
perf.data.old
mrproper
:
# Remove all but sources
mrproper
:
clean
rm
-Rf
build
rm
-f
*
.cpp
rm
-f
*
.cpp
rm
-f
*
.c
rm
-f
*
.c
rm
-f
*
.so
rm
-f
*
.so
rm
-Rf
build
build
:
python setup.py build_ext
--inplace
# Create a full tracing html report via Viztracer
# for reference, see: https://www.maartenbreddels.com/perf/jupyter/python/tracing/gil/2021/01/14/Tracing-the-Python-GIL.html
bench
:
clean
@
mkdir
-p
bench_res
@
perf record
-e
sched:sched_switch
-e
sched:sched_process_fork
-e
'sched:sched_wak*'
\
-k
CLOCK_MONOTONIC
\
--call-graph
dwarf
--
viztracer
-o
bench_res/bench.json
--ignore_frozen
kmeans.py
@
perf script
--no-inline
| per4m perf2trace sched
-o
bench_res/perf.json
@
viztracer
--combine
bench_res/perf.json bench_res/bench.json
-o
bench.html
@
open bench.html
kmeans/_kmeans.pyx
View file @
6ba0d820
# cython: profile=True, boundscheck=False, wraparound=False, cdivision=True
# cython: profile=True
#
# cython: boundscheck=False
# cython: wraparound=False
# cython: cdivision=True
# cython: language_level = 3
# cython: linetrace=True
# distutils: define_macros=CYTHON_TRACE_NOGIL=1
# Licence: BSD 3 clause
# Licence: BSD 3 clause
# TODO: We still need to use ndarrays instead of typed memoryviews when using
# TODO: We still need to use ndarrays instead of typed memoryviews when using
...
...
kmeans/kmeans.py
View file @
6ba0d820
import
time
from
viztracer
import
VizTracer
import
numpy
as
np
import
numpy
as
np
from
threadpoolctl
import
threadpool_limits
from
threadpoolctl
import
threadpool_limits
from
sklearn.datasets
import
make_classification
from
kmeans.
_kmeans
import
lloyd_iter_chunked_dense
,
_inertia_dense
from
_kmeans
import
lloyd_iter_chunked_dense
,
_inertia_dense
def
_kmeans_single_lloyd
(
X
,
sample_weight
,
centers_init
,
max_iter
=
300
,
def
_kmeans_single_lloyd
(
X
,
centers_init
,
sample_weight
=
None
,
max_iter
=
300
,
verbose
=
False
,
x_squared_norms
=
None
,
tol
=
1e-4
,
verbose
=
False
,
x_squared_norms
=
None
,
tol
=
1e-4
,
n_threads
=
1
):
n_threads
=
1
):
"""A single run of k-means lloyd, assumes preparation completed prior.
"""A single run of k-means lloyd, assumes preparation completed prior.
...
@@ -58,8 +63,6 @@ def _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter=300,
...
@@ -58,8 +63,6 @@ def _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter=300,
# NOTE: We only rely on dense arrays for these analyses
# NOTE: We only rely on dense arrays for these analyses
if
sample_weight
is
None
:
if
sample_weight
is
None
:
sample_weight
=
np
.
ones
((
X
.
shape
[
0
],),
dtype
=
X
.
dtype
)
sample_weight
=
np
.
ones
((
X
.
shape
[
0
],),
dtype
=
X
.
dtype
)
lloyd_iter
=
lloyd_iter_chunked_dense
_inertia
=
_inertia_dense
strict_convergence
=
False
strict_convergence
=
False
...
@@ -67,11 +70,12 @@ def _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter=300,
...
@@ -67,11 +70,12 @@ def _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter=300,
# nested parallelism (i.e. BLAS) to avoid oversubscription.
# nested parallelism (i.e. BLAS) to avoid oversubscription.
with
threadpool_limits
(
limits
=
1
,
user_api
=
"blas"
):
with
threadpool_limits
(
limits
=
1
,
user_api
=
"blas"
):
for
i
in
range
(
max_iter
):
for
i
in
range
(
max_iter
):
lloyd_iter
(
X
,
sample_weight
,
x_squared_norms
,
centers
,
centers_new
,
lloyd_iter_chunked_dense
(
X
,
sample_weight
,
x_squared_norms
,
centers
,
weight_in_clusters
,
labels
,
center_shift
,
n_threads
)
centers_new
,
weight_in_clusters
,
labels
,
center_shift
,
n_threads
)
if
verbose
:
if
verbose
:
inertia
=
_inertia
(
X
,
sample_weight
,
centers
,
labels
)
inertia
=
_inertia
_dense
(
X
,
sample_weight
,
centers
,
labels
)
print
(
f"Iteration
{
i
}
, inertia
{
inertia
}
."
)
print
(
f"Iteration
{
i
}
, inertia
{
inertia
}
."
)
centers
,
centers_new
=
centers_new
,
centers
centers
,
centers_new
=
centers_new
,
centers
...
@@ -95,10 +99,28 @@ def _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter=300,
...
@@ -95,10 +99,28 @@ def _kmeans_single_lloyd(X, sample_weight, centers_init, max_iter=300,
if
not
strict_convergence
:
if
not
strict_convergence
:
# rerun E-step so that predicted labels match cluster centers
# rerun E-step so that predicted labels match cluster centers
lloyd_iter
(
X
,
sample_weight
,
x_squared_norms
,
centers
,
centers
,
lloyd_iter_chunked_dense
(
X
,
sample_weight
,
x_squared_norms
,
weight_in_clusters
,
labels
,
center_shift
,
n_threads
,
centers
,
centers
,
weight_in_clusters
,
labels
,
center_shift
,
n_threads
,
update_centers
=
False
)
update_centers
=
False
)
inertia
=
_inertia
(
X
,
sample_weight
,
centers
,
labels
)
inertia
=
_inertia
_dense
(
X
,
sample_weight
,
centers
,
labels
)
return
labels
,
inertia
,
centers
,
i
+
1
return
labels
,
inertia
,
centers
,
i
+
1
if
__name__
==
"__main__"
:
np
.
random
.
seed
(
1337
)
n_classes
=
2
X
,
y
=
make_classification
(
n_samples
=
1000
,
n_classes
=
n_classes
,
n_clusters_per_class
=
1
,
n_informative
=
10
)
centers_init
=
X
[:
n_classes
]
start
=
time
.
time
()
labels
,
inertia
,
centers
,
_
=
_kmeans_single_lloyd
(
X
,
centers_init
,
max_iter
=
100000
,
n_threads
=
100
)
print
(
time
.
time
()
-
start
)
\ No newline at end of file
kmeans/setup.py
View file @
6ba0d820
...
@@ -11,7 +11,10 @@ extensions = [
...
@@ -11,7 +11,10 @@ extensions = [
Extension
(
"_kmeans"
,
Extension
(
"_kmeans"
,
sources
=
[
"_kmeans.pyx"
],
sources
=
[
"_kmeans.pyx"
],
include_dirs
=
[
numpy
.
get_include
()],
include_dirs
=
[
numpy
.
get_include
()],
define_macros
=
[(
"NPY_NO_DEPRECATED_API"
,
"NPY_1_7_API_VERSION"
)],
define_macros
=
[(
"NPY_NO_DEPRECATED_API"
,
"NPY_1_7_API_VERSION"
),
# ("CYTHON_TRACE_NOGIL", "1")
],
)
)
]
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment