Commit 1e40849e authored by Hardik Juneja's avatar Hardik Juneja

[erp5_joblib]: Add examples and related scripts to new erp5_joblib bt5

parent d2c88bd6
##############################################################################
#
# Copyright (c) 2017 Nexedi SARL and Contributors. All Rights Reserved.
# Hardik Juneja <hardik.juneja@nexedi.com>
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
import time
import numpy as np
from copy import copy
from math import sqrt
from Products.ERP5Type.Log import log
from Products.CMFActivity.ActiveResult import ActiveResult
from sklearn.base import clone
from sklearn.utils import check_random_state
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals import joblib
from sklearn.externals.joblib.parallel import parallel_backend, Parallel, delayed
from sklearn.datasets import load_digits
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
#
# Example: simple sqrt calculator
#
def example_simple_function(self, active_process_path):
    """Compute sqrt(i**2) for i in range(5) through joblib and post the result.

    The active process found by traversing ``active_process_path`` from
    ``portal_activities`` is handed to the 'CMFActivity' joblib backend.
    The values returned by joblib are wrapped in an ActiveResult, which is
    posted on that active process, logged, and returned.
    """
    activity_tool = self.portal_activities
    active_process = activity_tool.unrestrictedTraverse(active_process_path)

    # Use CMFActivity as a backend for joblib
    with parallel_backend('CMFActivity', active_process=active_process):
        runner = Parallel(n_jobs=2, pre_dispatch='all', timeout=30, verbose=30)
        root_list = runner(delayed(sqrt)(number**2) for number in range(5))

    # Wrap the computed values in an active result and post it
    active_result = ActiveResult(result=root_list)
    active_process.postResult(active_result)
    log("joblib activity result", active_result)
    return active_result
#
# Example: random forest function
#
def combine(all_ensembles):
    """Merge several ensembles into a single one.

    Returns a shallow copy of the first ensemble whose ``estimators_`` is a
    new list holding the estimators of every ensemble, in input order.
    The ``estimators_`` attribute of the inputs is left untouched.
    """
    merged = copy(all_ensembles[0])
    merged.estimators_ = [
        estimator
        for member in all_ensembles
        for estimator in member.estimators_
    ]
    return merged
def train_model(model, X, y, sample_weight=None, random_state=None):
    """Set ``random_state`` on ``model``, fit it on ``(X, y)`` and return it.

    ``sample_weight`` is forwarded to ``model.fit`` only when it is not None.
    """
    model.set_params(random_state=random_state)
    fit_options = {}
    if sample_weight is not None:
        fit_options['sample_weight'] = sample_weight
    model.fit(X, y, **fit_options)
    return model
def grow_ensemble(base_model, X, y, sample_weight=None, n_estimators=1,
                  n_jobs=1, random_state=None):
    """Train ``n_estimators`` clones of ``base_model`` and combine them.

    ``random_state`` is normalised with ``check_random_state`` and used to
    draw one integer seed per clone. Each clone is trained by
    ``train_model`` through ``joblib.Parallel`` with ``n_jobs`` jobs, and
    the trained models are merged with ``combine``.
    """
    rng = check_random_state(random_state)
    # One past the largest uint32 value is used as the bound for the seeds.
    seed_bound = np.iinfo('uint32').max + 1
    seed_list = rng.randint(seed_bound, size=n_estimators)

    runner = joblib.Parallel(n_jobs=n_jobs)
    trained_models = runner(
        joblib.delayed(train_model)(
            clone(base_model), X, y,
            sample_weight=sample_weight, random_state=seed)
        for seed in seed_list)
    return combine(trained_models)
def example_random_forest_function(self, active_process_path):
    """Grow a random forest ensemble on the digits dataset via CMFActivity.

    The digits data is split into train and test sets, ``grow_ensemble`` is
    called with a RandomForestClassifier and ``n_estimators=10`` under the
    'CMFActivity' joblib backend, and the score of the resulting model on
    the test set is posted on the active process as an ActiveResult.

    Returns the tuple ('ok', number of estimators in the model, score).
    """
    dataset = load_digits()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, random_state=0)

    # Look up the active process designated by the given path
    active_process = self.portal_activities.unrestrictedTraverse(
        active_process_path)

    # Use CMFActivity as a backend for joblib
    with parallel_backend('CMFActivity', n_jobs=2,
                          active_process=active_process):
        forest = grow_ensemble(
            RandomForestClassifier(), X_train, y_train,
            n_estimators=10, n_jobs=2, random_state=42)
        test_score = forest.score(X_test, y_test)

    # Wrap the score in an active result and post it
    active_result = ActiveResult(result=test_score, signature=123)
    active_process.postResult(active_result)
    estimator_count = len(forest.estimators_)
    log('ok', estimator_count)
    return 'ok', estimator_count, test_score
#
# Example: grid search function
#
def example_grid_search_function(self, active_process_path):
    """Fit a GridSearchCV over SVC parameters on the digits dataset.

    The grid holds 3 values for ``C`` and 3 for ``gamma`` (each built with
    ``np.logspace(-10, 10, 3)``) and a single ``tol`` of 1e-4. The fit runs
    under the 'CMFActivity' joblib backend bound to the active process found
    at ``active_process_path``.

    Returns the tuple ('ok', joblib version, seconds elapsed since just
    before the fit started).
    """
    dataset = load_digits()
    features = np.ascontiguousarray(dataset.data)
    targets = np.ascontiguousarray(dataset.target)

    search_space = {
        'C': np.logspace(-10, 10, 3),
        'gamma': np.logspace(-10, 10, 3),
        'tol': [1e-4]
    }
    grid_search = GridSearchCV(SVC(), param_grid=search_space, verbose=10)

    active_process = self.portal_activities.unrestrictedTraverse(
        active_process_path)

    started_at = time.time()
    with parallel_backend('CMFActivity', n_jobs=2,
                          active_process=active_process):
        grid_search.fit(features, targets)
    return 'ok', joblib.__version__, time.time() - started_at
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="Extension Component" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_recorded_property_dict</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAI=</string> </persistent>
</value>
</item>
<item>
<key> <string>default_reference</string> </key>
<value> <string>joblibUseCaseExamples</string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>extension.erp5.joblibUseCaseExamples</string> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Extension Component</string> </value>
</item>
<item>
<key> <string>sid</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>text_content_error_message</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>text_content_warning_message</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>version</string> </key>
<value> <string>erp5</string> </value>
</item>
<item>
<key> <string>workflow_history</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAM=</string> </persistent>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="2" aka="AAAAAAAAAAI=">
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary/>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="3" aka="AAAAAAAAAAM=">
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary>
<item>
<key> <string>component_validation_workflow</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAQ=</string> </persistent>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="4" aka="AAAAAAAAAAQ=">
<pickle>
<global name="WorkflowHistoryList" module="Products.ERP5Type.patches.WorkflowTool"/>
</pickle>
<pickle>
<tuple>
<none/>
<list>
<dictionary>
<item>
<key> <string>action</string> </key>
<value> <string>validate</string> </value>
</item>
<item>
<key> <string>validation_state</string> </key>
<value> <string>validated</string> </value>
</item>
</dictionary>
</list>
</tuple>
</pickle>
</record>
</ZopeData>
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="Folder" module="OFS.Folder"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_objects</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>erp5_joblib_examples</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string></string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
active_process = context.portal_activities.newActiveProcess()
active_process_id = active_process.getId()
path = active_process.getPhysicalPath()
context.portal_activities.activate(activity="SQLQueue", active_process=active_process).Base_joblibRandomForestFunction(path)
return path
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>**kw</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Base_driverScriptRandomForest</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
active_process = context.portal_activities.newActiveProcess()
active_process_id = active_process.getId()
path = active_process.getPhysicalPath()
context.portal_activities.activate(activity="SQLQueue", active_process=active_process).Base_joblibSimpleFunction(path)
return path
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>Script_magic</string> </key>
<value> <int>3</int> </value>
</item>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
</klass>
<tuple/>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>**kw</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Base_driverScriptSquareRoot</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ExternalMethod" module="Products.ExternalMethod.ExternalMethod"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_function</string> </key>
<value> <string>example_random_forest_function</string> </value>
</item>
<item>
<key> <string>_module</string> </key>
<value> <string>joblibUseCaseExamples</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Base_joblibRandomForestFunction</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Base_joblibRandomForestFunction</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="ExternalMethod" module="Products.ExternalMethod.ExternalMethod"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_function</string> </key>
<value> <string>example_simple_function</string> </value>
</item>
<item>
<key> <string>_module</string> </key>
<value> <string>joblibUseCaseExamples</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Base_joblibSimpleFunction</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Base_joblibSimpleFunction</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
##############################################################################
#
# Copyright (c) 2002-2012 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
##############################################################################
from Products.ERP5Type.tests.ERP5TypeTestCase import ERP5TypeTestCase
class Test(ERP5TypeTestCase):
    """
    Test joblib usecases with CMFActivity
    """

    def getTitle(self):
        # Title reported for this test case.
        return "TestJoblibUsecases"

    def test_randomForest(self):
        # Run the random forest driver script, which returns the path of the
        # active process it created.
        path = self.portal.Base_driverScriptRandomForest()
        # NOTE(review): self.tic() is presumably what lets the queued
        # activities run before the results are read - confirm.
        self.tic()
        active_process = self.portal.portal_activities.unrestrictedTraverse(path)
        result = active_process.getResultList()
        # The first posted result is expected to carry the model score.
        self.assertAlmostEqual(0.98444444444444446, result[0].result)

    def test_UnderRootOfSquaresFunction(self):
        # Run the square root driver script, which returns the path of the
        # active process it created.
        path = self.portal.Base_driverScriptSquareRoot()
        # NOTE(review): self.tic() is presumably what lets the queued
        # activities run before the results are read - confirm.
        self.tic()
        active_process = self.portal.portal_activities.unrestrictedTraverse(path)
        result = active_process.getResultList()
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual([0.0, 1.0, 2.0, 3.0, 4.0], result[0].result)
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="Test Component" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_recorded_property_dict</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAI=</string> </persistent>
</value>
</item>
<item>
<key> <string>default_reference</string> </key>
<value> <string>testJoblibActivityUseCase</string> </value>
</item>
<item>
<key> <string>description</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>test.erp5.testJoblibActivityUseCase</string> </value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Test Component</string> </value>
</item>
<item>
<key> <string>sid</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>text_content_error_message</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>text_content_warning_message</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>version</string> </key>
<value> <string>erp5</string> </value>
</item>
<item>
<key> <string>workflow_history</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAM=</string> </persistent>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="2" aka="AAAAAAAAAAI=">
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary/>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="3" aka="AAAAAAAAAAM=">
<pickle>
<global name="PersistentMapping" module="Persistence.mapping"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>data</string> </key>
<value>
<dictionary>
<item>
<key> <string>component_validation_workflow</string> </key>
<value>
<persistent> <string encoding="base64">AAAAAAAAAAQ=</string> </persistent>
</value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</pickle>
</record>
<record id="4" aka="AAAAAAAAAAQ=">
<pickle>
<global name="WorkflowHistoryList" module="Products.ERP5Type.patches.WorkflowTool"/>
</pickle>
<pickle>
<tuple>
<none/>
<list>
<dictionary>
<item>
<key> <string>action</string> </key>
<value> <string>validate</string> </value>
</item>
<item>
<key> <string>validation_state</string> </key>
<value> <string>validated</string> </value>
</item>
</dictionary>
</list>
</tuple>
</pickle>
</record>
</ZopeData>
Tools for Joblib and CMF Activity Integration
\ No newline at end of file
extension.erp5.joblibUseCaseExamples
\ No newline at end of file
erp5_joblib_examples
\ No newline at end of file
test.erp5.testJoblibActivityUseCase
\ No newline at end of file
erp5_joblib
\ No newline at end of file
0.1
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment