Folder: Make recursiveReindexObject scalable by calling _recurseCallMethod.

Should make Folder_reindexAll and most custom indexation methods obsolete.
Remaining valid reindexation methods are:
- reindexObject: for a single document, which may contain subdocuments whose indexation is not necessary
- recursiveReindexObject: for any subtree of documents
- ERP5Site_reindexAll: for site-wide reindexations, as there is a semantic-dependent indexation order.
Also, uniformise and factorise spawning immediateReindexObject.
Also:
- testSupply: Drop check for the previous magic threshold. _recurseCallMethod takes care of it all now.
- testXMLMatrix: Let activities execute before changing cell id. This worked only because recursiveReindexObject on the matrix spawned a single recursiveImmediateReindexObject activity on that context. Now, up to 1k immediateReindexObject activities (for the first 1k sub-objects) are spawned immediately, preventing their renaming immediately after commit. So let the test wait for indexation before trying to rename.
- testERP5Security: More activities are now spawned immediately, adapt.

Folder: Make recursiveReindexObject scalable by calling _recurseCallMethod.
Should make Folder_reindexAll and most custom indexation methods obsolete.
Remaining valid reindexation methods are:
- reindexObject: for a single document, which may contain subdocuments whose indexation is not necessary
- recursiveReindexObject: for any subtree of documents
- ERP5Site_reindexAll: for site-wide reindexations, as there is a semantic-dependent indexation order.
Also, uniformise and factorise spawning immediateReindexObject.
Also:
- testSupply: Drop check for the previous magic threshold. _recurseCallMethod takes care of it all now.
- testXMLMatrix: Let activities execute before changing cell id. This worked only because recursiveReindexObject on the matrix spawned a single recursiveImmediateReindexObject activity on that context. Now, up to 1k immediateReindexObject activities (for the first 1k sub-objects) are spawned immediately, preventing their renaming immediately after commit. So let the test wait for indexation before trying to rename.
- testERP5Security: More activities are now spawned immediately, adapt.
babbf56f · Vincent Pelletier · 028c9ad7 · babbf56f · babbf56f · babbf56f
Commit babbf56f authored Oct 11, 2017 by Vincent Pelletier
5 changed files
--- a/product/ERP5/tests/testSupply.py
+++ b/product/ERP5/tests/testSupply.py
@@ -605,77 +605,6 @@ class TestSaleSupply(TestSupplyMixin, SubcontentReindexingWrapper,
    self.assertEqual(10, movement.getPrice())
    self.abort()

-  def _createTwoHundredSupplyLineInASupply(self):
-    supply = self._makeSupply(
-      start_date_range_min='2014/01/01',
-      start_date_range_max='2014/01/31',
-    )
-    for i in range(200):
-      resource_value = self.portal.product_module['%s_%d' % (self.id(), i)]
-      supply_line = self._makeSupplyLine(supply)
-      supply_line.edit(resource_value=resource_value,
-                       base_price=100)
-    return supply
-
-  def _createTwoHundredResource(self):
-    for i in range(200):
-      self._makeResource('%s_%d' % (self.id(), i))
-
-  def testReindexOnLargeSupply(self):
-    """
-      Make sure that recursiveImmediateReindexObject is not called on the root
-      document when the document has more than 100 sub objects.
-    """
-    self._makeSections()
-    self._createTwoHundredResource()
-    supply = self._createTwoHundredSupplyLineInASupply()
-    # First, clear activities just in case.
-    self.tic()
-    # Editing triggers reindexObject(active_kw={}) through DCWorkflowDefinition.
-    # Not only edit(), but also all the workflow transitions can trigger it.
-    # Likewise, recursiveReindexObject(active_kw={}) is triggered, for instance
-    # in Supply.py, Delivery.py and etc,.
-    # This is because of the reindexObject() method definition on the Documents.
-    supply.edit(title='xx')
-
-    transaction.commit()
-
-    sql_connection = self.getSQLConnection()
-    supply_path = supply.getPath()
-    sql = """SELECT
-               count(*)
-             FROM
-               message
-             WHERE
-               path like '%s'
-             AND
-               method_id = 'recursiveImmediateReindexObject'
-          """ % (supply_path.replace('_', r'\_') + '/%')
-    result = sql_connection.manage_test(sql)
-    all_the_spply_line_activities_count  = result[0]['COUNT(*)']
-    # supply line reindex activity count must be 200 since created 200 lines
-    self.assertEqual(200, all_the_spply_line_activities_count)
-
-    sql_connection = self.getSQLConnection()
-    sql = "SELECT count(*) FROM message WHERE path='%s'" % supply_path
-    result = sql_connection.manage_test(sql)
-    supply_document_reindex_count = result[0]['COUNT(*)']
-    # reindex activity with the same supply must be only one in this case.
-    self.assertEqual(1, supply_document_reindex_count)
-
-    sql = "SELECT count(*) FROM message WHERE path='%s' AND method_id='%s'" \
-           % (supply_path, 'recursiveImmediateReindexObject')
-    result = sql_connection.manage_test(sql)
-    supply_recursive_immediate_reindex_count = result[0]['COUNT(*)']
-
-    # the count of recursiveImmediateReindex on Supply document must be zero
-    # because the supply contains >100 sub objects. And the the suply lines
-    # reindex are already triggerred. Thus if recursiveImmediateReindex
-    # is also triggered on Supply, the reindex will be duplicated. Moreover,
-    # recursiveImmediateReindex in a single node is less efficient comparing
-    # to use all nodes for the reindex, in such a >100 case.
-    self.assertEqual(0, supply_recursive_immediate_reindex_count)
-
 class TestPurchaseSupply(TestSaleSupply):
  """
    Test Purchase Supplies usage

--- a/product/ERP5Security/tests/testERP5Security.py
+++ b/product/ERP5Security/tests/testERP5Security.py
@@ -1270,7 +1270,8 @@ class TestLocalRoleManagement(ERP5TypeTestCase):
    self.assertTrue(len(person.objectIds()))
    person.reindexObjectSecurity()
    self.commit()
-    check(['recursiveImmediateReindexObject'])
+    # One reindexation activity per subobject, and one on the person itself.
+    check(['immediateReindexObject'] * (len(person) + 1))
    self.tic()

 def test_suite():

--- a/product/ERP5Type/Base.py
+++ b/product/ERP5Type/Base.py
@@ -2811,30 +2811,38 @@ class Base( CopyContainer,
    # Do not check if root is indexable, it is done into catalogObjectList,
    # so we will save time
    if self.isIndexable:
-      if activate_kw is None:
-        activate_kw = {}
-
-      reindex_kw = self.getDefaultReindexParameterDict()
-      if reindex_kw is not None:
-        reindex_kw = reindex_kw.copy()
-        reindex_activate_kw = reindex_kw.pop('activate_kw', None) or {}
-        reindex_activate_kw.update(activate_kw)
-        reindex_kw.update(kw)
-        kw = reindex_kw
-        activate_kw = reindex_activate_kw
-
-      group_id_list  = []
-      if kw.get("group_id") not in ('', None):
-        group_id_list.append(kw["group_id"])
-      if kw.get("sql_catalog_id") not in ('', None):
-        group_id_list.append(kw["sql_catalog_id"])
-      group_id = ' '.join(group_id_list)
-
-      self.activate(group_method_id='portal_catalog/catalogObjectList',
-                    alternate_method_id='alternateReindexObject',
-                    group_id=group_id,
-                    serialization_tag=self.getRootDocumentPath(),
-                    **activate_kw).immediateReindexObject(**kw)
+      kw, activate_kw = self._getReindexAndActivateParameterDict(
+        kw,
+        activate_kw,
+      )
+      activate_kw['serialization_tag'] = self.getRootDocumentPath()
+      self.activate(**activate_kw).immediateReindexObject(**kw)
+
+  def _getReindexAndActivateParameterDict(self, kw, activate_kw):
+    if activate_kw is None:
+      activate_kw = ()
+    reindex_kw = self.getDefaultReindexParameterDict()
+    if reindex_kw is not None:
+      reindex_kw = reindex_kw.copy()
+      reindex_activate_kw = reindex_kw.pop('activate_kw', None) or {}
+      reindex_activate_kw.update(activate_kw)
+      reindex_kw.update(kw)
+      kw = reindex_kw
+      activate_kw = reindex_activate_kw
+    else:
+      activate_kw = dict(activate_kw)
+    group_id_list  = []
+    if kw.get("group_id") not in ('', None):
+      group_id_list.append(kw["group_id"])
+    if kw.get("sql_catalog_id") not in ('', None):
+      group_id_list.append(kw["sql_catalog_id"])
+    if activate_kw.get('group_id') not in ('', None):
+      group_id_list.append(activate_kw['group_id'])
+    activate_kw['group_id'] = ' '.join(group_id_list)
+    activate_kw['group_method_id'] = 'portal_catalog/catalogObjectList'
+    activate_kw['alternate_method_id'] = 'alternateReindexObject'
+    activate_kw['activity'] = 'SQLDict'
+    return kw, activate_kw

  security.declarePublic('recursiveReindexObject')
  recursiveReindexObject = reindexObject

--- a/product/ERP5Type/Core/Folder.py
+++ b/product/ERP5Type/Core/Folder.py
@@ -74,7 +74,6 @@ import os
 from zLOG import LOG, WARNING
 import warnings
 from urlparse import urlparse
-REINDEX_SPLIT_COUNT = 100 # if folder containes more than this, reindexing should be splitted.
 from Products.ERP5Type.Message import translateString

 # Dummy Functions for update / upgrade
@@ -1231,49 +1230,26 @@ class Folder(CopyContainer, OFSFolder2, CMFBTreeFolder, CMFHBTreeFolder, Base, F
  security.declarePublic('recursiveReindexObject')
  def recursiveReindexObject(self, activate_kw=None, **kw):
    if self.isIndexable:
-      if not activate_kw and self.objectCount() > REINDEX_SPLIT_COUNT:
-        # If the number of objects to reindex is too high
-        # we should try to split reindexing in order to be more efficient
-        # NOTE: this heuristic will fail for example with orders which
-        # contain > REINDEX_SPLIT_COUNT order lines.
-        # It will be less efficient in this case. We also do not
-        # use this heuristic whenever activate_kw is defined
-        self._reindexObject(**kw)
-        # XXX-JPS: Here, we could invoke Folder_reindexAll instead, like this:
-        #   self.Folder_reindexAll()
-        #   return
-        # this shows that both methods should be merged.
-        for c in self.objectValues():
-          if getattr(aq_base(c),
-                    'recursiveReindexObject', None) is not None:
-            c.recursiveReindexObject(**kw)
-        return
-
-      if activate_kw is None:
-        activate_kw = {}
-
-      reindex_kw = self.getDefaultReindexParameterDict()
-      if reindex_kw is not None:
-        reindex_kw = reindex_kw.copy()
-        reindex_activate_kw = reindex_kw.pop('activate_kw', None) or {}
-        reindex_activate_kw.update(activate_kw)
-        reindex_kw.update(kw)
-        kw = reindex_kw
-        activate_kw = reindex_activate_kw
-
-      group_id_list  = []
-      if kw.get("group_id") not in ('', None):
-        group_id_list.append(kw["group_id"])
-      if kw.get("sql_catalog_id") not in ('', None):
-        group_id_list.append(kw["sql_catalog_id"])
-      group_id = ' '.join(group_id_list)
-
-      self.activate(group_method_id='portal_catalog/catalogObjectList',
-                    expand_method_id='getIndexableChildValueList',
-                    alternate_method_id='alternateReindexObject',
-                    group_id=group_id,
-                    serialization_tag=self.getRootDocumentPath(),
-                    **activate_kw).recursiveImmediateReindexObject(**kw)
+      kw, activate_kw = self._getReindexAndActivateParameterDict(
+        kw,
+        activate_kw,
+      )
+      activate_kw['group_method_cost'] = 0.01
+      self._recurseCallMethod(
+        'immediateReindexObject',
+        method_kw=kw,
+        activate_kw=activate_kw,
+        get_activate_kw_method_id='_updateActivateKwWithSerialisationTag',
+        max_depth=None,
+        skip_method_id='_isDocumentNonIndexable',
+      )
+
+  def _isDocumentNonIndexable(self, document):
+    return not document.isIndexable
+
+  def _updateActivateKwWithSerialisationTag(self, document, activate_kw):
+    activate_kw['serialization_tag'] = document.getRootDocumentPath()
+    return activate_kw

  security.declareProtected( Permissions.AccessContentsInformation,
                             'getIndexableChildValueList' )

--- a/product/ERP5Type/tests/testXMLMatrix.py
+++ b/product/ERP5Type/tests/testXMLMatrix.py
@@ -451,7 +451,7 @@ class TestXMLMatrix(ERP5TypeTestCase, LogInterceptor):

    cell_range = [['2', ], ['b',]]
    matrix.setCellRange(*cell_range, **kwd)
-    self.commit()
+    self.tic()
    self.assertEquals(set(["quantity_1_1"]), set([
      x.getId() for x in matrix.objectValues()]))