Commit babbf56f authored by Vincent Pelletier's avatar Vincent Pelletier

Folder: Make recursiveReindexObject scalable by calling _recurseCallMethod.

Should make Folder_reindexAll and most custom indexation methods obsolete.
Remaining valid reindexation methods are:
- reindexObject: for a single document, which may contain subdocuments
  which indexation is not necessary
- recursiveReindexobject: for any subtree of documents
- ERP5Site_reindexAll: for site-wide reindexations, as there is a semantic-
  dependent indexation order.

Also, uniformise and factorise spawning immediateReindexObject.

Also:
- testSupply: Drop check for the previous magic threshold.
  _recurseCallMethod takes care of it all now.
- testXMLMatrix: Let activities execute before changing cell id.
  This works only because recursiveReindexObject on the matrix spawns a
  single recursiveImmediateReindexObject activity on that context. Now,
  up to 1k immediateReindexObject activities (for the first 1k sub-objects)
  are spawned immediately, preventing their renaming immediately after
  commit.
  So let test wait for indexation before trying to rename.
- testERP5Security: More activities are now spawned immediately, adapt.
parent 028c9ad7
......@@ -605,77 +605,6 @@ class TestSaleSupply(TestSupplyMixin, SubcontentReindexingWrapper,
self.assertEqual(10, movement.getPrice())
self.abort()
def _createTwoHundredSupplyLineInASupply(self):
supply = self._makeSupply(
start_date_range_min='2014/01/01',
start_date_range_max='2014/01/31',
)
for i in range(200):
resource_value = self.portal.product_module['%s_%d' % (self.id(), i)]
supply_line = self._makeSupplyLine(supply)
supply_line.edit(resource_value=resource_value,
base_price=100)
return supply
def _createTwoHundredResource(self):
for i in range(200):
self._makeResource('%s_%d' % (self.id(), i))
def testReindexOnLargeSupply(self):
"""
Make sure that recursiveImmediateReindexObject is not called on the root
document when the document has more than 100 sub objects.
"""
self._makeSections()
self._createTwoHundredResource()
supply = self._createTwoHundredSupplyLineInASupply()
# First, clear activities just in case.
self.tic()
# Editing triggers reindexObject(active_kw={}) through DCWorkflowDefinition.
# Not only edit(), but also all the workflow transitions can trigger it.
# Likewise, recursiveReindexObject(active_kw={}) is triggered, for instance
# in Supply.py, Delivery.py and etc,.
# This is because of the reindexObject() method definition on the Documents.
supply.edit(title='xx')
transaction.commit()
sql_connection = self.getSQLConnection()
supply_path = supply.getPath()
sql = """SELECT
count(*)
FROM
message
WHERE
path like '%s'
AND
method_id = 'recursiveImmediateReindexObject'
""" % (supply_path.replace('_', r'\_') + '/%')
result = sql_connection.manage_test(sql)
all_the_spply_line_activities_count = result[0]['COUNT(*)']
# supply line reindex activity count must be 200 since created 200 lines
self.assertEqual(200, all_the_spply_line_activities_count)
sql_connection = self.getSQLConnection()
sql = "SELECT count(*) FROM message WHERE path='%s'" % supply_path
result = sql_connection.manage_test(sql)
supply_document_reindex_count = result[0]['COUNT(*)']
# reindex activity with the same supply must be only one in this case.
self.assertEqual(1, supply_document_reindex_count)
sql = "SELECT count(*) FROM message WHERE path='%s' AND method_id='%s'" \
% (supply_path, 'recursiveImmediateReindexObject')
result = sql_connection.manage_test(sql)
supply_recursive_immediate_reindex_count = result[0]['COUNT(*)']
# the count of recursiveImmediateReindex on Supply document must be zero
# because the supply contains >100 sub objects. And the the suply lines
# reindex are already triggerred. Thus if recursiveImmediateReindex
# is also triggered on Supply, the reindex will be duplicated. Moreover,
# recursiveImmediateReindex in a single node is less efficient comparing
# to use all nodes for the reindex, in such a >100 case.
self.assertEqual(0, supply_recursive_immediate_reindex_count)
class TestPurchaseSupply(TestSaleSupply):
"""
Test Purchase Supplies usage
......
......@@ -1270,7 +1270,8 @@ class TestLocalRoleManagement(ERP5TypeTestCase):
self.assertTrue(len(person.objectIds()))
person.reindexObjectSecurity()
self.commit()
check(['recursiveImmediateReindexObject'])
# One reindexation activity per subobject, and one on the person itself.
check(['immediateReindexObject'] * (len(person) + 1))
self.tic()
def test_suite():
......
......@@ -2811,30 +2811,38 @@ class Base( CopyContainer,
# Do not check if root is indexable, it is done into catalogObjectList,
# so we will save time
if self.isIndexable:
if activate_kw is None:
activate_kw = {}
reindex_kw = self.getDefaultReindexParameterDict()
if reindex_kw is not None:
reindex_kw = reindex_kw.copy()
reindex_activate_kw = reindex_kw.pop('activate_kw', None) or {}
reindex_activate_kw.update(activate_kw)
reindex_kw.update(kw)
kw = reindex_kw
activate_kw = reindex_activate_kw
group_id_list = []
if kw.get("group_id") not in ('', None):
group_id_list.append(kw["group_id"])
if kw.get("sql_catalog_id") not in ('', None):
group_id_list.append(kw["sql_catalog_id"])
group_id = ' '.join(group_id_list)
self.activate(group_method_id='portal_catalog/catalogObjectList',
alternate_method_id='alternateReindexObject',
group_id=group_id,
serialization_tag=self.getRootDocumentPath(),
**activate_kw).immediateReindexObject(**kw)
kw, activate_kw = self._getReindexAndActivateParameterDict(
kw,
activate_kw,
)
activate_kw['serialization_tag'] = self.getRootDocumentPath()
self.activate(**activate_kw).immediateReindexObject(**kw)
def _getReindexAndActivateParameterDict(self, kw, activate_kw):
if activate_kw is None:
activate_kw = ()
reindex_kw = self.getDefaultReindexParameterDict()
if reindex_kw is not None:
reindex_kw = reindex_kw.copy()
reindex_activate_kw = reindex_kw.pop('activate_kw', None) or {}
reindex_activate_kw.update(activate_kw)
reindex_kw.update(kw)
kw = reindex_kw
activate_kw = reindex_activate_kw
else:
activate_kw = dict(activate_kw)
group_id_list = []
if kw.get("group_id") not in ('', None):
group_id_list.append(kw["group_id"])
if kw.get("sql_catalog_id") not in ('', None):
group_id_list.append(kw["sql_catalog_id"])
if activate_kw.get('group_id') not in ('', None):
group_id_list.append(activate_kw['group_id'])
activate_kw['group_id'] = ' '.join(group_id_list)
activate_kw['group_method_id'] = 'portal_catalog/catalogObjectList'
activate_kw['alternate_method_id'] = 'alternateReindexObject'
activate_kw['activity'] = 'SQLDict'
return kw, activate_kw
security.declarePublic('recursiveReindexObject')
recursiveReindexObject = reindexObject
......
......@@ -74,7 +74,6 @@ import os
from zLOG import LOG, WARNING
import warnings
from urlparse import urlparse
REINDEX_SPLIT_COUNT = 100 # if folder containes more than this, reindexing should be splitted.
from Products.ERP5Type.Message import translateString
# Dummy Functions for update / upgrade
......@@ -1231,49 +1230,26 @@ class Folder(CopyContainer, OFSFolder2, CMFBTreeFolder, CMFHBTreeFolder, Base, F
security.declarePublic('recursiveReindexObject')
def recursiveReindexObject(self, activate_kw=None, **kw):
if self.isIndexable:
if not activate_kw and self.objectCount() > REINDEX_SPLIT_COUNT:
# If the number of objects to reindex is too high
# we should try to split reindexing in order to be more efficient
# NOTE: this heuristic will fail for example with orders which
# contain > REINDEX_SPLIT_COUNT order lines.
# It will be less efficient in this case. We also do not
# use this heuristic whenever activate_kw is defined
self._reindexObject(**kw)
# XXX-JPS: Here, we could invoke Folder_reindexAll instead, like this:
# self.Folder_reindexAll()
# return
# this shows that both methods should be merged.
for c in self.objectValues():
if getattr(aq_base(c),
'recursiveReindexObject', None) is not None:
c.recursiveReindexObject(**kw)
return
if activate_kw is None:
activate_kw = {}
reindex_kw = self.getDefaultReindexParameterDict()
if reindex_kw is not None:
reindex_kw = reindex_kw.copy()
reindex_activate_kw = reindex_kw.pop('activate_kw', None) or {}
reindex_activate_kw.update(activate_kw)
reindex_kw.update(kw)
kw = reindex_kw
activate_kw = reindex_activate_kw
group_id_list = []
if kw.get("group_id") not in ('', None):
group_id_list.append(kw["group_id"])
if kw.get("sql_catalog_id") not in ('', None):
group_id_list.append(kw["sql_catalog_id"])
group_id = ' '.join(group_id_list)
self.activate(group_method_id='portal_catalog/catalogObjectList',
expand_method_id='getIndexableChildValueList',
alternate_method_id='alternateReindexObject',
group_id=group_id,
serialization_tag=self.getRootDocumentPath(),
**activate_kw).recursiveImmediateReindexObject(**kw)
kw, activate_kw = self._getReindexAndActivateParameterDict(
kw,
activate_kw,
)
activate_kw['group_method_cost'] = 0.01
self._recurseCallMethod(
'immediateReindexObject',
method_kw=kw,
activate_kw=activate_kw,
get_activate_kw_method_id='_updateActivateKwWithSerialisationTag',
max_depth=None,
skip_method_id='_isDocumentNonIndexable',
)
def _isDocumentNonIndexable(self, document):
return not document.isIndexable
def _updateActivateKwWithSerialisationTag(self, document, activate_kw):
activate_kw['serialization_tag'] = document.getRootDocumentPath()
return activate_kw
security.declareProtected( Permissions.AccessContentsInformation,
'getIndexableChildValueList' )
......
......@@ -451,7 +451,7 @@ class TestXMLMatrix(ERP5TypeTestCase, LogInterceptor):
cell_range = [['2', ], ['b',]]
matrix.setCellRange(*cell_range, **kwd)
self.commit()
self.tic()
self.assertEquals(set(["quantity_1_1"]), set([
x.getId() for x in matrix.objectValues()]))
......
  • With this commit, some objects that were not indexed are now indexed, and this fails when these objects can't be indexed (maybe for bad reasons).

    We found this for a PythonScript that's stored inside a Web Site:

    Traceback (innermost last):
     Module Products.CMFActivity.ActivityTool, line 1360, in invokeGroup
       traverse(method_id)(expanded_object_list)
     Module Products.ERP5Catalog.CatalogTool, line 939, in catalogObjectList
       super(CatalogTool, self).catalogObjectList(tmp_object_list, **m.kw)
     Module Products.ZSQLCatalog.ZSQLCatalog, line 828, in catalogObjectList
       catalog_value=catalog,
     Module Products.ERP5Catalog.CatalogTool, line 875, in wrapObjectList
      - __traceback_info__: (<PythonScript at /tristan_test/web_site_module/nexedi_global/WebSite_setSkin>,)
       and document_object._getAcquireLocalRoles():
    AttributeError: _getAcquireLocalRoles

    Traceback of the activity creation:

     ...
     File "product/ERP5Type/Core/Folder.py", line 1244, in recursiveReindexObject
       skip_method_id='_isDocumentNonIndexable',
     File "product/ERP5Type/Core/Folder.py", line 520, in _recurseCallMethod
       recurse(self, 0)
     File "product/ERP5Type/Core/Folder.py", line 506, in recurse
       recurse(ob, depth + 1)
     File "product/ERP5Type/Core/Folder.py", line 517, in recurse
       method_id)(*method_args, **method_kw)
     File "product/CMFActivity/ActivityTool.py", line 546, in __call__
       portal_activities=portal_activities, 

    What happens is that before, the recursion was done with:

              if getattr(aq_base(c),
                        'recursiveReindexObject', None) is not None:
                c.recursiveReindexObject(**kw)

    (the object is skipped because it does not have a recursiveReindexObject method)

    and now the recursion is done within _recurseCallMethod: skip_method_id is only considered for the root object. Well, not exactly: I guess that if the recursion is split at such object, we'll get a AttributeError because PythonScript does not have _isDocumentNonIndexable.

    So there's at least one bug in the recent changes of _recurseCallMethod: it must ignore self except for placeless things, or maybe also for the initial call (e.g. skip_method_id = kw.pop('skip_method_id', None)).

    Do we want such object indexed?

    /cc @tc

  • skip_method_id is only considered for the root object

    But it acts on its argument (document, and not self), which it the current recustion level. So I am not sure what you mean here. This was made this way to be consistent with how _recurseCallMethod works: it is always called on the same context, and recursion depth & location is conveyed by its arguments.

    (e.g. skip_method_id = kw.pop('skip_method_id', None))

    The intent is really to check on each level whether recursion should happen, which current code does AFAIK. So poping (so the check does not happen on recursed subtree) is not what I intend for this method.

    For example, I want to be able to call portal_trash.recursiveReindexObject(), which will only reindex portal_trash/* but not portal_trash/*/**.

    EDIT: The reason why portal_trash/*/** is not indexed is that TrashBin.isSubtreeIndexable returns False: the TrashBins themselves are indexable, but none of their content is.

    Do we want such object indexed?

    I have no fundamental objection to indexing these: scripts in websites have a canonical path which can be reconstructed anywhere (even if it has to involve the same acquisition workarounds as when editing web pages), unlike (famously) everything under portal_skins.

    So to me the fix is to exclude the whole subtree when encountering an object which cannot be indexed (as is visibly the case here).

    The rationale for skipping the entire subtree here is that a non-indexable object does not have a (meaningful) uid, so parent_uid of their immediate children will be meaningless, so the subtree cannot be correctly indexed.

    The next level of the fix, which is optional to me but I have nothing against it either, is to make these Python Scripts indexable.

    Edited by Vincent Pelletier
  • About skip_method_id, I misread the code and kw.pop('skip_method_id', None) would be wrong. _isDocumentNonIndexable is really called for the script, and isSubtreeIndexable is acquired and returns True. I also made a mistake about what happens in case of split: contrary to expand (in simulation), _recurseCallMethod is always resumed on the root object.

    So to me the fix is to exclude the whole subtree when encountering an object which cannot be indexed (as is visibly the case here).

    Yes, and that can be done by just changing _isDocumentNonIndexable. Maybe:

      def _isDocumentNonIndexable(self, document):
        return (getattr(aq_base(document), 'isSubtreeIndexable', None) is None
                or not document.isSubtreeIndexable())
  • Looks good to me, yes.

    I gave a shot at factorising such code pattern (get attribute on aq_base, then get attribute again on wrapped object), but I did not get very convincing results. I mostly found a heap of useless aq_base imports in the process.

  • I gave a shot at factorising such code pattern (get attribute on aq_base, then get attribute again on wrapped object), but I did not get very convincing results.

    There's aq_explicit but it does not exist as a function so the object must be in an acquisition wrapper. Here, we could do:

      def _isDocumentNonIndexable(self, document):
        try:
          isSubtreeIndexable = document.aq_explicit.isSubtreeIndexable
        except AttributeError:
          return True
        return not isSubtreeIndexable()
  • There's aq_explicit

    But then doesn't self (from inside isSubtreeIndexable) also become an explicit acquisition wrapper ?

    I think we do not care for this spefic instance, as such methods should be simple enough without relying on implicit acquisition. For other uses (either other uses of this pattern - although I found fewer than I expected - or for possible future evolution of this use) it could be inappropriate.

  • You're right. That would be wrong to get methods.

    ipdb> self
    <Person Module at /erp5/person_module>
    ipdb> self.portal_catalog
    <Catalog Tool at /erp5/portal_catalog used for /erp5/person_module>
    ipdb> self.aq_explicit.portal_catalog
    *** AttributeError: portal_catalog
    ipdb> self.aq_explicit.getObject.__self__
    <Person Module at /erp5/person_module>
    ipdb> self.aq_explicit.getObject.__self__.portal_catalog
    *** AttributeError: portal_catalog
  • mentioned in commit 614a8349

    Toggle commit list
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment