test result: immediately redraft test result line on task failure

Right now we have this scenario: - a test result line is started - sometimes runTestSuite fails (e.g. on a timeout); the failure is reported but the test result line remains started (we do not yet know which line is associated with which test node) - once a test result line has been "started" for more than 4 hours, it is redrafted - the test can then be re-executed. Speed this up by removing the need to wait for the alarm: record which test result line is executed by which test node, and redraft the test result line immediately on test node failure.

test result: immediately redraft test result line on task failure
Right now we have this scenario: - a test result line is started - sometimes runTestSuite fails (e.g. on a timeout); the failure is reported but the test result line remains started (we do not yet know which line is associated with which test node) - once a test result line has been "started" for more than 4 hours, it is redrafted - the test can then be re-executed. Speed this up by removing the need to wait for the alarm: record which test result line is executed by which test node, and redraft the test result line immediately on test node failure.
fe3bf27f · Sebastien Robin · 58d4ab8e · fe3bf27f · fe3bf27f · fe3bf27f
Commit fe3bf27f authored Apr 19, 2019 by Sebastien Robin
4 changed files
--- a/bt5/erp5_test_result/TestTemplateItem/portal_components/test.erp5.testTaskDistribution.py
+++ b/bt5/erp5_test_result/TestTemplateItem/portal_components/test.erp5.testTaskDistribution.py
@@ -358,7 +358,7 @@ class TestTaskDistribution(ERP5TypeTestCase):
    # we commit, since usually we have a remote call only doing this
    (self.tic if tic else self.commit)()
    return result
-    
+
  def test_05_createTestResult(self):
    """
    We will check the method createTestResult of distributor
@@ -504,7 +504,6 @@ class TestTaskDistribution(ERP5TypeTestCase):
            ).stop(test_count=1, duration=1000)
    test_result.stop()
    self.tic()
-
    test_result_path, _ = self._createTestResult(
      test_list=['testSlow', 'testFast', 'testFailing'])
    # we run first the tests failing in previous run
@@ -603,12 +602,12 @@ class TestTaskDistribution(ERP5TypeTestCase):
      self.tool.startUnitTest(test_result_path)
      # We have a failure but with recent activities on tests
      self.pinDateTime(now - 1.0/24*1.5)
-      self.tool.reportTaskFailure(test_result_path, {}, "Node0")
+      self.distributor.reportTaskFailure(test_result_path, {}, "Node0")
      self.assertEqual("failed", node.getSimulationState())
      self.assertEqual("started", test_result.getSimulationState())
      # We have a failure but with no recent activities on tests
      self.pinDateTime(now)
-      self.tool.reportTaskFailure(test_result_path, {}, "Node0")
+      self.distributor.reportTaskFailure(test_result_path, {}, "Node0")
      self.assertEqual("failed", node.getSimulationState())
      self.assertEqual("failed", test_result.getSimulationState())
    finally:
@@ -621,8 +620,8 @@ class TestTaskDistribution(ERP5TypeTestCase):
    But on the other hand, if a test result line is started many times (due to
    automatic redraft), then this might just means we have issue of runTestSuite unable
    to finish tests, or we might have just tests that can never be executed within timeout time.
-    In such case, it's better to mark test result as failed to give a chance to other test
-    suites to be executed
+    In such case, it's better to mark test result as failed to give a chance to switch
+    to new revision
    """
    now = DateTime()
    try:
@@ -634,25 +633,24 @@ class TestTaskDistribution(ERP5TypeTestCase):
      node, = test_result.objectValues(portal_type="Test Result Node",
                                           sort_on=[("title", "ascending")])
      self.assertEqual("started", node.getSimulationState())
-      self.tool.startUnitTest(test_result_path)
+      self.distributor.startUnitTest(test_result_path, node_title="Node0")
      self.checkTestResultLine(test_result, [('testFoo', 'started')])
      # We have a failure but with recent activities on tests
+      # so do not mark the test result as failed
      self.pinDateTime(now - 1.0/24*7.5)
-      self.tool.reportTaskFailure(test_result_path, {}, "Node0")
+      self.distributor.reportTaskFailure(test_result_path, {}, "Node0")
      self.assertEqual("failed", node.getSimulationState())
      self.assertEqual("started", test_result.getSimulationState())
-      self.checkTestResultLine(test_result, [('testFoo', 'started')])
-      # some hours later, test line is redrafted
-      self.pinDateTime(now - 1.0/24*3)
-      self._callRestartStuckTestResultAlarm()
+      # test result line redrafted due to reportTaskFailure
      self.checkTestResultLine(test_result, [('testFoo', 'draft')])
      # Test is then relaunched
-      self.tool.startUnitTest(test_result_path)
+      self.pinDateTime(now - 1.0/24*7)
+      self.tool.startUnitTest(test_result_path, node_title="Node0")
      self.checkTestResultLine(test_result, [('testFoo', 'started')])
      # We have another failure but remains only test result line that was already
      # redrafted, so we have to mark the test result as failed
-      self.pinDateTime(now - 1.0/24*2.5)
-      self.tool.reportTaskFailure(test_result_path, {}, "Node0")
+      self.pinDateTime(now - 1.0/24*4)
+      self.distributor.reportTaskFailure(test_result_path, {}, "Node0")
      self.assertEqual("failed", node.getSimulationState())
      self.assertEqual("failed", test_result.getSimulationState())
    finally:

--- a/erp5/util/taskdistribution/__init__.py
+++ b/erp5/util/taskdistribution/__init__.py
@@ -139,10 +139,11 @@ class TestResultLineProxy(RPCRetry):
      Test name, as provided to TaskDistributor.createTestResult .
    """
    def __init__(self, proxy, retry_time, logger, test_result_line_path,
-            test_name):
+            test_name, node_title=None):
        super(TestResultLineProxy, self).__init__(proxy, retry_time, logger)
        self._test_result_line_path = test_result_line_path
        self._name = test_name
+        self._node_title = node_title

    def __repr__(self):
        return '<%s(%r, %r) at %x>' % (self.__class__.__name__,
@@ -191,7 +192,7 @@ class TestResultLineProxy(RPCRetry):
            self._logger.info('Extra parameters provided: %r', kw)
            status_dict.update(kw)
        self._retryRPC('stopUnitTest', (self._test_result_line_path,
-            binarize_args(status_dict)))
+            binarize_args(status_dict), self._node_title))

 class TestResultProxy(RPCRetry):
    """
@@ -246,7 +247,7 @@ class TestResultProxy(RPCRetry):
        if result:
            line_url, test_name = result
            result = TestResultLineProxy(self._proxy, self._retry_time,
-                self._logger, line_url, test_name)
+                self._logger, line_url, test_name, node_title=self._node_title)
        return result

    def reportFailure(self, date=None, command=None, stdout=None, stderr=None):

--- a/product/ERP5/Document/ERP5ProjectUnitTestDistributor.py
+++ b/product/ERP5/Document/ERP5ProjectUnitTestDistributor.py
@@ -436,22 +436,24 @@ class ERP5ProjectUnitTestDistributor(XMLObject):
    return test_suite

  security.declarePublic("startUnitTest")
-  def startUnitTest(self,test_result_path,exclude_list=()):
+  def startUnitTest(self, test_result_path, exclude_list=(), node_title=None):
    """
    Here this is only a proxy to the task distribution tool
    """
    LOG('ERP5ProjectUnitTestDistributor.startUnitTest', 0, test_result_path)
    portal = self.getPortalObject()
-    return portal.portal_task_distribution.startUnitTest(test_result_path,exclude_list)
+    return portal.portal_task_distribution.startUnitTest(test_result_path,exclude_list,
+                  node_title=node_title)

  security.declarePublic("stopUnitTest")
-  def stopUnitTest(self,test_path,status_dict):
+  def stopUnitTest(self,test_path,status_dict, node_title=None):
    """
    Here this is only a proxy to the task distribution tool
    """
    LOG('ERP5ProjectUnitTestDistributor.stop_unit_test', 0, test_path)
    portal = self.getPortalObject()
-    return portal.portal_task_distribution.stopUnitTest(test_path, status_dict)
+    return portal.portal_task_distribution.stopUnitTest(test_path, status_dict,
+                  node_title=node_title)

  security.declarePublic("generateConfiguration")
  def generateConfiguration(self, test_suite_title, batch_mode=0):

--- a/product/ERP5/Tool/TaskDistributionTool.py
+++ b/product/ERP5/Tool/TaskDistributionTool.py
@@ -152,7 +152,7 @@ class TaskDistributionTool(BaseTool):
    catalog_kw = {'portal_type': 'Test Result',
                  'title': SimpleQuery(comparison_operator='=', title=test_title),
                  'sort_on': (("creation_date","descending"),),
-                  'query': NegatedQuery(SimpleQuery(simulation_state="cancelled")),
+                  'simulation_state': NegatedQuery(SimpleQuery(simulation_state="cancelled")),
                  'limit': 1}
    result_list = portal.test_result_module.searchFolder(**catalog_kw)
    if result_list:
@@ -182,6 +182,9 @@ class TaskDistributionTool(BaseTool):
            if reference_list_string is not None:
              if reference_list_string == test_result.getReference():
                return
+              # If we are here, latest test result might be an old revision created
+              # by hand, then we should not test a newer revision already tested
+              catalog_kw['simulation_state'] = ["stopped", "public_stopped"]
              if portal.test_result_module.searchFolder(
                   reference=SimpleQuery(comparison_operator='=', reference=reference_list_string),
                   **catalog_kw):
@@ -214,7 +217,7 @@ class TaskDistributionTool(BaseTool):
    return test_result.getRelativeUrl(), revision

  security.declarePublic('startUnitTest')
-  def startUnitTest(self, test_result_path, exclude_list=()):
+  def startUnitTest(self, test_result_path, exclude_list=(), node_title=None):
    """(temporary)
      - test_result_path (string)
      - exclude_list (list of strings)
@@ -234,11 +237,14 @@ class TaskDistributionTool(BaseTool):
        state = line.getSimulationState()
        test = line.getRelativeUrl(), test
        if state == 'draft':
+          if node_title:
+            node = self._getTestNodeRelativeUrl(node_title)
+            line.setSource(node)
          line.start()
          return test

  security.declarePublic('stopUnitTest')
-  def stopUnitTest(self, test_path, status_dict):
+  def stopUnitTest(self, test_path, status_dict, node_title=None):
    """(temporary)
      - test_path (string)
      - status_dict (dict)
@@ -271,11 +277,20 @@ class TaskDistributionTool(BaseTool):
                                                          status_dict)))
    portal = self.getPortalObject()
    test_result = portal.restrictedTraverse(test_result_path)
-    node = self._getTestResultNode(test_result, node_title)
-    assert node is not None
-    node.fail(**status_dict)
-    for node in test_result.objectValues(portal_type='Test Result Node'):
-      if node.getSimulationState() != 'failed':
+    test_result_node = self._getTestResultNode(test_result, node_title)
+    assert test_result_node is not None
+    test_result_node.fail(**status_dict)
+    # Redraft all test result lines that were affected to that test node
+    # to allow immediate reexecution (useful in case of timeout raised
+    # by a runTestSuite process)
+    for line in test_result.objectValues(portal_type="Test Result Line"):
+      if line.getSimulationState() == "started" and line.getSourceTitle() == node_title:
+        line.redraft()
+    # If all test nodes failed, we would like to cancel the test result, giving
+    # opportunity to testnode to start working on a newer version of repository,
+    # possibly coming with a fix avoiding current failure
+    for test_result_node in test_result.objectValues(portal_type='Test Result Node'):
+      if test_result_node.getSimulationState() != 'failed':
        break
    else:
      # now check if we had recent work on test line, if so, this means
@@ -284,13 +299,13 @@ class TaskDistributionTool(BaseTool):
      recent_time = DateTime() - 1.0/24
      for test_result_line in test_result.objectValues(
          portal_type="Test Result Line"):
-        if test_result_line.getModificationDate() > recent_time:
+        if test_result_line.getModificationDate() >= recent_time:
          # do not take into account redrafted lines, this means we already
-          # had issues with them
+          # had issues with them (just one time, since we already redraft above)
          if len([x for x in portal.portal_workflow.getInfoFor(
                  ob=test_result_line,
                  name='history',
-                  wf_id='test_result_workflow') if x['action']=='redraft']) == 0:
+                  wf_id='test_result_workflow') if x['action']=='redraft']) <= 1:
            break
      else:
        if test_result.getSimulationState() not in ('failed', 'cancelled'):
@@ -326,4 +341,4 @@ class TaskDistributionTool(BaseTool):
    portal = self.getPortalObject()
    memcached_dict = portal.portal_memcached.getMemcachedDict(
                            "task_distribution", "default_memcached_plugin")
-    return memcached_dict
+    return memcached_dict
\ No newline at end of file