Commit fe3bf27f authored by Sebastien Robin

test result: immediately redraft test result line on task failure

Right now we have this scenario:
- a test result line is started
- sometimes runTestSuite fails (e.g. on a timeout); the failure is
  reported, but the test result line remains started (we do not yet
  know which line is associated with which test node)
- once a test result line has been "started" for more than 4 hours,
  it is redrafted
- the test can then be reexecuted
Speed this up by removing the need to wait for the alarm: record which
test result line is executed by which test node, and redraft that test
result line immediately when the test node reports a failure.
parent 58d4ab8e
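The heart of the change is TaskDistributionTool.reportTaskFailure (full diff below). A minimal sketch of the new behaviour, assuming the simplified control flow shown here and leaving out the recent-activity and redraft-history checks that decide whether the whole test result should be failed:

  def reportTaskFailure(self, test_result_path, status_dict, node_title):
    test_result = self.getPortalObject().restrictedTraverse(test_result_path)
    # mark the reporting test node as failed
    test_result_node = self._getTestResultNode(test_result, node_title)
    test_result_node.fail(**status_dict)
    # immediately redraft the lines this node was running, so they can be
    # picked up again without waiting for the "stuck test result" alarm
    for line in test_result.objectValues(portal_type="Test Result Line"):
      if (line.getSimulationState() == "started"
          and line.getSourceTitle() == node_title):
        line.redraft()
    # only once every node has failed do we consider failing the whole
    # test result (simplified here; the real code also checks recent activity)
    if all(node.getSimulationState() == 'failed' for node
           in test_result.objectValues(portal_type='Test Result Node')):
      test_result.fail()

This is only possible because startUnitTest now records the executing node on the line (line.setSource(node)), which is what the getSourceTitle() comparison relies on.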
@@ -358,7 +358,7 @@ class TestTaskDistribution(ERP5TypeTestCase):
     # we commit, since usually we have a remote call only doing this
     (self.tic if tic else self.commit)()
     return result

   def test_05_createTestResult(self):
     """
     We will check the method createTestResult of distributor
@@ -504,7 +504,6 @@ class TestTaskDistribution(ERP5TypeTestCase):
       ).stop(test_count=1, duration=1000)
     test_result.stop()
     self.tic()
     test_result_path, _ = self._createTestResult(
       test_list=['testSlow', 'testFast', 'testFailing'])
     # we run first the tests failing in previous run
@@ -603,12 +602,12 @@ class TestTaskDistribution(ERP5TypeTestCase):
       self.tool.startUnitTest(test_result_path)
       # We have a failure but with recent activities on tests
       self.pinDateTime(now - 1.0/24*1.5)
-      self.tool.reportTaskFailure(test_result_path, {}, "Node0")
+      self.distributor.reportTaskFailure(test_result_path, {}, "Node0")
       self.assertEqual("failed", node.getSimulationState())
       self.assertEqual("started", test_result.getSimulationState())
       # We have a failure but with no recent activities on tests
       self.pinDateTime(now)
-      self.tool.reportTaskFailure(test_result_path, {}, "Node0")
+      self.distributor.reportTaskFailure(test_result_path, {}, "Node0")
       self.assertEqual("failed", node.getSimulationState())
       self.assertEqual("failed", test_result.getSimulationState())
     finally:
@@ -621,8 +620,8 @@ class TestTaskDistribution(ERP5TypeTestCase):
     But on the other hand, if a test result line is started many times (due to
     automatic redraft), then this might just means we have issue of runTestSuite unable
     to finish tests, or we might have just tests that can never be executed within timeout time.
-    In such case, it's better to mark test result as failed to give a chance to other test
-    suites to be executed
+    In such case, it's better to mark test result as failed to give a chance to switch
+    to new revision
     """
     now = DateTime()
     try:
@@ -634,25 +633,24 @@ class TestTaskDistribution(ERP5TypeTestCase):
       node, = test_result.objectValues(portal_type="Test Result Node",
                                        sort_on=[("title", "ascending")])
       self.assertEqual("started", node.getSimulationState())
-      self.tool.startUnitTest(test_result_path)
+      self.distributor.startUnitTest(test_result_path, node_title="Node0")
       self.checkTestResultLine(test_result, [('testFoo', 'started')])
       # We have a failure but with recent activities on tests
+      # so do not mark the test result as failed
       self.pinDateTime(now - 1.0/24*7.5)
-      self.tool.reportTaskFailure(test_result_path, {}, "Node0")
+      self.distributor.reportTaskFailure(test_result_path, {}, "Node0")
       self.assertEqual("failed", node.getSimulationState())
       self.assertEqual("started", test_result.getSimulationState())
-      self.checkTestResultLine(test_result, [('testFoo', 'started')])
-      # some hours later, test line is redrafted
-      self.pinDateTime(now - 1.0/24*3)
-      self._callRestartStuckTestResultAlarm()
+      # test result line redrafted due to reportTaskFailure
       self.checkTestResultLine(test_result, [('testFoo', 'draft')])
       # Test is then relaunched
-      self.tool.startUnitTest(test_result_path)
+      self.pinDateTime(now - 1.0/24*7)
+      self.tool.startUnitTest(test_result_path, node_title="Node0")
       self.checkTestResultLine(test_result, [('testFoo', 'started')])
       # We have another failure but remains only test result line that was already
       # redrafted, so we have to mark the test result as failed
-      self.pinDateTime(now - 1.0/24*2.5)
-      self.tool.reportTaskFailure(test_result_path, {}, "Node0")
+      self.pinDateTime(now - 1.0/24*4)
+      self.distributor.reportTaskFailure(test_result_path, {}, "Node0")
       self.assertEqual("failed", node.getSimulationState())
       self.assertEqual("failed", test_result.getSimulationState())
     finally:
...
@@ -139,10 +139,11 @@ class TestResultLineProxy(RPCRetry):
     Test name, as provided to TaskDistributor.createTestResult .
     """
     def __init__(self, proxy, retry_time, logger, test_result_line_path,
-                 test_name):
+                 test_name, node_title=None):
         super(TestResultLineProxy, self).__init__(proxy, retry_time, logger)
         self._test_result_line_path = test_result_line_path
         self._name = test_name
+        self._node_title = node_title

     def __repr__(self):
         return '<%s(%r, %r) at %x>' % (self.__class__.__name__,
@@ -191,7 +192,7 @@ class TestResultLineProxy(RPCRetry):
             self._logger.info('Extra parameters provided: %r', kw)
             status_dict.update(kw)
         self._retryRPC('stopUnitTest', (self._test_result_line_path,
-            binarize_args(status_dict)))
+            binarize_args(status_dict), self._node_title))

 class TestResultProxy(RPCRetry):
     """
@@ -246,7 +247,7 @@ class TestResultProxy(RPCRetry):
         if result:
             line_url, test_name = result
             result = TestResultLineProxy(self._proxy, self._retry_time,
-                self._logger, line_url, test_name)
+                self._logger, line_url, test_name, node_title=self._node_title)
         return result

     def reportFailure(self, date=None, command=None, stdout=None, stderr=None):
...
@@ -436,22 +436,24 @@ class ERP5ProjectUnitTestDistributor(XMLObject):
     return test_suite

   security.declarePublic("startUnitTest")
-  def startUnitTest(self,test_result_path,exclude_list=()):
+  def startUnitTest(self, test_result_path, exclude_list=(), node_title=None):
     """
     Here this is only a proxy to the task distribution tool
     """
     LOG('ERP5ProjectUnitTestDistributor.startUnitTest', 0, test_result_path)
     portal = self.getPortalObject()
-    return portal.portal_task_distribution.startUnitTest(test_result_path,exclude_list)
+    return portal.portal_task_distribution.startUnitTest(test_result_path,exclude_list,
+                                                          node_title=node_title)

   security.declarePublic("stopUnitTest")
-  def stopUnitTest(self,test_path,status_dict):
+  def stopUnitTest(self,test_path,status_dict, node_title=None):
     """
     Here this is only a proxy to the task distribution tool
     """
     LOG('ERP5ProjectUnitTestDistributor.stop_unit_test', 0, test_path)
     portal = self.getPortalObject()
-    return portal.portal_task_distribution.stopUnitTest(test_path, status_dict)
+    return portal.portal_task_distribution.stopUnitTest(test_path, status_dict,
+                                                        node_title=node_title)

   security.declarePublic("generateConfiguration")
   def generateConfiguration(self, test_suite_title, batch_mode=0):
...
@@ -152,7 +152,7 @@ class TaskDistributionTool(BaseTool):
     catalog_kw = {'portal_type': 'Test Result',
                   'title': SimpleQuery(comparison_operator='=', title=test_title),
                   'sort_on': (("creation_date","descending"),),
-                  'query': NegatedQuery(SimpleQuery(simulation_state="cancelled")),
+                  'simulation_state': NegatedQuery(SimpleQuery(simulation_state="cancelled")),
                   'limit': 1}
     result_list = portal.test_result_module.searchFolder(**catalog_kw)
     if result_list:
@@ -182,6 +182,9 @@ class TaskDistributionTool(BaseTool):
     if reference_list_string is not None:
       if reference_list_string == test_result.getReference():
         return
+      # If we are here, latest test result might be an old revision created
+      # by hand, then we should not test a newer revision already tested
+      catalog_kw['simulation_state'] = ["stopped", "public_stopped"]
       if portal.test_result_module.searchFolder(
           reference=SimpleQuery(comparison_operator='=', reference=reference_list_string),
           **catalog_kw):
@@ -214,7 +217,7 @@ class TaskDistributionTool(BaseTool):
     return test_result.getRelativeUrl(), revision

   security.declarePublic('startUnitTest')
-  def startUnitTest(self, test_result_path, exclude_list=()):
+  def startUnitTest(self, test_result_path, exclude_list=(), node_title=None):
     """(temporary)
     - test_result_path (string)
     - exclude_list (list of strings)
@@ -234,11 +237,14 @@ class TaskDistributionTool(BaseTool):
         state = line.getSimulationState()
         test = line.getRelativeUrl(), test
         if state == 'draft':
+          if node_title:
+            node = self._getTestNodeRelativeUrl(node_title)
+            line.setSource(node)
           line.start()
           return test

   security.declarePublic('stopUnitTest')
-  def stopUnitTest(self, test_path, status_dict):
+  def stopUnitTest(self, test_path, status_dict, node_title=None):
     """(temporary)
     - test_path (string)
     - status_dict (dict)
@@ -271,11 +277,20 @@ class TaskDistributionTool(BaseTool):
                                          status_dict)))
     portal = self.getPortalObject()
     test_result = portal.restrictedTraverse(test_result_path)
-    node = self._getTestResultNode(test_result, node_title)
-    assert node is not None
-    node.fail(**status_dict)
-    for node in test_result.objectValues(portal_type='Test Result Node'):
-      if node.getSimulationState() != 'failed':
+    test_result_node = self._getTestResultNode(test_result, node_title)
+    assert test_result_node is not None
+    test_result_node.fail(**status_dict)
+    # Redraft all test result lines that were affected to that test node
+    # to allow immediate reexecution (useful in case of timeout raised
+    # by a runTestSuite process)
+    for line in test_result.objectValues(portal_type="Test Result Line"):
+      if line.getSimulationState() == "started" and line.getSourceTitle() == node_title:
+        line.redraft()
+    # If all test nodes failed, we would like to cancel the test result, giving
+    # opportunity to testnode to start working on a newer version of repository,
+    # possibly coming with a fix avoiding current failure
+    for test_result_node in test_result.objectValues(portal_type='Test Result Node'):
+      if test_result_node.getSimulationState() != 'failed':
         break
     else:
       # now check if we had recent work on test line, if so, this means
@@ -284,13 +299,13 @@ class TaskDistributionTool(BaseTool):
       recent_time = DateTime() - 1.0/24
       for test_result_line in test_result.objectValues(
               portal_type="Test Result Line"):
-        if test_result_line.getModificationDate() > recent_time:
+        if test_result_line.getModificationDate() >= recent_time:
           # do not take into account redrafted lines, this means we already
-          # had issues with them
+          # had issues with them (just one time, since we already redraft above)
           if len([x for x in portal.portal_workflow.getInfoFor(
               ob=test_result_line,
               name='history',
-              wf_id='test_result_workflow') if x['action']=='redraft']) == 0:
+              wf_id='test_result_workflow') if x['action']=='redraft']) <= 1:
             break
       else:
         if test_result.getSimulationState() not in ('failed', 'cancelled'):
@@ -326,4 +341,4 @@ class TaskDistributionTool(BaseTool):
     portal = self.getPortalObject()
     memcached_dict = portal.portal_memcached.getMemcachedDict(
         "task_distribution", "default_memcached_plugin")
-    return memcached_dict
\ No newline at end of file
+    return memcached_dict