Commit 1a2a7280 authored by Sebastien Robin's avatar Sebastien Robin

fixed various issues with testnode and make it even more robust

Here the list of changes:
- close all timers when quitting, this makes stopping an erp5tetsnode
  much faster
- remove hack on slapos/testnode after fix of slapos.cookbook
- remove old tmp files left by buildout (buildout has te bo fixed too)
- remove logging handlers where the are not needed any more
- fixed the kill command, it was not able to kill properly childs
parent e58ab518
......@@ -31,10 +31,12 @@ class ERP5TestNode(TestCase):
self.remote_repository0 = os.path.join(self._temp_dir, 'rep0')
self.remote_repository1 = os.path.join(self._temp_dir, 'rep1')
self.remote_repository2 = os.path.join(self._temp_dir, 'rep2')
self.system_temp_folder = os.path.join(self._temp_dir,'tmp')
os.mkdir(self.working_directory)
os.mkdir(self.slapos_directory)
os.mkdir(self.test_suite_directory)
os.mkdir(self.environment)
os.mkdir(self.system_temp_folder)
os.makedirs(self.log_directory)
os.close(os.open(self.log_file,os.O_CREAT))
os.mkdir(self.remote_repository0)
......@@ -60,6 +62,7 @@ class ERP5TestNode(TestCase):
config["log_file"] = self.log_file
config["test_suite_master_url"] = None
config["test_node_title"] = "Foo-Test-Node"
config["system_temp_folder"] = self.system_temp_folder
return TestNode(self.log, config)
def getTestSuiteData(self, add_third_repository=False, reference="foo"):
......@@ -585,3 +588,25 @@ branch = foo
test_node.max_log_time = 0
test_node._cleanupLog()
check(set(['a_file']))
def test_17_cleanupLogDirectory(self):
# Make sure that we are able to cleanup old temp folders
test_node = self.getTestNode()
temp_directory = self.system_temp_folder
def check(file_list):
directory_dir = os.listdir(temp_directory)
self.assertTrue(set(file_list).issubset(
set(directory_dir)),
"%r not contained by %r" % (file_list, directory_dir))
check([])
os.mkdir(os.path.join(temp_directory, 'buildoutA'))
os.mkdir(os.path.join(temp_directory, 'something'))
os.mkdir(os.path.join(temp_directory, 'tmpC'))
check(set(['buildoutA', 'something', 'tmpC']))
# default log file time is 15 days, so nothing is going to be deleted
test_node._cleanupTemporaryFiles()
check(set(['buildoutA', 'something', 'tmpC']))
# then we set keep time to 0, folder will be deleted
test_node.max_temp_time = 0
test_node._cleanupLog()
check(set(['something']))
\ No newline at end of file
......@@ -99,21 +99,26 @@ def subprocess_capture(p, log, log_prefix, get_output=True):
return (p.stdout and ''.join(stdout),
p.stderr and ''.join(stderr))
def killCommand(pid):
def killCommand(pid, log):
"""
To avoid letting orphaned childs, we stop the process and all it's
child (until childs does not change) and then we brutally kill
everyone at the same time
"""
process = psutil.Process(pid)
child_set = set([x.pid for x in process.get_children(recursive=True)])
new_child_set = None
new_child_set = set([x.pid for x in process.get_children(recursive=True)])
child_set = None
os.kill(pid, signal.SIGSTOP)
while new_child_set != child_set:
child_set = new_child_set
log("killCommand, new_child_set : %r, child_set: %r" % (
new_child_set, child_set))
for child_pid in child_set:
os.kill(child_pid, signal.SIGSTOP)
time.sleep(1)
child_set = new_child_set
new_child_set = set([x.pid for x in process.get_children(recursive=True)])
log("killCommand, finishing, child_set : %r" % (child_set,))
for child_pid in child_set:
os.kill(child_pid, signal.SIGKILL)
os.kill(pid, signal.SIGKILL)
......@@ -130,12 +135,13 @@ class ProcessManager(object):
self.p = None
self.result = None
self.max_timeout = kw.get("max_timeout") or MAX_TIMEOUT
self.timer_set = set()
def spawn(self, *args, **kw):
def timeoutExpired(p, log):
if p.poll() is None:
log('PROCESS TOO LONG OR DEAD, GOING TO BE TERMINATED')
killCommand(p.pid)
killCommand(p.pid, log)
if self.under_cancellation:
raise CancellationError("Test Result was cancelled")
......@@ -161,10 +167,12 @@ class ProcessManager(object):
stderr=subprocess.PIPE, env=env, **subprocess_kw)
self.process_pid_set.add(p.pid)
timer = threading.Timer(self.max_timeout, timeoutExpired, args=(p, log))
self.timer_set.add(timer)
timer.start()
stdout, stderr = subprocess_capture(p, log, log_prefix,
get_output=get_output)
timer.cancel()
self.timer_set.discard(timer)
result = dict(status_code=p.returncode, command=command,
stdout=stdout, stderr=stderr)
self.process_pid_set.discard(p.pid)
......@@ -184,9 +192,11 @@ class ProcessManager(object):
self.log('ProcessManager killPreviousRun, going to kill %r' % (self.process_pid_set,))
if cancellation:
self.under_cancellation = True
for timer in self.timer_set:
timer.cancel()
for pgpid in self.process_pid_set:
try:
killCommand(pgpid)
killCommand(pgpid, self.log)
except:
pass
try:
......
......@@ -27,6 +27,7 @@
import ConfigParser
import argparse
import logging
import logging.handlers
import os
import pkg_resources
......@@ -58,7 +59,9 @@ def main(*args):
logger.addHandler(logging.StreamHandler())
logger.info('Activated console output.')
if parsed_argument.logfile:
file_handler = logging.FileHandler(filename=parsed_argument.logfile)
file_handler = logging.handlers.RotatingFileHandler(
filename=parsed_argument.logfile,
maxBytes=20000000, backupCount=4)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
logger.info('Activated logfile %r output' % parsed_argument.logfile)
......@@ -87,6 +90,7 @@ def main(*args):
CONFIG['proxy_port'])
CONFIG['httpd_url'] = 'https://[%s]:%s' % (CONFIG['httpd_ip'],
CONFIG['httpd_port'])
CONFIG['system_temp_folder'] = "/tmp"
# generate vcs_repository_list
if 'bot_environment' in config.sections():
......
......@@ -45,6 +45,7 @@ from erp5.util import taskdistribution
DEFAULT_SLEEP_TIMEOUT = 120 # time in seconds to sleep
MAX_LOG_TIME = 15 # time in days we should keep logs that we can see through
# httd
MAX_TEMP_TIME = 5 # time in days we should keep temp files
supervisord_pid_file = None
PROFILE_PATH_KEY = 'profile_path'
......@@ -88,6 +89,7 @@ class NodeTestSuite(SlapOSInstance):
def __init__(self, reference):
super(NodeTestSuite, self).__init__()
self.reference = reference
self.file_handler = None
def edit(self, **kw):
super(NodeTestSuite, self).edit(**kw)
......@@ -131,29 +133,31 @@ class NodeTestSuite(SlapOSInstance):
return getattr(self, "suite_log", None)
def _initializeSuiteLog(self):
# remove previous handlers
logger = logging.getLogger('testsuite')
if self.file_handler is not None:
logger.removeHandler(self.file_handler)
# and replace it with new handler
logger_format = '%(asctime)s %(name)-13s: %(levelname)-8s %(message)s'
formatter = logging.Formatter(logger_format)
logging.basicConfig(level=logging.INFO, format=logger_format)
logger = logging.getLogger('testsuite')
file_handler = logging.FileHandler(filename=self.suite_log_path)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
self.file_handler = logging.FileHandler(filename=self.suite_log_path)
self.file_handler.setFormatter(formatter)
logger.addHandler(self.file_handler)
logger.info('Activated logfile %r output' % self.suite_log_path)
self.suite_log = logger.info
class TestNode(object):
def __init__(self, log, config, max_log_time=MAX_LOG_TIME):
def __init__(self, log, config, max_log_time=MAX_LOG_TIME,
max_temp_time=MAX_TEMP_TIME):
self.testnode_log = log
self.log = log
self.config = config or {}
self.process_manager = ProcessManager(log)
self.node_test_suite_dict = {}
# hack until slapos.cookbook is updated
if self.config.get('working_directory', '').endswith("slapos"):
self.config['working_directory'] = self.config[
'working_directory'][:-(len("slapos"))] + "testnode"
self.max_log_time = max_log_time
self.max_temp_time = max_temp_time
def checkOldTestSuite(self,test_suite_data):
config = self.config
......@@ -392,11 +396,33 @@ branch = %(branch)s
self.log("deleting log directory %r" % (folder_path,))
shutil.rmtree(folder_path)
def _cleanupTemporaryFiles(self):
"""
buildout seems letting files under /tmp. To avoid regular error of
missing disk space, remove old logs
"""
temp_directory = self.config["system_temp_folder"]
now = time.time()
user_id = os.geteuid()
for temp_folder in os.listdir(temp_directory):
folder_path = os.path.join(temp_directory, temp_folder)
if (temp_folder.startswith("tmp") or
temp_folder.startswith("buildout")):
stat = os.stat(folder_path)
if stat.st_uid == user_id and \
(now - stat.st_mtime)/86400 > self.max_temp_time:
self.log("deleting temp directory %r" % (folder_path,))
if os.path.isdir(folder_path):
shutil.rmtree(folder_path)
else:
os.remove(folder_path)
def cleanUp(self,test_result):
log = self.log
log('Testnode.cleanUp')
self.process_manager.killPreviousRun()
self._cleanupLog()
self._cleanupTemporaryFiles()
def run(self):
log = self.log
......@@ -494,3 +520,4 @@ branch = %(branch)s
# Exceptions are swallowed during cleanup phas
log("GENERAL EXCEPTION, QUITING")
self.cleanUp(test_result)
log("GENERAL EXCEPTION, QUITING, cleanup finished")
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment