Commit 1a2a7280 authored by Sebastien Robin

fixed various issues with testnode and made it even more robust

Here is the list of changes:
- close all timers when quitting; this makes stopping an erp5testnode
  much faster (a short sketch of the timer bookkeeping follows the commit
  header below)
- remove the hack on slapos/testnode now that slapos.cookbook is fixed
- remove old tmp files left behind by buildout (buildout has to be fixed too)
- remove logging handlers once they are no longer needed
- fix the kill command, which was not able to kill child processes properly
parent e58ab518
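The first change, cancelling all pending timers on shutdown, is spread over three hunks of ProcessManager.py below (the new timer_set attribute, spawn() and killPreviousRun()). The following is a minimal standalone sketch of that bookkeeping, not the testnode code itself; the TimerPool class and its method names are invented for illustration and only the standard library is assumed.

import threading

class TimerPool(object):
  """Track every pending threading.Timer so they can all be cancelled."""

  def __init__(self):
    self.timer_set = set()

  def schedule(self, delay, callback, *args):
    # Start a timeout timer and remember it.
    timer = threading.Timer(delay, callback, args=args)
    self.timer_set.add(timer)
    timer.start()
    return timer

  def done(self, timer):
    # The watched operation finished in time: cancel and forget the timer.
    timer.cancel()
    self.timer_set.discard(timer)

  def stopAll(self):
    # Called when quitting: cancelling every pending timer releases the
    # timer threads immediately instead of waiting for them to expire,
    # which is what makes stopping an erp5testnode much faster.
    for timer in self.timer_set:
      timer.cancel()
    self.timer_set.clear()

ProcessManager below does the same thing inline: spawn() adds each timeout timer to self.timer_set and discards it once the subprocess output has been captured, and killPreviousRun() cancels whatever is still pending before killing the processes.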
@@ -31,10 +31,12 @@ class ERP5TestNode(TestCase):
     self.remote_repository0 = os.path.join(self._temp_dir, 'rep0')
     self.remote_repository1 = os.path.join(self._temp_dir, 'rep1')
     self.remote_repository2 = os.path.join(self._temp_dir, 'rep2')
+    self.system_temp_folder = os.path.join(self._temp_dir,'tmp')
     os.mkdir(self.working_directory)
     os.mkdir(self.slapos_directory)
     os.mkdir(self.test_suite_directory)
     os.mkdir(self.environment)
+    os.mkdir(self.system_temp_folder)
     os.makedirs(self.log_directory)
     os.close(os.open(self.log_file,os.O_CREAT))
     os.mkdir(self.remote_repository0)
@@ -60,6 +62,7 @@ class ERP5TestNode(TestCase):
     config["log_file"] = self.log_file
     config["test_suite_master_url"] = None
     config["test_node_title"] = "Foo-Test-Node"
+    config["system_temp_folder"] = self.system_temp_folder
     return TestNode(self.log, config)
 
   def getTestSuiteData(self, add_third_repository=False, reference="foo"):
@@ -584,4 +587,26 @@ branch = foo
     # then we set keep time to 0, folder will be deleted
     test_node.max_log_time = 0
     test_node._cleanupLog()
-    check(set(['a_file']))
\ No newline at end of file
+    check(set(['a_file']))
+
+  def test_17_cleanupLogDirectory(self):
+    # Make sure that we are able to cleanup old temp folders
+    test_node = self.getTestNode()
+    temp_directory = self.system_temp_folder
+    def check(file_list):
+      directory_dir = os.listdir(temp_directory)
+      self.assertTrue(set(file_list).issubset(
+          set(directory_dir)),
+          "%r not contained by %r" % (file_list, directory_dir))
+    check([])
+    os.mkdir(os.path.join(temp_directory, 'buildoutA'))
+    os.mkdir(os.path.join(temp_directory, 'something'))
+    os.mkdir(os.path.join(temp_directory, 'tmpC'))
+    check(set(['buildoutA', 'something', 'tmpC']))
+    # default temp time is 5 days, so nothing is going to be deleted
+    test_node._cleanupTemporaryFiles()
+    check(set(['buildoutA', 'something', 'tmpC']))
+    # then we set keep time to 0, folder will be deleted
+    test_node.max_temp_time = 0
+    test_node._cleanupTemporaryFiles()
+    check(set(['something']))
\ No newline at end of file
@@ -99,21 +99,26 @@ def subprocess_capture(p, log, log_prefix, get_output=True):
   return (p.stdout and ''.join(stdout),
           p.stderr and ''.join(stderr))
 
-def killCommand(pid):
+def killCommand(pid, log):
   """
   To avoid leaving orphaned children, we stop the process and all its
   children (until the set of children does not change any more) and then
   we brutally kill everyone at the same time
   """
   process = psutil.Process(pid)
-  child_set = set([x.pid for x in process.get_children(recursive=True)])
-  new_child_set = None
+  new_child_set = set([x.pid for x in process.get_children(recursive=True)])
+  child_set = None
   os.kill(pid, signal.SIGSTOP)
   while new_child_set != child_set:
+    child_set = new_child_set
+    log("killCommand, new_child_set : %r, child_set: %r" % (
+      new_child_set, child_set))
     for child_pid in child_set:
       os.kill(child_pid, signal.SIGSTOP)
     time.sleep(1)
-    child_set = new_child_set
     new_child_set = set([x.pid for x in process.get_children(recursive=True)])
+  log("killCommand, finishing, child_set : %r" % (child_set,))
   for child_pid in child_set:
     os.kill(child_pid, signal.SIGKILL)
   os.kill(pid, signal.SIGKILL)
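For context, killCommand() above can be exercised against a small process tree. The snippet below is only a hypothetical usage sketch, not part of the commit: the bash command line, the ad-hoc log callable and the import path are assumptions, and psutil must be installed (killCommand uses it internally), as it already is for the testnode.

import subprocess
import sys
import time
from erp5.util.testnode.ProcessManager import killCommand  # import path assumed

# Spawn a shell that forks two long-running children.
p = subprocess.Popen(['bash', '-c', 'sleep 1000 & sleep 1000 & wait'])
time.sleep(1)  # give the shell a moment to fork its children

log = lambda message: sys.stdout.write(message + '\n')
# Freeze the whole tree with SIGSTOP, then SIGKILL every frozen process,
# so no orphaned child keeps running once the parent is gone.
killCommand(p.pid, log)
p.wait()  # reap the parent; its children have been killed as well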
@@ -130,12 +135,13 @@ class ProcessManager(object):
     self.p = None
     self.result = None
     self.max_timeout = kw.get("max_timeout") or MAX_TIMEOUT
+    self.timer_set = set()
 
   def spawn(self, *args, **kw):
     def timeoutExpired(p, log):
       if p.poll() is None:
         log('PROCESS TOO LONG OR DEAD, GOING TO BE TERMINATED')
-        killCommand(p.pid)
+        killCommand(p.pid, log)
 
     if self.under_cancellation:
       raise CancellationError("Test Result was cancelled")
@@ -161,10 +167,12 @@ class ProcessManager(object):
                          stderr=subprocess.PIPE, env=env, **subprocess_kw)
     self.process_pid_set.add(p.pid)
     timer = threading.Timer(self.max_timeout, timeoutExpired, args=(p, log))
+    self.timer_set.add(timer)
     timer.start()
     stdout, stderr = subprocess_capture(p, log, log_prefix,
                                         get_output=get_output)
     timer.cancel()
+    self.timer_set.discard(timer)
     result = dict(status_code=p.returncode, command=command,
                   stdout=stdout, stderr=stderr)
     self.process_pid_set.discard(p.pid)
@@ -184,9 +192,11 @@ class ProcessManager(object):
     self.log('ProcessManager killPreviousRun, going to kill %r' % (self.process_pid_set,))
     if cancellation:
       self.under_cancellation = True
+    for timer in self.timer_set:
+      timer.cancel()
     for pgpid in self.process_pid_set:
       try:
-        killCommand(pgpid)
+        killCommand(pgpid, self.log)
       except:
         pass
     try:
...
@@ -27,6 +27,7 @@
 import ConfigParser
 import argparse
 import logging
+import logging.handlers
 import os
 import pkg_resources
@@ -58,7 +59,9 @@ def main(*args):
     logger.addHandler(logging.StreamHandler())
     logger.info('Activated console output.')
   if parsed_argument.logfile:
-    file_handler = logging.FileHandler(filename=parsed_argument.logfile)
+    file_handler = logging.handlers.RotatingFileHandler(
+        filename=parsed_argument.logfile,
+        maxBytes=20000000, backupCount=4)
     file_handler.setFormatter(formatter)
     logger.addHandler(file_handler)
     logger.info('Activated logfile %r output' % parsed_argument.logfile)
@@ -87,6 +90,7 @@ def main(*args):
                                            CONFIG['proxy_port'])
   CONFIG['httpd_url'] = 'https://[%s]:%s' % (CONFIG['httpd_ip'],
                                              CONFIG['httpd_port'])
+  CONFIG['system_temp_folder'] = "/tmp"
 
   # generate vcs_repository_list
   if 'bot_environment' in config.sections():
...
@@ -45,6 +45,7 @@ from erp5.util import taskdistribution
 DEFAULT_SLEEP_TIMEOUT = 120 # time in seconds to sleep
 MAX_LOG_TIME = 15 # time in days we should keep logs that we can see through
                   # httpd
+MAX_TEMP_TIME = 5 # time in days we should keep temp files
 
 supervisord_pid_file = None
 PROFILE_PATH_KEY = 'profile_path'
@@ -88,6 +89,7 @@ class NodeTestSuite(SlapOSInstance):
   def __init__(self, reference):
     super(NodeTestSuite, self).__init__()
     self.reference = reference
+    self.file_handler = None
 
   def edit(self, **kw):
     super(NodeTestSuite, self).edit(**kw)
@@ -131,29 +133,31 @@ class NodeTestSuite(SlapOSInstance):
     return getattr(self, "suite_log", None)
 
   def _initializeSuiteLog(self):
+    # remove previous handlers
+    logger = logging.getLogger('testsuite')
+    if self.file_handler is not None:
+      logger.removeHandler(self.file_handler)
+    # and replace it with new handler
     logger_format = '%(asctime)s %(name)-13s: %(levelname)-8s %(message)s'
     formatter = logging.Formatter(logger_format)
     logging.basicConfig(level=logging.INFO, format=logger_format)
-    logger = logging.getLogger('testsuite')
-    file_handler = logging.FileHandler(filename=self.suite_log_path)
-    file_handler.setFormatter(formatter)
-    logger.addHandler(file_handler)
+    self.file_handler = logging.FileHandler(filename=self.suite_log_path)
+    self.file_handler.setFormatter(formatter)
+    logger.addHandler(self.file_handler)
     logger.info('Activated logfile %r output' % self.suite_log_path)
     self.suite_log = logger.info
 
 class TestNode(object):
 
-  def __init__(self, log, config, max_log_time=MAX_LOG_TIME):
+  def __init__(self, log, config, max_log_time=MAX_LOG_TIME,
+               max_temp_time=MAX_TEMP_TIME):
     self.testnode_log = log
     self.log = log
     self.config = config or {}
     self.process_manager = ProcessManager(log)
     self.node_test_suite_dict = {}
-    # hack until slapos.cookbook is updated
-    if self.config.get('working_directory', '').endswith("slapos"):
-      self.config['working_directory'] = self.config[
-        'working_directory'][:-(len("slapos"))] + "testnode"
     self.max_log_time = max_log_time
+    self.max_temp_time = max_temp_time
 
   def checkOldTestSuite(self,test_suite_data):
     config = self.config
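The handler juggling in _initializeSuiteLog() above is needed because logging.getLogger('testsuite') always returns the same logger object, so before this commit every test suite run added one more FileHandler and each message ended up duplicated in all previously opened suite logs. A minimal sketch of the effect, with a made-up logger name and throwaway file paths:

import logging

logger = logging.getLogger('demo')
logger.setLevel(logging.INFO)

first = logging.FileHandler('/tmp/run1.log')
logger.addHandler(first)
logger.info('first run')   # written to run1.log only

second = logging.FileHandler('/tmp/run2.log')
logger.addHandler(second)
logger.info('second run')  # written to BOTH run1.log and run2.log

# Removing the previous handler before adding the new one, as the commit
# now does by keeping it in self.file_handler, avoids the duplication:
logger.removeHandler(first)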
@@ -392,11 +396,33 @@ branch = %(branch)s
         self.log("deleting log directory %r" % (folder_path,))
         shutil.rmtree(folder_path)
 
+  def _cleanupTemporaryFiles(self):
+    """
+    buildout seems to leave files under /tmp. To avoid regularly running
+    out of disk space, remove old temporary files.
+    """
+    temp_directory = self.config["system_temp_folder"]
+    now = time.time()
+    user_id = os.geteuid()
+    for temp_folder in os.listdir(temp_directory):
+      folder_path = os.path.join(temp_directory, temp_folder)
+      if (temp_folder.startswith("tmp") or
+          temp_folder.startswith("buildout")):
+        stat = os.stat(folder_path)
+        if stat.st_uid == user_id and \
+            (now - stat.st_mtime)/86400 > self.max_temp_time:
+          self.log("deleting temp directory %r" % (folder_path,))
+          if os.path.isdir(folder_path):
+            shutil.rmtree(folder_path)
+          else:
+            os.remove(folder_path)
+
   def cleanUp(self,test_result):
     log = self.log
     log('Testnode.cleanUp')
     self.process_manager.killPreviousRun()
     self._cleanupLog()
+    self._cleanupTemporaryFiles()
 
   def run(self):
     log = self.log
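The age test in _cleanupTemporaryFiles() above converts the mtime difference from seconds to days, i.e. (now - st_mtime) / 86400 is compared against max_temp_time (5 days by default). A standalone sketch of the same check, with a hypothetical is_stale() helper name:

import os
import time

SECONDS_PER_DAY = 86400

def is_stale(path, max_days):
  """Return True when path belongs to the current user and was last
  modified more than max_days days ago."""
  st = os.stat(path)
  age_in_days = (time.time() - st.st_mtime) / SECONDS_PER_DAY
  return st.st_uid == os.geteuid() and age_in_days > max_days

# e.g. is_stale('/tmp/buildoutXYZ', 5) is True once the folder has not
# been modified for more than five days.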
@@ -494,3 +520,4 @@ branch = %(branch)s
       # Exceptions are swallowed during cleanup phase
       log("GENERAL EXCEPTION, QUITING")
       self.cleanUp(test_result)
+      log("GENERAL EXCEPTION, QUITING, cleanup finished")