Commit ff1a8a71 authored by Jérome Perrin

testnode.ProcessManager: reimplement killCommand to send SIGTERM first

Implement the traditional pattern of sending SIGTERM first, so that the
program can free its resources, and only sending SIGKILL after a delay if
the program is still running.
parent 70cf895f
...@@ -24,6 +24,8 @@ import tempfile ...@@ -24,6 +24,8 @@ import tempfile
import json import json
import time import time
import re import re
import psutil
@contextmanager @contextmanager
def dummySuiteLog(_): def dummySuiteLog(_):
...@@ -780,14 +782,50 @@ shared = true ...@@ -780,14 +782,50 @@ shared = true
RunnerClass.runTestSuite = original_runTestSuite RunnerClass.runTestSuite = original_runTestSuite
def test_12_spawn(self): def test_12_spawn(self):
def _checkCorrectStatus(expected_status,*args):
result = process_manager.spawn(*args)
self.assertEqual(result['status_code'], expected_status)
process_manager = ProcessManager(max_timeout=1) process_manager = ProcessManager(max_timeout=1)
_checkCorrectStatus(0, *['sleep','0'])
# We must make sure that if the command is too long that # process manager returns status_code
# it will be automatically killed self.assertEqual(0, process_manager.spawn('sleep', '0')['status_code'])
self.assertRaises(SubprocessError, process_manager.spawn, 'sleep','3')
# process manager terminates programs taking longer than max_timeout
self.assertRaises(SubprocessError, process_manager.spawn, 'sleep', '3')
# process manager terminates programs by sending them SIGTERM,
# so that they can perform cleanups.
with tempfile.NamedTemporaryFile(mode='w') as prog, tempfile.NamedTemporaryFile(delete=False) as f:
# this program will remove f if it is terminated
prog.write("""
trap "rm {f.name}; exit 1" SIGTERM
for i in $(seq 600); do sleep .1; done
""".format(**locals()))
prog.flush()
self.assertRaises(
SubprocessError,
process_manager.spawn,
'/bin/bash',
prog.name)
# prog has removed this file in its SIGTERM handler
self.assertFalse(os.path.exists(f.name))
# if the program is still running after SIGTERM, process manager sends SIGKILL
with tempfile.NamedTemporaryFile(mode='w') as prog:
# this program does not terminate on SIGTERM
prog.write("""
trap "echo not yet" SIGTERM
for i in $(seq 600); do sleep .1; done
""".format(**locals()))
prog.flush()
self.assertRaises(
SubprocessError,
process_manager.spawn,
'/bin/bash',
prog.name)
self.assertEqual(
[],
[p.info for p in psutil.process_iter(attrs=['pid', 'name', 'cmdline'])
if prog.name in p.info['cmdline']]
)
def test_13_SlaposControlerResetSoftware(self): def test_13_SlaposControlerResetSoftware(self):
test_node = self.getTestNode() test_node = self.getTestNode()
......
...@@ -102,32 +102,45 @@ def subprocess_capture(p, log_prefix, get_output=True): ...@@ -102,32 +102,45 @@ def subprocess_capture(p, log_prefix, get_output=True):
p.stderr and b''.join(stderr)) p.stderr and b''.join(stderr))
def killCommand(pid): def killCommand(pid):
"""terminate process with pid and all its child processes.
Inspired by the psutil recipe
https://psutil.readthedocs.io/en/latest/index.html#terminate-my-children
""" """
To prevent processes from reacting to the KILL of other processes, def on_terminate(proc):
we STOP them all first, and we repeat until the list of children does not logger.debug("process {} terminated with exit code {}".format(proc, proc.returncode))
change anymore. Only then, we KILL them all.
""" def terminate(procs, timeout):
# send SIGTERM
for p in procs:
try:
p.terminate()
except psutil.NoSuchProcess:
pass
_, alive = psutil.wait_procs(procs, timeout=timeout, callback=on_terminate)
if alive:
# send SIGKILL
for p in alive:
logger.info("process {} survived SIGTERM; trying SIGKILL".format(p))
try:
p.kill()
except psutil.NoSuchProcess:
pass
_, alive = psutil.wait_procs(alive, timeout=timeout, callback=on_terminate)
if alive:
# give up
for p in alive:
logger.error("process {} survived SIGKILL; giving up".format(p))
try: try:
process = psutil.Process(pid) process = psutil.Process(pid)
process.suspend() except psutil.NoSuchProcess:
except psutil.Error as e: logger.info("process {} already terminated".format(pid))
return else:
process_list = [process] childrens = process.children(recursive=True)
new_list = process.children(recursive=True) terminate((process, ), 3)
while new_list: terminate(childrens, 3)
process_list += new_list
for child in new_list:
try:
child.suspend()
except psutil.Error as e:
logger.debug("killCommand/suspend: %s", e)
time.sleep(1)
new_list = set(process.children(recursive=True)).difference(process_list)
for process in process_list:
try:
process.kill()
except psutil.Error as e:
logger.debug("killCommand/kill: %s", e)
class ProcessManager(object): class ProcessManager(object):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment