Commit 1dade5cd authored by Cédric Le Ninivin

Added test and entry-point for slapos-watchdog

parent 16b2e8b8
No related merge requests found
......@@ -59,7 +59,7 @@ setup(name=name,
'slapproxy = slapos.proxy:main',
'bang = slapos.bang:main',
'slapos = slapos.entry:main',
'watchdog = slapos.grid.watchdog:main',
'slapos-watchdog = slapos.grid.watchdog:main',
]
},
test_suite="slapos.tests",
......
......@@ -32,6 +32,7 @@ import os
import shutil
import signal
import slapos.slap.slap
from slapos.grid.watchdog import Watchdog, getWatchdogID
import socket
import sys
import tempfile
......@@ -40,6 +41,33 @@ import unittest
import urlparse
import xml_marshaller
# Template of a stand-alone watchdog launcher script used by the tests.
# It is rendered with three keys:
#   python_path     - interpreter put on the shebang line
#   sys_path        - value assigned to sys.path inside the script
#   watchdog_banged - file the replacement bang() writes its report to
# The rendered script replaces slapos.slap.ComputerPartition.bang with a
# function dumping the partition attributes (and those of its
# '_connection_helper') followed by the bang message into that file, then
# runs slapos.grid.watchdog.main().
# The embedded code must keep its own indentation: it is executed as a
# separate Python program.
WATCHDOG_TEMPLATE = """#!%(python_path)s -S
import sys
sys.path=%(sys_path)s
import slapos.slap.slap
import slapos.grid.watchdog
def setBang():
  def getBang():
    def bang(self_partition,message):
      report = ""
      for key in self_partition.__dict__:
        report += (key + ': ' + str(self_partition.__dict__[key]) + ' ')
        if key == '_connection_helper':
          for el in self_partition.__dict__[key].__dict__:
            report += (' ' + el +': ' +
                       str(self_partition.__dict__[key].__dict__[el]) + ' ')
      report += message
      open('%(watchdog_banged)s','w').write(report)
    return bang
  slapos.slap.ComputerPartition.bang = getBang()
setBang()
slapos.grid.watchdog.main()
"""
WRAPPER_CONTENT = """#!/bin/sh
touch worked &&
mkdir -p etc/run &&
......@@ -48,6 +76,19 @@ echo "while :; do echo "Working\\nWorking\\n" ; sleep 0.1; done" >> etc/run/wrap
chmod 755 etc/run/wrapper
"""
# Shell script passed to setBuildout() by the watchdog integration test.
# When run it creates etc/service, writes an executable etc/service/daemon
# script and finally touches `worked`.
# The daemon text it emits touches `launched`; if ./crashed does not exist
# yet it creates it, prints "Failing", sleeps one second and ends with
# `return 111`; once ./crashed exists it prints "Working" every 0.1 second
# in an endless loop.
# The doubled backslashes are Python escapes: the shell script receives a
# literal backslash followed by `n`.
DAEMON_CONTENT = """#!/bin/sh
mkdir -p etc/service &&
echo "#!/bin/sh" > etc/service/daemon &&
echo "touch launched
if [ -f ./crashed ]; then
while :; do echo "Working\\nWorking\\n" ; sleep 0.1; done
else
touch ./crashed; echo "Failing\\nFailing\\n"; sleep 1; return 111;
fi" >> etc/service/daemon &&
chmod 755 etc/service/daemon &&
touch worked
"""
class BasicMixin:
def assertSortedListEqual(self, list1, list2, msg=None):
  """
  Compare two lists regardless of element order: both are sorted first,
  then handed to assertListEqual together with the optional message.
  """
  first_sorted = sorted(list1)
  second_sorted = sorted(list2)
  self.assertListEqual(first_sorted, second_sorted, msg)
......@@ -241,6 +282,8 @@ class ComputerForTest:
if parsed_url.path == 'destroyedComputerPartition':
instance.state = 'destroyed'
return (200, {}, '')
if parsed_url.path == 'softwareInstanceBang':
return (200, {}, '')
if parsed_url.path == 'softwareInstanceError':
instance.error_log = '\n'.join([line for line \
in parsed_qs['error_log'][0].splitlines()
......@@ -569,6 +612,177 @@ chmod 755 etc/run/wrapper
self.assertEqual('stopped', instance.state)
class TestSlapgridCPWithMasterWatchdog(MasterMixin, unittest.TestCase):
  """
  Tests of the watchdog: two integration tests running a failing daemon
  through slapgrid with a generated watchdog script, and three tests
  feeding events directly to Watchdog.handle_event.
  """

  # Shell script passed to setBuildout(): it writes an executable
  # etc/run/daemon script (touch launched, touch ./crashed, print "Failing",
  # sleep one second, `return 111`) and then touches `worked`.
  RUN_CONTENT = """#!/bin/sh
mkdir -p etc/run &&
echo "#!/bin/sh" > etc/run/daemon &&
echo "touch launched
touch ./crashed; echo "Failing\\nFailing\\n"; sleep 1; return 111;
" >> etc/run/daemon &&
chmod 755 etc/run/daemon &&
touch worked
"""

  def _readFile(self, path):
    """
    Return the whole content of the file at path, closing it afterwards.
    """
    with open(path, 'r') as opened_file:
      return opened_file.read()

  def _waitUntil(self, condition, tries, delay=0.2):
    """
    Call condition() up to `tries` times, sleeping `delay` seconds after
    each unsuccessful call. Return True as soon as condition() is true,
    False once all tries are used.
    """
    while tries > 0:
      tries -= 1
      if condition():
        return True
      time.sleep(delay)
    return False

  def _prepareWatchdog(self):
    """
    Render WATCHDOG_TEMPLATE into an executable 'watchdog' file in the test
    temporary directory and set it as self.grid.watchdog_path.
    Return the path of the file written by the monkeypatched bang.
    """
    watchdog_path = os.path.join(self._tempdir, 'watchdog')
    watchdog_banged = os.path.join(self._tempdir, 'watchdog_banged')
    # Close the script explicitly so it is fully written before being run.
    with open(watchdog_path, 'w') as watchdog_file:
      watchdog_file.write(
        WATCHDOG_TEMPLATE % dict(python_path=sys.executable,
                                 sys_path=sys.path,
                                 watchdog_banged=watchdog_banged))
    os.chmod(watchdog_path, 0o755)
    self.grid.watchdog_path = watchdog_path
    return watchdog_banged

  def _runFailingDaemon(self, buildout_content):
    """
    Steps shared by both integration tests:
    1.Request partition 0 as started with buildout_content as buildout
    2.Prepare the watchdog script
    3.Process the partitions and check the resulting files
    4.Wait for the daemon log to be filled and check it contains 'Failing'
    5.Wait up to 25 tries for the file generated by the monkeypatched bang
    Return the path of that file; the caller asserts on its presence.
    """
    computer = ComputerForTest(self.software_root, self.instance_root)
    partition = computer.instance_list[0]
    partition.requested_state = 'started'
    partition.software.setBuildout(buildout_content)
    watchdog_banged = self._prepareWatchdog()

    self.assertTrue(self.grid.processComputerPartitionList())
    self.assertSortedListEqual(os.listdir(self.instance_root),
                               ['0', 'etc', 'var'])
    self.assertSortedListEqual(os.listdir(partition.partition_path),
                               ['.0_daemon.log', 'worked', 'buildout.cfg',
                                'etc'])

    daemon_log = os.path.join(partition.partition_path, '.0_daemon.log')
    self._waitUntil(lambda: os.path.getsize(daemon_log) > 0, 10)
    self.assertTrue('Failing' in self._readFile(daemon_log))

    self._waitUntil(lambda: os.path.exists(watchdog_banged), 25)
    return watchdog_banged

  def _makeWatchdog(self):
    """
    Return a Watchdog configured with the test master url and computer id,
    without key nor certificate file.
    """
    return Watchdog(dict(master_url=self.master_url,
                         computer_id=self.computer_id,
                         key_file=None,
                         cert_file=None))

  def _sendEvent(self, watchdog, event, process_name, instance):
    """
    Hand to watchdog.handle_event an event named `event` for the process
    `process_name` in the group named after the instance.
    """
    headers = dict(eventname=event)
    payload = "processname:%s groupname:%s from_state:RUNNING"\
        % (process_name, instance.name)
    watchdog.handle_event(headers, payload)

  def test_one_failing_daemon_in_service_will_bang_with_watchdog(self):
    """
    Check that a failing service watched by watchdog triggers bang
    1.Prepare computer and set a service named daemon in etc/service
       (to be watched by watchdog). This daemon will fail.
    2.Prepare file for supervisord to call watchdog
      -Set sys.path
      -Monkeypatch computer partition bang
    3.Check daemon is launched
    4.Wait for it to fail
    5.Wait for file generated by monkeypatched bang to appear
    """
    watchdog_banged = self._runFailingDaemon(DAEMON_CONTENT)
    self.assertTrue(os.path.exists(watchdog_banged))
    self.assertTrue('daemon' in self._readFile(watchdog_banged))

  def test_one_failing_daemon_in_run_will_not_bang_with_watchdog(self):
    """
    Check that a failing process not watched by watchdog does not trigger
    bang
    1.Prepare computer and set a service named daemon in etc/run
       (not watched by watchdog). This daemon will fail.
    2.Prepare file for supervisord to call watchdog
      -Set sys.path
      -Monkeypatch computer partition bang
    3.Check daemon is launched
    4.Wait for it to fail
    5.Check that file generated by monkeypatched bang does not appear
    """
    watchdog_banged = self._runFailingDaemon(self.RUN_CONTENT)
    self.assertFalse(os.path.exists(watchdog_banged))

  def test_watched_by_watchdog_bang(self):
    """
    Test that a process going to fatal or exited mode in supervisord
    is banged if watched by watchdog
    (ie: watchdog id in process name)
    """
    computer = ComputerForTest(self.software_root, self.instance_root)
    instance = computer.instance_list[0]
    watchdog = self._makeWatchdog()
    for event in watchdog.process_state_events:
      instance.sequence = []
      self._sendEvent(watchdog, event, 'daemon' + getWatchdogID(), instance)
      self.assertEqual(instance.sequence, ['softwareInstanceBang'])

  def test_unwanted_events_will_not_bang(self):
    """
    Test that a process going to a mode not watched by watchdog
    in supervisord is not banged if watched by watchdog
    """
    computer = ComputerForTest(self.software_root, self.instance_root)
    instance = computer.instance_list[0]
    watchdog = self._makeWatchdog()
    for event in ['EVENT', 'PROCESS_STATE', 'PROCESS_STATE_RUNNING',
                  'PROCESS_STATE_BACKOFF', 'PROCESS_STATE_STOPPED']:
      computer.sequence = []
      # Reset the list that is asserted below, so that every event is
      # checked on its own.
      instance.sequence = []
      self._sendEvent(watchdog, event, 'daemon' + getWatchdogID(), instance)
      self.assertEqual(instance.sequence, [])

  def test_not_watched_by_watchdog_do_not_bang(self):
    """
    Test that a process going to fatal or exited mode in supervisord
    is not banged if not watched by watchdog
    (ie: no watchdog id in process name)
    """
    computer = ComputerForTest(self.software_root, self.instance_root)
    instance = computer.instance_list[0]
    watchdog = self._makeWatchdog()
    for event in watchdog.process_state_events:
      # NOTE(review): this checks computer.sequence while the positive test
      # checks instance.sequence - presumably a bang shows up in both;
      # confirm against ComputerForTest.
      computer.sequence = []
      self._sendEvent(watchdog, event, 'daemon', instance)
      self.assertEqual(computer.sequence, [])
class TestSlapgridCPPartitionProcessing (MasterMixin, unittest.TestCase):
def test_partition_timestamp(self):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment