Commit 675cf888 authored by Łukasz Nowak

Be more tolerant of the supervisord backend.

Use socket/time timeouts in small loops to react to common usage cases.
Make sure that supervisord is functional enough, in order to avoid running other
steps in case of a deconfigured svc backend.
parent 8ad4ec2f
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
############################################################################## ##############################################################################
from supervisor import xmlrpc from supervisor import xmlrpc
import time
from utils import SlapPopen from utils import SlapPopen
import logging import logging
import os import os
...@@ -33,6 +34,8 @@ import sys ...@@ -33,6 +34,8 @@ import sys
import xmlrpclib import xmlrpclib
from optparse import OptionParser from optparse import OptionParser
import ConfigParser import ConfigParser
import socket as socketlib
import subprocess
def getSupervisorRPC(socket): def getSupervisorRPC(socket):
...@@ -47,19 +50,31 @@ def launchSupervisord(socket, configuration_file): ...@@ -47,19 +50,31 @@ def launchSupervisord(socket, configuration_file):
logger = logging.getLogger('SVCBackend') logger = logging.getLogger('SVCBackend')
supervisor = getSupervisorRPC(socket) supervisor = getSupervisorRPC(socket)
if os.path.exists(socket): if os.path.exists(socket):
trynum = 1
while trynum < 6:
try: try:
status = supervisor.getState() status = supervisor.getState()
except xmlrpclib.Fault, e:
if e.faultCode == 6 and e.faultString == 'SHUTDOWN_STATE':
logger.info('Supervisor in shutdown procedure, will check again later.')
trynum += 1
time.sleep(2 * trynum)
except Exception: except Exception:
# In case if there is problem with connection, assume that supervisord # In case if there is problem with connection, assume that supervisord
# is not running and try to run it # is not running and try to run it
pass break
else: else:
if status['statename'] == 'RUNNING' and status['statecode'] == 1: if status['statename'] == 'RUNNING' and status['statecode'] == 1:
logger.info('Supervisord already running.') logger.info('Supervisord already running.')
return return
elif status['statename'] == 'SHUTDOWN_STATE' and status['statecode'] == 6:
logger.info('Supervisor in shutdown procedure, will check again later.')
trynum += 1
time.sleep(2 * trynum)
else: else:
log_message = 'Unknown supervisord state %r. Will try to start.' % status log_message = 'Unknown supervisord state %r. Will try to start.' % status
logger.warning(log_message) logger.warning(log_message)
break
logger.info("Launching supervisord with clean environment.") logger.info("Launching supervisord with clean environment.")
# Extract python binary to prevent shebang size limit # Extract python binary to prevent shebang size limit
...@@ -70,17 +85,38 @@ def launchSupervisord(socket, configuration_file): ...@@ -70,17 +85,38 @@ def launchSupervisord(socket, configuration_file):
"'] ; supervisor.supervisord.main()") "'] ; supervisor.supervisord.main()")
supervisord_popen = SlapPopen(invocation_list, supervisord_popen = SlapPopen(invocation_list,
env={}, env={},
executable=sys.executable) executable=sys.executable, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
result = supervisord_popen.communicate()[0] result = supervisord_popen.communicate()[0]
if supervisord_popen.returncode == 0: if supervisord_popen.returncode == 0:
log_message = 'Supervisord command invoked with: %s' % result log_message = 'Supervisord command invoked with: %s' % result
logger.info(log_message) logger.info(log_message)
try:
default_timeout = socketlib.getdefaulttimeout()
current_timeout = 1
trynum = 1
while trynum < 6:
try:
socketlib.setdefaulttimeout(current_timeout)
status = supervisor.getState() status = supervisor.getState()
if status['statename'] == 'RUNNING' and status['statecode'] == 1: if status['statename'] == 'RUNNING' and status['statecode'] == 1:
logger.info('Supervisord started correctly.') return
logger.warning('Wrong status name %(statename)r and code '
'%(statecode)r, trying again' % status)
trynum += 1
except Exception:
current_timeout = 5 * trynum
trynum += 1
pass
else:
logger.info('Supervisord started correctly in try %s.' % trynum)
return
logger.warning('Issue while checking supervisord.')
finally:
socketlib.setdefaulttimeout(default_timeout)
else: else:
log_message = 'Supervisord unknown problem: %s' % result log_message = 'Supervisord unknown problem: %s' % result
logger.info(log_message) logger.warning(log_message)
def getOptionDict(*argument_tuple): def getOptionDict(*argument_tuple):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment