# monitor_bootstrap_status.py
from zope import interface as zope_interface
from slapos.grid.promise import interface
from slapos.grid.promise.generic import GenericPromise
import os
import time
import psutil
from slapos.runner.utils import tail

# Module-level settings read by RunPromise.sense(). All three are empty
# strings here; sense() logs an empty info message and returns early while
# any of them is still empty.
# NOTE(review): presumably these are filled in when the promise file is
# generated for a partition -- confirm against the code producing this file.

# Path of the file holding the pid of the monitor bootstrap process.
PROCESS_PID_FILE = ""
# Name used to build the bootstrap log file name:
# "<partition folder>/.<partition-id>_<PROCESS_NAME>.log".
PROCESS_NAME = ""
# Path of the bootstrap status file; it must exist and be empty (size 0)
# for the bootstrap to be reported as OK.
STATUS_FILE = ""

@zope_interface.implementer(interface.IPromise)
class RunPromise(GenericPromise):
  """Promise reporting whether the monitor bootstrap process succeeded.

  The check relies on the module-level PROCESS_PID_FILE, PROCESS_NAME and
  STATUS_FILE settings. The interface is declared with the ``implementer``
  class decorator (instead of the class-body ``implements`` call) so that
  the declaration also works on Python 3.
  """

  def __init__(self, config):
    """Initialise the generic promise and set its periodicity (minute=2)."""
    GenericPromise.__init__(self, config)
    self.setPeriodicity(minute=2)

  def sense(self):
    """Inspect the bootstrap process and log the outcome.

    Steps, in order:
      1. If any module-level setting is empty, log an empty info message
         and stop.
      2. If the pid file does not exist, log that the bootstrap did not run.
      3. Read the pid; if that pid belongs to a monitor bootstrap process of
         this partition, wait up to 15 seconds for it to exit and log an
         error if it is still running afterwards.
      4. Log "Bootstrap OK" when the status file exists and is empty,
         otherwise log an error including the last lines of the bootstrap
         log file when that file exists.

    Raises:
      ValueError: if the pid file content cannot be parsed as an integer.
    """
    if PROCESS_PID_FILE == "" or PROCESS_NAME == "" or STATUS_FILE == "":
      self.logger.info("")
      return

    if not os.path.exists(PROCESS_PID_FILE):
      self.logger.info("Bootstrap didn't run!")
      return

    # Keep the file open only for the read; parsing happens afterwards.
    with open(PROCESS_PID_FILE) as f:
      pid_text = f.read()
    try:
      pid = int(pid_text)
    except ValueError as e:
      raise ValueError("%r is empty or doesn't contain a valid pid number: %s" % (
        PROCESS_PID_FILE, str(e)))

    try:
      process = psutil.Process(pid)
      command_string = ' '.join(process.cmdline())
      # The pid may belong to an unrelated process; only wait when the
      # command line mentions monitor.bootstrap and this partition's folder.
      if "monitor.bootstrap" in command_string and \
          self.getPartitionFolder() in command_string:
        for _ in range(15):
          if not process.is_running():
            break
          time.sleep(1)
        else:
          # The loop never hit "break": still running after 15 checks.
          self.logger.error("Monitor bootstrap is running for more than 15 seconds!")
          return
    except psutil.NoSuchProcess:
      # process exited
      pass

    # A single stat() call avoids the race between an existence check and a
    # separate size lookup; a missing file is treated as "not OK".
    try:
      status_size = os.stat(STATUS_FILE).st_size
    except OSError:
      status_size = None
    if status_size == 0:
      self.logger.info("Bootstrap OK")
      return

    message = "Monitor bootstrap exited with error."
    log_file = os.path.join(self.getPartitionFolder(), ".%s_%s.log" % (
      self.getConfig('partition-id'),
      PROCESS_NAME))
    if os.path.exists(log_file):
      with open(log_file) as f:
        message += "\n ---- Latest monitor-boostrap.log ----\n"
        # Append the tail of the log (tail called with 4) to give context.
        message += tail(f, 4)

    self.logger.error(message)

  def test(self):
    """Return the result of _test() with result_count=1, failure_amount=1."""
    return self._test(result_count=1, failure_amount=1)

  def anomaly(self):
    """Return the result of _anomaly() with result_count=3, failure_amount=3."""
    # bang if we have 3 errors in a row
    return self._anomaly(result_count=3, failure_amount=3)