Commit fb1d531c authored by Thomas Gambier 🚴🏼, committed by Administrator

Introduce new promise for backupserver software release.

This promise will be used by the backupserver software release. It works as is (provided the corresponding changes are made in the backupserver software release), but I may add more features later.

I'd like to have a first review of my code.

/reviewed-on nexedi/slapos.toolbox!43
parent c4d2977f
...@@ -46,6 +46,8 @@ setup(name=name, ...@@ -46,6 +46,8 @@ setup(name=name,
'slapos.core', # as it provides library for slap 'slapos.core', # as it provides library for slap
'xml_marshaller', # needed to dump information 'xml_marshaller', # needed to dump information
'GitPython', #needed for git manipulation into slaprunner 'GitPython', #needed for git manipulation into slaprunner
'croniter', # needed to know cron schedule
'pytz', # needed to manipulate timezone
'passlib', 'passlib',
'netifaces', 'netifaces',
'erp5.util', 'erp5.util',
......
from zope import interface as zope_interface
from slapos.grid.promise import interface
from slapos.grid.promise.generic import GenericPromise
from slapos.grid.promise.generic import TestResult
import re
import sys
import pytz
from datetime import datetime
from croniter import croniter
from dateutil.parser import parse
@zope_interface.implementer(interface.IPromise)
class RunPromise(GenericPromise):
    """Promise checking the outcome of the last backupserver backup.

    The promise reads the status file whose path is given by the
    ``status_fullpath`` configuration entry and logs an error or an info
    message depending on what it finds there (see ``sense``).
    """

    # A status line looks like "<date>, <field>, <field>, backup <state>".
    # Compiled once instead of being looked up again for every line.
    _STATUS_LINE = re.compile(r"(.*), (.*), (.*), backup (.*)$")

    def __init__(self, config):
        GenericPromise.__init__(self, config)
        # check backup ran OK every 5 minutes
        self.setPeriodicity(minute=5)

    def sense(self):
        """
        backupserver run rdiff-backup and log everything in a text file.
        At the beginning of the backup, we have "backup running" printed in
        the text file.
        At the end of the backup, we can have one of the following printed in
        the text file:
         * "backup failed" -> backup failed
         * "backup success" -> backup succeeded

        A backup is valid only if we have the 2 conditions:
         * we can grep "backup running" in the text file
         * we can't grep "backup failed" in the text file

        In addition, the start date must not be older than the previous cron
        tick computed from the ``cron_frequency`` configuration entry.

        Nothing is returned: the outcome is reported through
        ``self.logger.error`` (problem) or ``self.logger.info`` (no problem).
        """
        status = self.getConfig('status_fullpath')
        # date of the previous time cron launched (timezone-aware, UTC)
        prev_cron = croniter(
            self.getConfig('cron_frequency'),
            datetime.now(pytz.utc)).get_prev(datetime)
        status_url = "{}/private/{}/{}".format(
            self.getConfig("monitor_url"),
            self.getConfig("status_dirbasename"),
            self.getConfig("status_name"))
        statistic_url = "{}/private/{}/{}".format(
            self.getConfig("monitor_url"),
            self.getConfig("statistic_dirbasename"),
            self.getConfig("statistic_name"))

        # First, parse the log file. Every variable read by the checks below
        # is bound up front so none of them depends on which lines matched.
        backup_started = False
        backup_ended = False
        backup_failed = False
        backup_start = None
        backup_end = None
        # "with" guarantees the status file is closed, even if parsing raises.
        with open(status, 'r') as status_file:
            for line in status_file:
                m = self._STATUS_LINE.match(line)
                if not m:
                    continue
                state = m.group(4)
                if state == "running":
                    backup_started = True
                    backup_start = parse(m.group(1))
                elif state in ("failed", "success"):
                    backup_ended = True
                    backup_failed = state == "failed"
                    backup_end = parse(m.group(1))

        # Then check result
        if backup_ended and backup_failed:
            self.logger.error("Backup FAILED at {} (see {} ).".format(backup_end, status_url))
        elif not backup_started:
            self.logger.error("Can't find backup start date. Is there a problem with status file? (see {} ).".format(status_url))
        # NOTE(review): prev_cron is timezone-aware, so this comparison
        # presumably relies on status dates carrying a UTC offset - confirm.
        elif backup_start < prev_cron:
            self.logger.error("Backup didn't start at correct time: it started at {} but should have started after {}. (see {} ).".format(backup_start, prev_cron, status_url))
        elif not backup_ended:
            self.logger.info("Backup currently running, started at {} (see {} ).".format(backup_start, status_url))
        else:
            self.logger.info("Backup OK, started at {} and lasted {} (see full stats at {} and status at {} ).".format(
                backup_start,
                backup_end - backup_start,
                statistic_url,
                status_url
            ))

    def test(self):
        """
        Test is never failing because we don't want to check when buildout
        deploy the instance that the backup is working as it may not have
        started yet.
        """
        return TestResult(problem=False, message="")

    def anomaly(self):
        """
        Anomaly returns a TestResult instead of AnomalyResult because we don't
        want to call bang when there is a problem. Usually the problem won't
        be in the deployment of this instance but rather in the instance we
        are backuping. This will need a human intervention.
        """
        return self._test(result_count=1, failure_amount=1)
##############################################################################
#
# Copyright (c) 2018 Vifib SARL and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
from slapos.test.promise.plugin import TestPromisePluginMixin
from slapos.grid.promise import PromiseError
import os
from datetime import datetime,timedelta
class TestBackupserverCheckBackup(TestPromisePluginMixin):
    """Tests for the backupserver_check_backup promise plugin.

    Each test writes a fake backupserver status file, runs the promise
    launcher with anomaly detection enabled and checks the stored result.
    """

    def setUp(self):
        """Create the log folder and write the promise under test."""
        TestPromisePluginMixin.setUp(self)
        log_folder = os.path.join(self.partition_dir, 'var/log')
        os.makedirs(log_folder)

        self.status_name = 'SOFTINSTTEST_status.txt'
        self.status_fullpath = os.path.join(log_folder, self.status_name)
        # Daily cron schedule: every day at midnight.
        self.cron_frequency = '0 0 * * *'

        self.promise_name = "backupserver_check_backup.py"
        content = """from slapos.promise.plugin.backupserver_check_backup import RunPromise
extra_config_dict = {{
'cron_frequency': '{}',
'statistic_dirbasename': 'DUMMY_STATISTIC_DIRBASENAME',
'statistic_name': 'DUMMY_STATISTIC_NAME',
'status_dirbasename': 'DUMMY_STATUS_DIRBASENAME',
'status_fullpath': '{}',
'status_name': '{}',
'monitor_url': 'https://DUMMY_MONITOR_URL',
}}
""".format(self.cron_frequency, self.status_fullpath, self.status_name)
        self.writePromise(self.promise_name, content)

    def tearDown(self):
        """Run the mixin cleanup, then remove the status file if it is left."""
        TestPromisePluginMixin.tearDown(self)
        if os.path.exists(self.status_fullpath):
            os.remove(self.status_fullpath)

    def format_status(self, date, status):
        """Return a string formatted like a backupserver status line.

        ``date`` must be a naive datetime (it is written with a hard-coded
        "+0000" offset); ``status`` is the word following "backup".

        Raises ValueError if ``date`` carries timezone information.
        """
        if date.tzinfo is not None:
            # Raising a bare string is itself a TypeError; raise a real
            # exception so the intended message reaches the developer.
            raise ValueError("Date should be UTC")
        return "{}+0000, DUMMY_STATISTIC_NAME, SOFTINSTTEST, backup {}\n".format(
            date.replace(microsecond=0).isoformat(), status)

    def test_check_backup_ok(self):
        """A backup started and succeeded just now is reported as OK."""
        now = datetime.utcnow()
        with open(self.status_fullpath, 'w') as f:
            f.write(self.format_status(now, "running"))
            f.write(self.format_status(now, "success"))

        self.configureLauncher(enable_anomaly=True)
        self.launcher.run()
        result = self.getPromiseResult(self.promise_name)
        self.assertEqual(result['result']['failed'], False)
        self.assertEqual(result['result']['message'][:9], "Backup OK")

    def test_check_backup_fail(self):
        """A backup ending with "failed" makes the promise fail."""
        now = datetime.utcnow()
        with open(self.status_fullpath, 'w') as f:
            f.write(self.format_status(now, "running"))
            f.write(self.format_status(now, "failed"))

        self.configureLauncher(enable_anomaly=True)
        with self.assertRaises(PromiseError):
            self.launcher.run()
        result = self.getPromiseResult(self.promise_name)
        self.assertEqual(result['result']['failed'], True)
        self.assertEqual(result['result']['message'][:13], "Backup FAILED")

    def test_check_backup_too_long(self):
        """A backup started before the previous cron tick is an error."""
        now = datetime.utcnow()
        long_ago = now - timedelta(days=2)
        with open(self.status_fullpath, 'w') as f:
            f.write(self.format_status(long_ago, "running"))

        self.configureLauncher(enable_anomaly=True)
        with self.assertRaises(PromiseError):
            self.launcher.run()
        result = self.getPromiseResult(self.promise_name)
        self.assertEqual(result['result']['failed'], True)
        self.assertEqual(result['result']['message'][:35], "Backup didn't start at correct time")
if __name__ == '__main__':
    # unittest is not imported at module level, so import it here; otherwise
    # running this file directly fails with NameError.
    import unittest
    unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment