Commit 2b5d1afa authored by Alain Takoudjou's avatar Alain Takoudjou

promise: move check_free_disk_space promise to promise.plugin

check_free_disk_space is now in promise.plugin, with some improvement.
Database connection timeout is now 10 seconds
If database is locked, promise will try again 10 times before start to raise error with "database is locked" message.
parent 4419c1ae
......@@ -72,7 +72,6 @@ setup(name=name,
'agent = slapos.agent.agent:main',
'apache-mpm-watchdog = slapos.promise.apache_mpm_watchdog:main',
'check-computer-memory = slapos.promise.check_computer_memory:main',
'check-free-disk = slapos.promise.check_free_disk:main',
'check-web-page-http-cache-hit = slapos.promise.check_web_page_http_cache_hit:main',
'check-feed-as-promise = slapos.checkfeedaspromise:main',
'check-error-on-apache-log = slapos.promise.check_error_on_apache_log:main',
......
#!/usr/bin/env python
"""
Check if free disk space is less than given threshold.
"""
import sys
import os
import sqlite3
import argparse
import datetime
import psutil
from slapos.collect.db import Database
def getFreeSpace(disk_partition, database, date, time):
database = Database(database, create=False, timeout=5)
try:
# fetch free disk space
database.connect()
where_query = "time between '%s:00' and '%s:30' and partition='%s'" % (time, time, disk_partition)
query_result = database.select("disk", date, "free", where=where_query)
result = zip(*query_result)
if not result or not result[0][0]:
print "No result from collector database: disk check skipped"
return 0
disk_free = result[0][0]
finally:
database.close()
return int(disk_free)
def getInodeUsage(path):
max_inode_usage = 97.99 # < 98% usage
stat = os.statvfs(path)
usage_output = ""
total_inode = stat.f_files
free_inode = stat.f_ffree
usage = round((float(total_inode - free_inode) / total_inode), 4) * 100
if usage > max_inode_usage:
return "Disk Inodes usages is really high: %s%%" % usage
elif os.path.exists('/tmp'):
# check if /tmp is mounted on another disk than path
tmp_stat = os.statvfs('/tmp')
if tmp_stat.f_blocks != stat.f_blocks:
tmp_usage = round((float(tmp_stat.f_files - tmp_stat.f_ffree) / tmp_stat.f_files), 4) * 100
if tmp_usage > max_inode_usage:
return "Disk Inodes usage is high: %s%%" % tmp_usage
return ""
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--collectordb", required=True)
parser.add_argument("--home_path", required=True)
parser.add_argument("--config", required=True)
args = parser.parse_args()
# find if a disk is mounted on the path
disk_partition = ""
path = os.path.join(args.home_path, "") +"extrafolder"
partitions = psutil.disk_partitions()
while path is not '/':
if not disk_partition:
path = os.path.dirname(path)
else:
break
for p in partitions:
if p.mountpoint is path:
disk_partition = p.device
break
if not disk_partition:
print "Couldn't find disk partition"
exit(2)
min_free_size = 1024*1024*1024*2 # 2G by default
if os.path.exists(args.config):
with open(args.config) as f:
min_size_str = f.read().strip()
if min_size_str == '0':
# disable check
print "Free disk space check is disabled\n set a number up to 0 to enable!"
exit(0)
if min_size_str.isdigit():
value = int(min_size_str)
if value >= 200:
# Minimum value is 200Mb, it's already low
min_free_size = int(min_size_str)*1024*1024
else:
with open(args.config, 'w') as f:
f.write(str(min_free_size/(1024*1024)))
# get last minute
now = datetime.datetime.now()
currentdate = now.strftime('%Y-%m-%d')
currenttime = now - datetime.timedelta(minutes=1)
currenttime = currenttime.time().strftime('%H:%M')
db_path = args.collectordb
if db_path.endswith("collector.db"):
db_path=db_path[:-len("collector.db")]
free_space = getFreeSpace(disk_partition, db_path, currentdate, currenttime)
if free_space and free_space > min_free_size:
inode_usage = getInodeUsage(args.home_path)
if inode_usage:
print inode_usage
exit(2)
print "Disk usage: OK"
exit(0)
free_space = round(free_space/(1024.0*1024*1024), 2)
min_space = round(min_free_size/(1024.0*1024*1024), 2)
print 'Free disk space low: remaining %s G (threshold: %s G)' % (
free_space, min_space)
print 'Please modify minimum value in your monitor interface.'
exit(1)
if __name__ == "__main__":
sys.exit(main())
from zope import interface as zope_interface
from slapos.grid.promise import interface
from slapos.grid.promise.generic import GenericPromise
import os
import sys
import sqlite3
import argparse
import datetime
import psutil
from slapos.collect.db import Database
class RunPromise(GenericPromise):
zope_interface.implements(interface.IPromise)
def __init__(self, config):
GenericPromise.__init__(self, config)
# check disk space at least every 3 minutes
self.setPeriodicity(minute=3)
def raiseOnDatabaseLocked(self, locked_message):
max_warn = 10
latest_result_list = self.getLastPromiseResultList(result_count=max_warn)
warning_count = 0
if len(latest_result_list) < max_warn:
return False
for result in latest_result_list[0]:
if result['status'] == "ERROR" and locked_message in result["message"]:
return True
for result_list in latest_result_list:
found = False
for result in result_list:
if result['status'] == "WARNING" and locked_message in result["message"]:
found = True
warning_count += 1
break
if not found:
break
if warning_count == max_warn:
# too many warning on database locked, now fail.
return True
self.logger.warn("collector database is locked by another process: %s %s" % (warning_count, len(latest_result_list)))
return False
def functio(self):
raise sqlite3.OperationalError("database is locked")
def getFreeSpace(self, disk_partition, database, date, time):
database = Database(database, create=False, timeout=10)
try:
# fetch free disk space
database.connect()
where_query = "time between '%s:00' and '%s:30' and partition='%s'" % (time, time, disk_partition)
query_result = database.select("disk", date, "free", where=where_query)
result = zip(*query_result)
if not result or not result[0][0]:
self.logger.info("No result from collector database: disk check skipped")
return 0
disk_free = result[0][0]
except sqlite3.OperationalError, e:
# if database is still locked after timeout expiration (another process is using it)
# we print warning message and try the promise at next run until max warn msg
locked_message = "database is locked"
if locked_message in str(e) and \
not self.raiseOnDatabaseLocked(locked_message):
return 0
raise
finally:
database.close()
pass
return int(disk_free)
def getInodeUsage(self, path):
max_inode_usage = 97.99 # < 98% usage
stat = os.statvfs(path)
usage_output = ""
total_inode = stat.f_files
free_inode = stat.f_ffree
usage = round((float(total_inode - free_inode) / total_inode), 4) * 100
if usage > max_inode_usage:
return "Disk Inodes usages is really high: %s%%" % usage
elif os.path.exists('/tmp'):
# check if /tmp is mounted on another disk than path
tmp_stat = os.statvfs('/tmp')
if tmp_stat.f_blocks != stat.f_blocks:
tmp_usage = round((float(tmp_stat.f_files - tmp_stat.f_ffree) / tmp_stat.f_files), 4) * 100
if tmp_usage > max_inode_usage:
return "Disk Inodes usage is high: %s%%" % tmp_usage
return ""
def sense(self):
# find if a disk is mounted on the path
disk_partition = ""
disk_threshold_file = self.getConfig('threshold-file')
db_path = self.getConfig('collectordb')
check_date = self.getConfig('test-check-date')
path = os.path.join(self.getPartitionFolder(), "") + "extrafolder"
partitions = psutil.disk_partitions()
while path is not '/':
if not disk_partition:
path = os.path.dirname(path)
else:
break
for p in partitions:
if p.mountpoint is path:
disk_partition = p.device
break
if not disk_partition:
self.logger.error("Couldn't find disk partition")
return
min_free_size = 1024*1024*1024*2 # 2G by default
if disk_threshold_file is not None:
if os.path.exists(disk_threshold_file):
with open(disk_threshold_file) as f:
min_size_str = f.read().strip()
if min_size_str == '0':
# disable check
self.logger.info("Free disk space check is disabled\n set a number up to 0 to enable!")
return
if min_size_str.isdigit():
value = int(min_size_str)
if value >= 200:
# Minimum value is 200Mb, it's already low
min_free_size = int(min_size_str)*1024*1024
else:
with open(disk_threshold_file, 'w') as f:
f.write(str(min_free_size/(1024*1024)))
if check_date:
# testing mode
currentdate = check_date
currenttime = self.getConfig('test-check-time', '09:17')
disk_partition = self.getConfig('test-disk-partition', '/dev/sda1')
else:
# get last minute
now = datetime.datetime.now()
currentdate = now.strftime('%Y-%m-%d')
currenttime = now - datetime.timedelta(minutes=1)
currenttime = currenttime.time().strftime('%H:%M')
if db_path.endswith("collector.db"):
db_path=db_path[:-len("collector.db")]
free_space = self.getFreeSpace(disk_partition, db_path, currentdate, currenttime)
if free_space == 0:
return
elif free_space > min_free_size:
inode_usage = self.getInodeUsage(self.getPartitionFolder())
if inode_usage:
self.logger.error(inode_usage)
else:
self.logger.info("Disk usage: OK")
return
free_space = round(free_space/(1024.0*1024*1024), 2)
min_space = round(min_free_size/(1024.0*1024*1024), 2)
self.logger.error('Free disk space low: remaining %s G (threshold: %s G)' % (
free_space, min_space))
def test(self):
return self._test(result_count=1, failure_amount=1)
def anomaly(self):
return self._test(result_count=3, failure_amount=3)
##############################################################################
#
# Copyright (c) 2017 Vifib SARL and Contributors. All Rights Reserved.
# Copyright (c) 2018 Vifib SARL and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
......@@ -25,26 +25,50 @@
#
##############################################################################
import unittest
from slapos.test.promise.plugin import TestPromisePluginMixin
from slapos.grid.promise import PromiseError
import os
import sqlite3
from slapos.test.promise import data
from slapos.promise.check_free_disk import getFreeSpace
from slapos.grid.promise import PromiseError
class TestFreeDisk(unittest.TestCase):
class TestCheckFreeDiskSpace(TestPromisePluginMixin):
def setUp(self):
self.base_path = "/".join(data.__file__.split("/")[:-1])
self.status = "ok"
TestPromisePluginMixin.setUp(self)
log_folder = os.path.join(self.partition_dir, 'var/log')
os.makedirs(log_folder)
self.db_file = '/tmp/collector.db'
self.base_path = "/".join(data.__file__.split("/")[:-1])
# populate db
self.conn = sqlite3.connect(self.db_file)
f = open(self.base_path+"/disktest.sql")
sql = f.read()
self.conn.executescript(sql)
self.conn.close()
self.conn.close()
self.promise_name = "check-free-disk-space.py"
self.th_file = os.path.join(self.partition_dir, 'min-disk-value')
with open(self.th_file, 'w') as f:
f.write('2048')
content = """from slapos.promise.plugin.check_free_disk_space import RunPromise
extra_config_dict = {
'collectordb': '%(collectordb)s',
'threshold-file': '%(th_file)s',
'test-check-date': '2017-10-02',
}
""" % {'collectordb': self.db_file, 'th_file': self.th_file}
self.writePromise(self.promise_name, content)
def tearDown(self):
TestPromisePluginMixin.tearDown(self)
if os.path.exists(self.db_file):
os.remove(self.db_file)
def test_check_disk(self):
self.assertEquals(288739385344,
......@@ -53,10 +77,22 @@ class TestFreeDisk(unittest.TestCase):
def test_check_free_disk_with_unavailable_dates(self):
self.assertEquals(0, getFreeSpace('/', '/tmp', '18:00', '2017-09-14'))
def tearDown(self):
if os.path.exists(self.db_file):
os.remove(self.db_file)
def test_disk_space_ok(self):
self.configureLauncher()
self.launcher.run()
result = self.getPromiseResult(self.promise_name)
self.assertEquals(result['result']['failed'], False)
self.assertEquals(result['result']['message'], "Disk usage: OK")
def test_disk_space_nok(self):
with open(self.th_file, 'w') as f:
f.write('298927494144')
self.configureLauncher()
with self.assertRaises(PromiseError):
self.launcher.run()
result = self.getPromiseResult(self.promise_name)
self.assertEquals(result['result']['failed'], True)
self.assertEquals(result['result']['message'], "Free disk space low: remaining 269.1 G (threshold: 291921381.0 G)")
if __name__ == '__main__':
unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment