Commit 17d8fe38 authored by Łukasz Nowak

monitor: Adapt parameters to new requirements

check-maximum-elapsed-time and failure-amount are added to support
monitoring distant backends with some margin on the failures.
parents d1409fe5 a5dc903a
Pipeline #7995 failed with stage
...@@ -14,4 +14,4 @@ ...@@ -14,4 +14,4 @@
# not need these here). # not need these here).
[surykatka-requirements] [surykatka-requirements]
_update_hash_filename_ = requirements.txt _update_hash_filename_ = requirements.txt
md5sum = 2dfe4f8b8c5b6f5d3478b70e53c49201 md5sum = c4bd35bcc5c9c25efe1edff5dd022605
Click==7.0
certifi==2019.11.28 certifi==2019.11.28
chardet==3.0.4 chardet==3.0.4
Click==7.0
dnspython==1.16.0 dnspython==1.16.0
forcediphttpsadapter==1.0.1 forcediphttpsadapter==1.0.1
idna==2.8 idna==2.8
miniupnpc==2.0.2 miniupnpc==2.0.2
peewee==3.13.1 peewee==3.13.1
requests==2.22.0 requests==2.22.0
surykatka==0.2.0 surykatka==0.4.2
urllib3==1.25.7 urllib3==1.25.8
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
# not need these here). # not need these here).
[template] [template]
filename = instance.cfg filename = instance.cfg
md5sum = d778b6f436ae6864819eb2ff2d12a86f md5sum = dc9770bacea2c504b92ad2162e58d222
[template-monitor] [template-monitor]
_update_hash_filename_ = instance-monitor.cfg.jinja2 _update_hash_filename_ = instance-monitor.cfg.jinja2
...@@ -30,7 +30,7 @@ md5sum = 9e237dbdda59e788202f0da194a57d41 ...@@ -30,7 +30,7 @@ md5sum = 9e237dbdda59e788202f0da194a57d41
[template-monitor-edgebot] [template-monitor-edgebot]
_update_hash_filename_ = instance-monitor-edgebot.cfg.jinja2 _update_hash_filename_ = instance-monitor-edgebot.cfg.jinja2
md5sum = 8786e4245db0d27dfa4815222d970e52 md5sum = f28a329e830ed737d468abcb4e89e1a2
[network-bench-cfg] [network-bench-cfg]
filename = network_bench.cfg.in filename = network_bench.cfg.in
...@@ -42,4 +42,4 @@ md5sum = cad2402bbd21907cfed6bc5af8c5d3ab ...@@ -42,4 +42,4 @@ md5sum = cad2402bbd21907cfed6bc5af8c5d3ab
[template-surykatka-ini] [template-surykatka-ini]
_update_hash_filename_ = surykatka.ini.jinja2 _update_hash_filename_ = surykatka.ini.jinja2
md5sum = 40870921e05d93b5843ab34abd7e3902 md5sum = a2de719a5a65438c8c3ee5195442beb6
...@@ -22,7 +22,19 @@ ...@@ -22,7 +22,19 @@
"check-certificate-expiration-days": { "check-certificate-expiration-days": {
"default": "15", "default": "15",
"title": "Default certificate expiration days check", "title": "Default certificate expiration days check",
"description": "Default amount of days to consider certitifcate as being to-be-expired (default: 15).", "description": "Default amount of days to consider certificate as being to-be-expired (default: 15).",
"type": "string"
},
"check-maximum-elapsed-time": {
"default": "2",
"title": "Default maximum elapsed time for a site to reply (seconds)",
"description": "Default maximum elapsed time for a site to reply to be considered good (default: 2s).",
"type": "string"
},
"failure-amount": {
"default": "2",
"title": "Default amount of failures to consider URL as in bad state",
"description": "Default amount of failures to consider URL as in bad state, can be set to higher value for endpoints with accepted short outages (default: 2).",
"type": "string" "type": "string"
} }
} }
......
...@@ -21,7 +21,19 @@ ...@@ -21,7 +21,19 @@
"check-certificate-expiration-days": { "check-certificate-expiration-days": {
"default": "Master default", "default": "Master default",
"title": "Certificate expiration days check", "title": "Certificate expiration days check",
"description": "Default amount of days to consider certitifcate as being to-be-expired (default: comes from master partition).", "description": "Amount of days to consider certificate as being to-be-expired (default: comes from master partition).",
"type": "string"
},
"check-maximum-elapsed-time": {
"default": "Master default",
"title": "Maximum elapsed time for a site to reply (seconds)",
"description": "Maximum elapsed time for a site to reply to be considered good (default: comes from master partition).",
"type": "string"
},
"failure-amount": {
"default": "Master default",
"title": "Amount of failures to consider URL as in bad state",
"description": "Amount of failures to consider URL as in bad state, can be set to higher value for endpoints with accepted short outages (default: comes from master partition).",
"type": "string" "type": "string"
} }
} }
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
{%- do CONFIGURATION.__setitem__(k[14:], v) %} {%- do CONFIGURATION.__setitem__(k[14:], v) %}
{%- endif %} {%- endif %}
{%- endfor %} {%- endfor %}
{%- set slave_instance_list = [] %} {%- set slave_instance_dict = {} %}
{%- set extra_slave_instance_list = slapparameter_dict.get('extra_slave_instance_list') %} {%- set extra_slave_instance_list = slapparameter_dict.get('extra_slave_instance_list') %}
{%- if extra_slave_instance_list %} {%- if extra_slave_instance_list %}
{#- Create slaves to process with setting up defaults #} {#- Create slaves to process with setting up defaults #}
...@@ -16,19 +16,31 @@ ...@@ -16,19 +16,31 @@
{%- if 'check-certificate-expiration-days' not in slave %} {%- if 'check-certificate-expiration-days' not in slave %}
{%- do slave.__setitem__('check-certificate-expiration-days', CONFIGURATION['check-certificate-expiration-days']) %} {%- do slave.__setitem__('check-certificate-expiration-days', CONFIGURATION['check-certificate-expiration-days']) %}
{%- endif %} {%- endif %}
{%- if 'failure-amount' not in slave %}
{%- do slave.__setitem__('failure-amount', CONFIGURATION['failure-amount']) %}
{%- endif %}
{%- if 'check-maximum-elapsed-time' not in slave %}
{%- do slave.__setitem__('check-maximum-elapsed-time', CONFIGURATION['check-maximum-elapsed-time']) %}
{%- endif %}
{%- if 'check-frontend-ip' not in slave %} {%- if 'check-frontend-ip' not in slave %}
{%- do slave.__setitem__('check-frontend-ip', CONFIGURATION['check-frontend-ip']) %} {%- do slave.__setitem__('check-frontend-ip', CONFIGURATION['check-frontend-ip']) %}
{%- endif %} {%- endif %}
{%- if 'url' in slave %} {%- if 'url' in slave %}
{%- do slave_instance_list.append(slave) %} {%- set class = slave['check-maximum-elapsed-time'] %}
{%- if class not in slave_instance_dict %}
{%- do slave_instance_dict.__setitem__(class, []) %}
{%- endif %}
{%- do slave_instance_dict[class].append(slave) %}
{%- endif %} {%- endif %}
{%- endfor %} {%- endfor %}
{%- endif %} {%- endif %}
{%- set part_list = [] %} {%- set part_list = [] %}
{%- for slave in sorted(slave_instance_list) %} {%- for class, slave_instance_list in slave_instance_dict.items() %}
{%- set part_id = 'http-query-' ~ slave['slave_reference'] ~ '-promise' %} {#- class is used to separate surykatka with different timeouts #}
{%- do part_list.append(part_id) %} {%- for slave in sorted(slave_instance_list) %}
{%- set safe_name = part_id.replace('_', '').replace('.', '-').replace(' ', '-') %} {%- set part_id = 'http-query-' ~ slave['slave_reference'] ~ '-promise' %}
{%- do part_list.append(part_id) %}
{%- set safe_name = part_id.replace('_', '').replace('.', '-').replace(' ', '-') %}
[{{part_id}}] [{{part_id}}]
<= monitor-promise-base <= monitor-promise-base
module = check_surykatka_json module = check_surykatka_json
...@@ -37,75 +49,81 @@ config-report = http_query ...@@ -37,75 +49,81 @@ config-report = http_query
config-url = {{ slave['url'] }} config-url = {{ slave['url'] }}
config-status-code = {{ slave['check-status-code'] }} config-status-code = {{ slave['check-status-code'] }}
config-certificate-expiration-days = {{ slave['check-certificate-expiration-days'] }} config-certificate-expiration-days = {{ slave['check-certificate-expiration-days'] }}
config-failure-amount = {{ slave['failure-amount'] }}
config-maximum-elapsed-time = {{ slave['check-maximum-elapsed-time'] }}
config-ip-list = {{ slave['check-frontend-ip'] }} config-ip-list = {{ slave['check-frontend-ip'] }}
config-json-file = ${surykatka-config:json} config-json-file = ${surykatka-config-{{ class }}:json}
{% endfor %} {%- endfor %}
[surykatka-bot-promise] [surykatka-bot-promise-{{ class }}]
<= monitor-promise-base <= monitor-promise-base
module = check_surykatka_json module = check_surykatka_json
name = surykatka-bot-promise.py name = surykatka-bot-promise-{{ class }}.py
config-report = bot_status config-report = bot_status
config-json-file = ${surykatka-config:json} config-json-file = ${surykatka-config-{{ class }}:json}
[buildout]
extends = {{ monitor_template_output }}
parts =
cron
cron-entry-surykatka-status
monitor-base
publish-connection-information
surykatka
surykatka-bot-promise
{% for part_id in sorted(part_list) %}
{{ part_id }}
{% endfor %}
eggs-directory = {{ eggs_directory }}
develop-eggs-directory = {{ develop_eggs_directory }}
offline = true
[surykatka-config] [surykatka-config-{{ class }}]
recipe = slapos.recipe.template:jinja2 recipe = slapos.recipe.template:jinja2
db = ${directory:srv}/surykatka.db db = ${directory:srv}/surykatka-{{ class }}.db
rendered = ${directory:etc}/surykatka.ini rendered = ${directory:etc}/surykatka-{{ class }}.ini
template = {{ template_surykatka_ini }} template = {{ template_surykatka_ini }}
slave_instance_list = {{ dumps(slave_instance_list) }} slave_instance_list = {{ dumps(slave_instance_list) }}
nameserver = {{ dumps(CONFIGURATION['nameserver']) }} nameserver = {{ dumps(CONFIGURATION['nameserver']) }}
json = ${directory:srv}/surykatka.json json = ${directory:srv}/surykatka-{{ class }}.json
{#- timeout is just a bit bigger than class time #}
timeout = {{ int(class) + 2 }}
context = context =
import json_module json import json_module json
key db :db key db :db
key nameserver :nameserver key nameserver :nameserver
key slave_instance_list :slave_instance_list key slave_instance_list :slave_instance_list
key timeout :timeout
[surykatka] [surykatka-{{ class }}]
recipe = slapos.cookbook:wrapper recipe = slapos.cookbook:wrapper
config = ${surykatka-config:rendered} config = ${surykatka-config-{{ class }}:rendered}
command-line = command-line =
{{ surykatka_binary }} --run crawl --reload --configuration ${:config} {{ surykatka_binary }} --run crawl --reload --configuration ${:config}
wrapper-path = ${monitor-directory:service}/${:_buildout_section_name_} wrapper-path = ${monitor-directory:service}/${:_buildout_section_name_}
hash-existing-files = ${buildout:directory}/software_release/buildout.cfg hash-existing-files = ${buildout:directory}/software_release/buildout.cfg
[surykatka-status-json] [surykatka-status-json-{{ class }}]
recipe = slapos.recipe.template:jinja2 recipe = slapos.recipe.template:jinja2
json = ${surykatka-config-{{ class }}:json}
template = inline:#!/bin/sh template = inline:#!/bin/sh
if {{ surykatka_binary }} --run status --configuration ${surykatka:config} --output json > ${surykatka-config:json}.tmp ; then if {{ surykatka_binary }} --run status --configuration ${surykatka-{{ class }}:config} --output json > ${:json}.tmp ; then
mv -f ${surykatka-config:json}.tmp ${surykatka-config:json} mv -f ${:json}.tmp ${:json}
else else
rm -f ${surykatka-config:json}.tmp rm -f ${:json}.tmp
fi fi
rendered = ${monitor-directory:bin}/${:_buildout_section_name_} rendered = ${monitor-directory:bin}/${:_buildout_section_name_}
mode = 0755 mode = 0755
[cron-entry-surykatka-status] [cron-entry-surykatka-status-{{ class }}]
recipe = slapos.cookbook:cron.d recipe = slapos.cookbook:cron.d
cron-entries = ${directory:etc}/cron.d cron-entries = ${directory:etc}/cron.d
name = surykatka-status name = surykatka-status-{{ class }}
frequency = */2 * * * * frequency = */2 * * * *
command = ${surykatka-status-json:rendered} command = ${surykatka-status-json-{{ class }}:rendered}
{%- do part_list.append('surykatka-' + class) %}
{%- do part_list.append('surykatka-bot-promise-' + class) %}
{%- do part_list.append('cron-entry-surykatka-status-' + class) %}
{%- endfor %}
[buildout]
extends = {{ monitor_template_output }}
parts =
cron
monitor-base
publish-connection-information
{% for part_id in sorted(part_list) %}
{{ part_id }}
{% endfor %}
eggs-directory = {{ eggs_directory }}
develop-eggs-directory = {{ develop_eggs_directory }}
offline = true
[publish-connection-information] [publish-connection-information]
recipe = slapos.cookbook:publish.serialised recipe = slapos.cookbook:publish.serialised
......
...@@ -44,6 +44,11 @@ context = import json_module json ...@@ -44,6 +44,11 @@ context = import json_module json
raw template_json_edgetest_test ${json-test-template:target} raw template_json_edgetest_test ${json-test-template:target}
mode = 0644 mode = 0644
[surykatka-wrapped]
recipe = slapos.cookbook:wrapper
wrapper-path = $${buildout:bin-directory}/surykatka-wrapped
command-line = ${surykatka:location}/bin/python3.7 ${surykatka:executable}
[instance-base-edgebot] [instance-base-edgebot]
recipe = slapos.recipe.template:jinja2 recipe = slapos.recipe.template:jinja2
template = ${template-monitor-edgebot:target} template = ${template-monitor-edgebot:target}
...@@ -59,7 +64,7 @@ context = import json_module json ...@@ -59,7 +64,7 @@ context = import json_module json
key slapparameter_dict slap-configuration:configuration key slapparameter_dict slap-configuration:configuration
key slap_software_type slap-configuration:slap-software-type key slap_software_type slap-configuration:slap-software-type
raw software_type edgebot raw software_type edgebot
key surykatka_binary :surykatka-binary key surykatka_binary surykatka-wrapped:wrapper-path
key template_surykatka_ini :template-surykatka-ini key template_surykatka_ini :template-surykatka-ini
raw buildout_bin ${buildout:bin-directory} raw buildout_bin ${buildout:bin-directory}
raw monitor_template_output ${monitor-template:output} raw monitor_template_output ${monitor-template:output}
...@@ -79,6 +84,8 @@ configuration.check-status-code = 200 ...@@ -79,6 +84,8 @@ configuration.check-status-code = 200
configuration.nameserver = configuration.nameserver =
configuration.check-frontend-ip = configuration.check-frontend-ip =
configuration.check-certificate-expiration-days = 15 configuration.check-certificate-expiration-days = 15
configuration.check-maximum-elapsed-time = 2
configuration.failure-amount = 2
# use monitor-base-port to have monitor listening on each instance # use monitor-base-port to have monitor listening on each instance
# on different port and also on different port than other services # on different port and also on different port than other services
# it makes it possible to instantiate it correctly on single IP, for # it makes it possible to instantiate it correctly on single IP, for
......
[SURYKATKA] [SURYKATKA]
INTERVAL = 120 INTERVAL = 120
TIMEOUT = {{ timeout }}
SQLITE = {{ db }} SQLITE = {{ db }}
{%- set nameserver_list = nameserver.split() %} {%- set nameserver_list = nameserver.split() %}
{%- if len(nameserver_list) > 0 %} {%- if len(nameserver_list) > 0 %}
......
This diff is collapsed.
...@@ -145,7 +145,7 @@ slapos.libnetworkcache = 0.20 ...@@ -145,7 +145,7 @@ slapos.libnetworkcache = 0.20
slapos.rebootstrap = 4.4 slapos.rebootstrap = 4.4
slapos.recipe.build = 0.42 slapos.recipe.build = 0.42
slapos.recipe.cmmi = 0.12 slapos.recipe.cmmi = 0.12
slapos.toolbox = 0.104 slapos.toolbox = 0.106
stevedore = 1.21.0 stevedore = 1.21.0
subprocess32 = 3.5.3 subprocess32 = 3.5.3
unicodecsv = 0.14.1 unicodecsv = 0.14.1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment