Commit dadfec3c authored by Łukasz Nowak's avatar Łukasz Nowak

monitor: Implement edgetest regions

Request and publication is switched to fully serialised approach, so that
complex data structures like lists and objects can be safely transmitted.

The requests are backward compatible, so for current and simple usage
automatic region is setup.

Much more information are published to main and shared instances, like
lists of available and assigned regions. Regions can be added and removed,
which will be reflected automatically for the slaves.

Information send to the bot node are minimised to only needed ones.

check-frontend-ip-list can be configured according to the list, and each
element overrides previous one:

 * globally on cluster
 * default per region
 * globally per slave
 * specific per region on slave

Note: It's known that:

 {%-     set base_slave_dict = json_module.loads(slave.pop('_')) %} {#- XXX: Unsafe! #}

      is really unsafe, but for most of usage of monitoring slaves it's
      considered good enough.
parent f1dba924
......@@ -14,7 +14,7 @@
# not need these here).
[template]
filename = instance.cfg
md5sum = 6c75441c334051d3c7d50f52b14eac0e
md5sum = 3e579a4643cf2e4ea611bd37dfa5fc31
[template-monitor]
_update_hash_filename_ = instance-monitor.cfg.jinja2
......@@ -26,11 +26,11 @@ md5sum = 2eb5596544d9c341acf653d4f7ce2680
[template-monitor-edgetest]
_update_hash_filename_ = instance-monitor-edgetest.cfg.jinja2
md5sum = 7f257f581e1e0fdec0dcfedd5ea6ddfd
md5sum = 188b00a1afceb35e73e9ce3f83b18910
[template-monitor-edgebot]
_update_hash_filename_ = instance-monitor-edgebot.cfg.jinja2
md5sum = a5cbf6d4d00a6341e5c3c92b567d8d63
md5sum = d9844380e929a6990fa37ab72db81c56
[network-bench-cfg]
filename = network_bench.cfg.in
......@@ -42,4 +42,4 @@ md5sum = d3cfa1f6760e3fa64ccd64acf213bdfb
[template-surykatka-ini]
_update_hash_filename_ = surykatka.ini.jinja2
md5sum = 1cd568bb04c51de871277a86635c704a
md5sum = 609c6cca763b73a80fa05ee56475eb20
......@@ -2,16 +2,57 @@
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"properties": {
"region-dict": {
"title": "Regions",
"description": "Defines regions of the cluster",
"patternProperties": {
".*": {
"properties": {
"state": {
"title": "State",
"description": "State of the node of the region. Can be used to destroy not needed regions.",
"type": "string",
"default": "started",
"enum": [
"started",
"stopped",
"destroyed"
]
},
"sla-computer_guid": {
"title": "GUID of the computer on which this region shall be deployed",
"description": "Unique identifier of the computer, like \"COMP-1234\". By default, let Master choose a computer.",
"type": "string",
"default": ""
},
"nameserver-list": {
"default": [],
"title": "Nameservers",
"description": "List of nameservers to use.",
"type": "array"
},
"check-frontend-ip-list": {
"default": [],
"title": "Default Frontend IPs to check",
"description": "List of default frontend IPs to check, if empty no constraint is used.",
"type": "array"
}
},
"type": "object"
}
},
"type": "object"
},
"nameserver-list": {
"default": [],
"title": "Nameservers",
"description": "List of name servers to use.",
"title": "Nameservers (backward compatibility)",
"description": "List of nameservers to use. Note: This is backward compatibility, use region-dict for full configuration control.",
"type": "array"
},
"check-frontend-ip-list": {
"default": [],
"title": "Default frontend IPs to check",
"description": "Default list of Frontend IPs to check, if empty no constraint is used.",
"title": "Default Frontend IPs to check (backward compatibility)",
"description": "List of default frontend IPs to check, if empty no constraint is used. Note: This is backward compatibility, use region-dict for full configuration control.",
"type": "array"
}
}
......
......@@ -7,6 +7,34 @@
"description": "URL to check, like https://example.com/",
"type": "string"
},
"region-dict": {
"title": "Applicable Regions",
"description": "Puts the check on the defined regions. No definition will result with presence in all regions.",
"patternProperties": {
".*": {
"properties": {
"state": {
"title": "State",
"description": "State of the check of the region. Used only to make it correctly visible in the SlapOS Master UI if no other parameters are defined.",
"type": "string",
"default": "present",
"enum": [
"present"
]
},
"check-frontend-ip-list": {
"default": [],
"title": "Frontend IPs to check",
"description": "List of default frontend IPs to check, if empty no constraint is used. Defaults to region configuration.",
"type": "array"
}
},
"type": "object"
}
},
"type": "object",
"default": {}
},
"check-status-code": {
"title": "HTTP Code Check",
"description": "Expected response HTTP Code.",
......@@ -35,17 +63,17 @@
"type": "object",
"default": {}
},
"check-frontend-ip-list": {
"title": "Frontend IPs to check",
"description": "List of Frontend IPs to check, if empty no constraint is used (default: comes from master partition).",
"type": "array"
},
"failure-amount": {
"title": "Failure Amount",
"description": "Amount of failures to consider URL as in bad state, can be set to higher value for endpoints with accepted short outages.",
"type": "number",
"default": 2,
"minimum": 1
},
"check-frontend-ip-list": {
"title": "Frontend IPs to check (backward compatibility)",
"description": "List of Frontend IPs to check, if empty no constraint is used. Defaults to region configuration. Note: Use region-dict's check-frontend-ip-list to ensure specific check on each region.",
"type": "array"
}
}
}
......@@ -9,30 +9,27 @@
{%- set extra_slave_instance_list = slapparameter_dict.get('extra_slave_instance_list') %}
{%- if extra_slave_instance_list %}
{#- Create slaves to process with setting up defaults #}
{%- for slave in json_module.loads(extra_slave_instance_list) | sort(attribute='slave_title') %}
{%- if '_' in slave %}
{%- do slave.update(json_module.loads(slave.pop('_'))) %}
{%- do slave.setdefault('check-status-code', 200) %}
{%- do slave.setdefault('check-http-header-dict', {}) %}
{%- do slave.setdefault('check-certificate-expiration-days', 15) %}
{%- do slave.setdefault('failure-amount', 2) %}
{%- do slave.setdefault('check-maximum-elapsed-time', 2) %}
{%- do slave.setdefault('check-frontend-ip-list', CONFIGURATION['check-frontend-ip-list']) %}
{%- if 'url' in slave %}
{%- set class = slave['check-maximum-elapsed-time'] %}
{%- if class not in slave_instance_dict %}
{%- do slave_instance_dict.__setitem__(class, []) %}
{%- endif %}
{%- do slave_instance_dict[class].append(slave) %}
{%- for slave in extra_slave_instance_list | sort(attribute='-slave-title') %}
{%- do slave.setdefault('check-status-code', 200) %}
{%- do slave.setdefault('check-http-header-dict', {}) %}
{%- do slave.setdefault('check-certificate-expiration-days', 15) %}
{%- do slave.setdefault('failure-amount', 2) %}
{%- do slave.setdefault('check-maximum-elapsed-time', 2) %}
{%- do slave.setdefault('check-frontend-ip-list', CONFIGURATION['check-frontend-ip-list']) %}
{%- if 'url' in slave %}
{%- set class = slave['check-maximum-elapsed-time'] %}
{%- if class not in slave_instance_dict %}
{%- do slave_instance_dict.__setitem__(class, []) %}
{%- endif %}
{%- do slave_instance_dict[class].append(slave) %}
{%- endif %}
{%- endfor %}
{%- endif %}
{%- set part_list = [] %}
{%- for class, slave_instance_list in slave_instance_dict.items() %}
{#- class is used to separate surykatka with different timeouts #}
{%- for slave in slave_instance_list | sort(attribute='slave_title') %}
{%- set part_id = 'http-query-' ~ slave['slave_reference'] ~ '-promise' %}
{%- for slave in slave_instance_list | sort(attribute='-slave-title') %}
{%- set part_id = 'http-query-' ~ slave['-slave-reference'] ~ '-promise' %}
{%- do part_list.append(part_id) %}
{%- set safe_name = part_id.replace('_', '').replace('.', '-').replace(' ', '-') %}
[{{part_id}}]
......
......@@ -17,33 +17,85 @@ extra-context =
section slave_information slap-configuration
{% set part_list = [] -%}
# Publish information for each slave
{%- set edgebot_software_type = 'edgebot' %}
{%- set edgebot_quantity = slapparameter_dict.pop('edgebot-quantity', '1') | int %}
{%- set edgebot_list = [] %}
{%- set edgebot_section_list = [] %}
{%- set slave_list_name = 'extra_slave_instance_list' %}
{%- set request_dict = {} %}
{%- set namebase = "edgebot" %}
{%- set authorized_slave_list = [] %}
{%- set monitor_base_url_dict = {} -%}
{%- if 'region-dict' not in slapparameter_dict %}
{#- Be nice and allow to work with default configuration #}
{%- do slapparameter_dict.__setitem__('region-dict', {
'1': {
'sla-computer_guid': slap_configuration['computer'],
'state': slap_configuration['instance-state'],
'nameserver-list': slapparameter_dict.get('nameserver-list', []),
'check-frontend-ip-list': slapparameter_dict.get('check-frontend-ip-list', []),
}
}) %}
{%- endif %}
{%- set active_region_list = [] %}
{%- for region_name in sorted(slapparameter_dict['region-dict']) %}
{%- set region_parameter_dict = slapparameter_dict['region-dict'][region_name] %}
{%- if region_parameter_dict.get('state', 'started') == 'started' %}
{%- do active_region_list.append(region_name) %}
{%- endif %}
{%- endfor %}
{%- set authorized_slave_dict = {} %}
{%- set publish_slave_dict_dict = {} %}
{%- for slave in slave_instance_list | sort(attribute='slave_title') %}
{%- do authorized_slave_list.append(slave) %}
{%- set slave_reference = slave.pop('slave_reference') %}
{%- set publish_dict = {'assigned-region-dict': {}} %}
{%- if '_' in slave %}
{%- set base_slave_dict = json_module.loads(slave.pop('_')) %} {#- XXX: Unsafe! #}
{%- do base_slave_dict.__setitem__('-slave-title', slave['slave_title']) %}
{%- do base_slave_dict.__setitem__('-slave-reference', slave_reference) %}
{%- set slave_region_dict = base_slave_dict.pop('region-dict', {}) %}
{%- if slave_region_dict == {} %}
{%- for region in active_region_list %}
{%- do slave_region_dict.__setitem__(region, {}) %}
{%- endfor %}
{%- endif %}
{%- for region in slave_region_dict %}
{%- if region in active_region_list %}
{%- set region_info = {
'nameserver-list': slapparameter_dict['region-dict'][region].get('nameserver-list') or slapparameter_dict.get('slapparameter_dict') or [],
'check-frontend-ip-list': slave_region_dict[region].get('check-frontend-ip-list') or base_slave_dict.get('check-frontend-ip-list') or slapparameter_dict['region-dict'][region].get('check-frontend-ip-list') or slapparameter_dict.get('check-frontend-ip-list') or [],
} %}
{%- do publish_dict['assigned-region-dict'].__setitem__(region, region_info) %}
{%- set slave_dict = base_slave_dict.copy() %}
{%- do slave_dict.update(region_info) %}
{%- if region not in authorized_slave_dict %}
{%- do authorized_slave_dict.__setitem__(region, [slave_dict]) %}
{%- else %}
{%- do authorized_slave_dict[region].append(slave_dict) %}
{%- endif %}
{%- endif %}
{%- endfor %}
{%- endif %}
{%- do publish_slave_dict_dict.__setitem__(slave_reference, publish_dict) %}
{%- endfor %}
{%- set monitor_base_port = int(slap_configuration['configuration.monitor-base-port']) %}
{%- for i in range(1, edgebot_quantity + 1) %}
{%- set edgebot_name = "%s-%s" % (namebase, i) %}
{%- set request_section_title = 'request-%s' % edgebot_name %}
{%- do edgebot_list.append(edgebot_name) %}
{%- do edgebot_section_list.append(request_section_title) %}
{%- set number = {'i': 1} %}
{%- for region_name in sorted(slapparameter_dict['region-dict']) %}
{%- set region_parameter_dict = slapparameter_dict['region-dict'][region_name] %}
{%- set edgebot_name = "%s-%s" % (namebase, region_name) %}
{%- set request_section_title = 'request-%s' % (hashlib_module.md5(edgebot_name.encode('utf-8')).hexdigest(),) %}
{%- do part_list.append(request_section_title) %}
{%- do request_dict.__setitem__(request_section_title,
{
'config': {'monitor-httpd-port': monitor_base_port + i},
'name': edgebot_name,
'sla': {},
'state': 'started',
}) %}
{#- Note: monitor-httpd-port will vary on regions being added and removed,
but this is accepted, as it's only internal trick #}
{%- do request_dict.__setitem__(
request_section_title,
{
'config': {
'monitor-httpd-port': monitor_base_port + number['i'],
'check-frontend-ip-list': region_parameter_dict.get('check-frontend-ip-list', []),
'nameserver-list': region_parameter_dict.get('nameserver-list', []),
'extra_slave_instance_list': authorized_slave_dict.get(region_name, [])
},
'name': edgebot_name,
'sla': {'computer_guid': region_parameter_dict['sla-computer_guid']},
'state': region_parameter_dict.get('state', 'started'),
}) %}
{%- do number.__setitem__('i', number['i'] + 1) %}
{%- endfor %}
[replicate]
......@@ -54,38 +106,36 @@ config-monitor-username = ${monitor-instance-parameter:username}
config-monitor-password = ${monitor-htpasswd:passwd}
software-url = ${slap-connection:software-release-url}
software-type = {{edgebot_software_type}}
return = monitor-base-url
{%- set monitor_base_url_dict = {} -%}
{% for section, edgebot_request in request_dict.items() %}
[{{section}}]
<= replicate
name = {{ edgebot_request.get('name') }}
{%- if edgebot_request.get('state') %}
state = {{ edgebot_request.get('state') }}
{%- endif%}
{%- set slave_configuration_dict = slapparameter_dict %}
{%- do slave_configuration_dict.update(edgebot_request.get('config')) %}
{%- do slave_configuration_dict.__setitem__(slave_list_name, json_module.dumps(authorized_slave_list)) %}
{%- for config_key, config_value in slave_configuration_dict.items() %}
name = {{ edgebot_request['name'] }}
state = {{ edgebot_request['state'] }}
{%- if edgebot_request['state'] != 'destroyed' %}
{%- do monitor_base_url_dict.__setitem__(section, '${' ~ section ~ ':connection-monitor-base-url}') %}
return = monitor-base-url
{%- endif %}
{%- set edgebot_configuration_dict = edgebot_request['config'] %}
{%- for config_key, config_value in edgebot_configuration_dict.items() %}
config-{{ config_key }} = {{ dumps(config_value) }}
{% endfor -%}
{%- if edgebot_request.get('sla') %}
{%- for parameter, value in edgebot_request.get('sla').items() %}
{%- for parameter, value in edgebot_request['sla'].items() %}
sla-{{ parameter }} = {{ value }}
{%- endfor %}
{%- else %}
# As no SLA was provided, by default it is requested on the same computer
sla-computer_guid = ${slap-connection:computer-id}
{% endif %}
{%- do monitor_base_url_dict.__setitem__(section, '${' ~ section ~ ':connection-monitor-base-url}') -%}
{%- endfor %}
{%- endfor %}
{%- set directory_list = [] -%}
{%- for slave_instance in slave_instance_list -%}
{%- set publish_section_title = 'publish-%s' % slave_instance.get('slave_reference') -%}
# Publish information for each slave
{%- for slave_reference, publish_dict in publish_slave_dict_dict.items() -%}
{%- set publish_section_title = 'publish-%s' % (slave_reference,) -%}
{%- do part_list.append(publish_section_title) %}
[{{ publish_section_title }}]
recipe = slapos.cookbook:publish
-slave-reference = {{ slave_instance.get('slave_reference') }}
recipe = slapos.cookbook:publish.serialised
available-region-list = {{ dumps(list(active_region_list)) }}
-slave-reference = {{ slave_reference }}
{%- for key, value in publish_dict.items() %}
{{ key }} = {{ dumps(value) }}
{%- endfor %}
{% endfor %}
[monitor-conf-parameters]
......@@ -104,4 +154,10 @@ parts +=
{% for part in part_list %}
{{ ' %s' % part }}
{%- endfor %}
[publish-connection-information]
recipe = slapos.cookbook:publish.serialised
active-region-list = {{ dumps(list(active_region_list)) }}
sla-computer_guid = {{ dumps(slap_configuration['computer']) }}
sla-instance_guid = {{ dumps(slap_configuration['instance-guid']) }}
{%- endif %} {#- if slap_software_type == software_type #}
......@@ -32,6 +32,7 @@ template = ${template-monitor-edgetest:target}
rendered = $${buildout:directory}/template-monitor-base-edgetest.cfg
extensions = jinja2.ext.do
context = import json_module json
import hashlib_module hashlib
key develop_eggs_directory buildout:develop-eggs-directory
key eggs_directory buildout:eggs-directory
key slapparameter_dict slap-configuration:configuration
......
......@@ -9,7 +9,7 @@ NAMESERVER =
{%- endfor %}
{% endif %}
URL =
{%- for slave in slave_instance_list | sort(attribute='slave_title') %}
{%- for slave in slave_instance_list | sort(attribute='-slave-title') %}
{%- if 'url' in slave %}
{{ slave['url'] }}
{%- endif -%}
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment