Commit 0be36735 authored by Rafael Monnerat's avatar Rafael Monnerat

slapos_crm: Implement alarm to check Allocation Consistency

    Add a daily alarm to verify that all allocated instances have a proper Allocation Supply configured and that they
    still respect the SLA.
parent 80da9249
Pipeline #38232 failed with stage
in 0 seconds
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="Alarm" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>active_sense_method_id</string> </key>
<value> <string>Alarm_checkProjectAllocationConsistencyState</string> </value>
</item>
<item>
<key> <string>automatic_solve</string> </key>
<value> <int>0</int> </value>
</item>
<item>
<key> <string>description</string> </key>
<value> <string>Check per project and trigger activities to verify all compute nodes (monitored) per project.</string> </value>
</item>
<item>
<key> <string>enabled</string> </key>
<value> <int>1</int> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>slapos_crm_project_allocation_consistency</string> </value>
</item>
<item>
<key> <string>periodicity_hour</string> </key>
<value>
<tuple>
<int>2</int>
</tuple>
</value>
</item>
<item>
<key> <string>periodicity_hour_frequency</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>periodicity_minute</string> </key>
<value>
<tuple>
<int>2</int>
</tuple>
</value>
</item>
<item>
<key> <string>periodicity_minute_frequency</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>periodicity_month</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>periodicity_month_day</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>periodicity_start_date</string> </key>
<value>
<object>
<klass>
<global name="_reconstructor" module="copy_reg"/>
</klass>
<tuple>
<global name="DateTime" module="DateTime.DateTime"/>
<global name="object" module="__builtin__"/>
<none/>
</tuple>
<state>
<tuple>
<float>1288051200.0</float>
<string>GMT</string>
</tuple>
</state>
</object>
</value>
</item>
<item>
<key> <string>periodicity_week</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Alarm</string> </value>
</item>
<item>
<key> <string>sense_method_id</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Create tickets for Compute nodes that contains inconsistent instances</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
# Alarm sense script (parameters: tag, fixit, params).
#
# Searches validated Compute Nodes whose monitor scope is "enabled", grouped
# by follow_up_uid, and activates
# ComputeNode_checkProjectAllocationConsistencyState on each result.
# NOTE(review): the group_by presumably yields one Compute Node per project,
# so the per-project script runs once per project -- confirm.
from Products.ZSQLCatalog.SQLCatalog import SimpleQuery, ComplexQuery

portal = context.getPortalObject()

# restrictedTraverse is called with None as default, so the category can be
# missing; in that case there is nothing to search for.
monitor_enabled_category = portal.restrictedTraverse(
  "portal_categories/monitor_scope/enabled", None)

if monitor_enabled_category is not None:
  portal.portal_catalog.searchAndActivate(
    validation_state='validated',
    method_id='ComputeNode_checkProjectAllocationConsistencyState',
    node=ComplexQuery(
      SimpleQuery(portal_type='Compute Node'),
      SimpleQuery(monitor_scope__uid=monitor_enabled_category.getUid()),
      logical_operator='and'
    ),
    group_by=['follow_up_uid'],
    # The same tag is forwarded to the called script and used for the
    # spawned activities, so the final activity below can wait on it.
    method_kw={'tag': tag},
    activate_kw={'tag': tag, 'priority': 2}
  )

# Always post the trailing activity, which runs only after every activity
# carrying `tag` is done (same pattern as the per-project script).
context.activate(after_tag=tag).getId()
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="_reconstructor" module="copy_reg"/>
</klass>
<tuple>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
<global name="object" module="__builtin__"/>
<none/>
</tuple>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>tag, fixit, params</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Alarm_checkProjectAllocationConsistencyState</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
from DateTime import DateTime
portal = context.getPortalObject()
if context.getMonitorScope() == "disabled":
return
project = context.getFollowUpValue()
if project.Project_isSupportRequestCreationClosed():
return
# Check if there is another consistency ticket already issued
# ....
compute_node_error_dict = {}
reference = context.getReference()
compute_node_title = context.getTitle()
# Use same dict from monitoring so we are consistent while writting
# Notification messages
error_dict = {
'should_notify': None,
'ticket_title': "%s has inconsistent allocated instances" % compute_node_title,
'ticket_description': None,
'notification_message_reference': None,
'compute_node_title': compute_node_title,
'compute_node_id': reference,
'last_contact': None,
'issue_document_reference': None,
'message': None,
'compute_node_error_dict': {}
}
# Since we would like a single ticket per compute node do all at once:
for compute_partition in context.contentValues(portal_type='Compute Partition'):
if compute_partition.getSlapState() == 'busy':
sla_error = compute_partition.ComputePartition_checkAllocatedSlaState()
allocation_supply_error = compute_partition.ComputePartition_checkAllocatedSupplyState()
compute_node_error_dict[compute_partition.getRelativeUrl()] = {
'sla_error': sla_error,
'allocation_supply_error': allocation_supply_error
}
if sla_error is not None or allocation_supply_error is not None:
error_dict['should_notify'] = True
if not error_dict['should_notify']:
return
## Write minimal message here, and replace the dict
error_dict['message'] = compute_node_error_dict
support_request = project.Project_createTicketWithCausality(
'Support Request',
error_dict['ticket_title'],
error_dict['ticket_description'],
causality=context.getRelativeUrl(),
destination_decision=project.getDestination()
)
if support_request is not None:
support_request.Ticket_createProjectEvent(
error_dict['ticket_title'], 'outgoing', 'Web Message',
portal.service_module.slapos_crm_information.getRelativeUrl(),
text_content=error_dict['message'],
content_type='text/plain',
notification_message=error_dict['notification_message_reference'],
#language=XXX,
substitution_method_parameter_dict=error_dict
)
return support_request
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="_reconstructor" module="copy_reg"/>
</klass>
<tuple>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
<global name="object" module="__builtin__"/>
<none/>
</tuple>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>ComputeNode_checkAllocationConsistencyState</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
portal = context.getPortalObject()
monitor_enabled_category = portal.restrictedTraverse(
"portal_categories/monitor_scope/enabled", None)
project = context.getFollowUpValue(portal_type='Project')
assert project is not None
if project.Project_isSupportRequestCreationClosed():
return
if monitor_enabled_category is not None:
project_uid = project.getUid()
portal.portal_catalog.searchAndActivate(
portal_type='Compute Node',
validation_state='validated',
monitor_scope__uid=monitor_enabled_category.getUid(),
follow_up__uid=project_uid,
method_id='ComputeNode_checkAllocationConsistencyState',
# This alarm bruteforce checking all documents,
# without changing them directly.
# Increase priority to not block other activities
activate_kw={'tag': tag, 'priority': 2}
)
project.activate(after_tag=tag).getId()
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="_reconstructor" module="copy_reg"/>
</klass>
<tuple>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
<global name="object" module="__builtin__"/>
<none/>
</tuple>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>tag</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>ComputeNode_checkProjectAllocationConsistencyState</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
compute_partition = context
compute_node = compute_partition.getParentValue()
assert compute_node.getPortalType() == 'Compute Node'
instance = compute_partition.getAggregateRelatedValue(portal_type='Software Instance')
assert instance is not None, 'Instance is None'
assert instance.getValidationState() != 'validated', 'Instance is invalid'
sla_error_list = []
sla_dict = instance.getSlaXmlAsDict()
if not sla_dict:
return sla_error_list
# Simple check of instance SLAs
if "computer_guid" in sla_dict:
computer_guid = sla_dict.pop("computer_guid")
if compute_node.getReference() != computer_guid:
sla_error_list.append('computer_guid do not match (%s != %s)' % (
computer_guid, compute_node.getReference()))
if "instance_guid" in sla_dict:
if instance.getPortalType() != 'Slave Instance':
sla_error_list.append('instance_guid is provided to a Software Instance')
else:
instance_guid = sla_dict.pop("instance_guid")
software_instance = compute_partition.getAggregateRelatedValue(portal_type='Software Instance')
if software_instance is None:
sla_error_list.append('instance_guid provided but no Software Instance was found')
if software_instance.getReference() != instance_guid:
sla_error_list.append('instance_guid do not match (%s != %s)' % (
instance_guid != software_instance.getReference()))
if 'network_guid' in sla_dict:
network_guid = sla_dict.pop('network_guid')
network_reference = compute_node.getSubordinationReference()
if network_reference != network_guid:
sla_error_list.append('network_guid do not match (%s != %s)' % (
network_guid, network_reference))
project_reference = compute_node.getFollowUpReference()
if 'project_guid' in sla_dict:
project_guid = sla_dict.pop("project_guid")
if project_reference != project_guid:
sla_error_list.append('project_guid do not match (%s != %s)' % (
project_guid, project_reference))
instance_project_reference = instance.getFollowUpReference()
if project_reference != instance_project_reference:
sla_error_list.append("Instance and Compute node project don't match (%s != %s)" % (
project_reference, instance_project_reference))
return sla_error_list
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="_reconstructor" module="copy_reg"/>
</klass>
<tuple>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
<global name="object" module="__builtin__"/>
<none/>
</tuple>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>ComputePartition_checkAllocatedSlaState</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
compute_partition = context
compute_node = compute_partition.getParentValue()
assert compute_node.getPortalType() == 'Compute Node'
instance = compute_partition.getAggregateRelatedValue(portal_type='Software Instance')
assert instance is not None, 'Instance is None'
assert instance.getValidationState() != 'validated', 'Instance is invalid'
project = instance.getFollowUpValue()
assert project is not None, 'Project is None'
instance_tree = instance.getSpecialiseValue(portal_type="Instance Tree")
instance_tree_context = instance_tree.asContext(
source_reference=instance.getSourceReference(),
url_string=instance.getUrlString()
)
software_product, software_release, software_type = instance_tree_context.InstanceTree_getSoftwareProduct()
if software_product is None:
return 'No Software Product matching'
person = instance_tree.getDestinationSectionValue()
allocation_cell_list = project.Project_getSoftwareProductPredicateList(
software_product=software_product,
software_product_type=software_type,
software_product_release=software_release,
destination_value=person,
node_value=compute_node,
predicate_portal_type='Allocation Supply Cell'
)
if not allocation_cell_list:
return 'No Allocation Supply'
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="_reconstructor" module="copy_reg"/>
</klass>
<tuple>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
<global name="object" module="__builtin__"/>
<none/>
</tuple>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>ComputePartition_checkAllocatedSupplyState</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment