Commit c4e940f6 authored by Romain Courteaud

slapos_cloud: fixup allocation supply cell upgrade/consistency check

- Support Slave Instance allocated in the same tree
- allow creating allocation consistency ticket on Instance Node
- handle allocation consistency for Slave Instance without Instance Node
parent c27057c5
......@@ -2,6 +2,10 @@ from zExceptions import Unauthorized
if REQUEST is not None:
raise Unauthorized
# If there is no software product, skip directly
if software_product is None:
return (None, [])
allocation_cell_list = []
instance_tree = context
compute_node = None
......@@ -20,16 +24,18 @@ if len(root_instance_list) == 1:
# Search the instance node linked to this partition
soft_instance = partition.getAggregateRelatedValue(portal_type='Software Instance')
if soft_instance is None:
return (None, allocation_cell_list)
# No way to guess how the Slave Instance was allocated if the Software Instance is not there anymore
return (None, [])
instance_node = soft_instance.getSpecialiseRelatedValue(portal_type='Instance Node')
if instance_node is None:
return (None, allocation_cell_list)
compute_node = instance_node
if instance_node is not None:
compute_node = instance_node
# Else, the Slave Instance was allocated with 'slave_on_same_instance_tree_allocable'
elif (compute_node.getPortalType() != 'Remote Node'):
return (None, allocation_cell_list)
if software_product is None:
return (compute_node, allocation_cell_list)
raise NotImplementedError('Unhandled node portal type: %s for %s' % (
compute_node.getPortalType(),
compute_node.getRelativeUrl()
))
person = context.getDestinationSectionValue(checked_permission='Access contents information')
......@@ -52,6 +58,10 @@ if (compute_node is None) and (root_instance is not None):
elif (root_instance.getPortalType() == 'Software Instance'):
allocation_cell_list = [x for x, y in allocation_cell_node_list if ("Remote Node" in y) or ("Compute Node" in y)]
if (compute_node is not None) and (root_instance is not None) and (root_instance.getPortalType() == 'Slave Instance') and (compute_node.getPortalType() == 'Compute Node'):
# If a Slave Instance uses a Compute Node to allocate, it requires slave_on_same_instance_tree_allocable
allocation_cell_list = [x for x in allocation_cell_list if x.isSlaveOnSameInstanceTreeAllocable()]
# Remove duplicated allocation cells
# ie, multiple allocation cells matching the same release/type/node
software_release_uid_dict = {}
......
......@@ -11,90 +11,78 @@ project = context.getFollowUpValue()
if project.Project_isSupportRequestCreationClosed():
return
# Exceptionally, we pre-check whether the computer already has a ticket,
# since the calculation is a bit too expensive to "just try".
monitor_service_uid = portal.service_module.slapos_crm_monitoring.getUid()
ticket_portal_type = "Support Request"
if portal.portal_catalog.getResultValue(
portal_type=ticket_portal_type,
resource__uid=monitor_service_uid,
simulation_state=["validated", "submitted", "suspended"],
causality__uid=context.getUid(),
) is not None:
return
reference = context.getReference()
compute_node_title = context.getTitle()
# Use the same dict as monitoring so we stay consistent while writing
# notification messages
error_dict = {
'should_notify': None,
'ticket_title': "%s has missing allocation supplies." % compute_node_title,
'ticket_description': None,
'notification_message_reference': 'slapos-crm-compute_node_check_allocation_supply_state.notification',
'compute_node_title': compute_node_title,
'compute_node_id': reference,
'last_contact': None,
'issue_document_reference': None,
'message': None,
'compute_node_error_dict': {}
}
# Since we would like a single ticket per compute node do all at once:
all_node_error_dict = {}
# Since we would like a single ticket per node,
# we aggregate all detected errors
for compute_partition in context.contentValues(portal_type='Compute Partition'):
if compute_partition.getSlapState() == 'busy':
compute_partition_error_dict = compute_partition.ComputePartition_checkAllocationConsistencyState()
if compute_partition_error_dict:
error_dict['should_notify'] = True
error_dict['compute_node_error_dict'].update(compute_partition_error_dict)
if not error_dict['should_notify']:
return
for node_relative_url, node_release_dict in compute_partition_error_dict.items():
if node_relative_url not in all_node_error_dict:
all_node_error_dict[node_relative_url] = {}
for node_release_url, node_type_dict in node_release_dict.items():
if node_release_url not in all_node_error_dict[node_relative_url]:
all_node_error_dict[node_relative_url][node_release_url] = {}
for node_type, failing_instance in node_type_dict.items():
all_node_error_dict[node_relative_url][node_release_url][node_type] = failing_instance
message = """The following contains instances that has Software Releases/Types that are missing on this %s's Allocation Supply configuration:
ticket_list = []
# Generate a single ticket per non consistent node
ticket_portal_type = 'Support Request'
for non_consistent_node_relative_url in all_node_error_dict:
non_consistent_node = portal.restrictedTraverse(non_consistent_node_relative_url)
non_consistent_node_title = non_consistent_node.getTitle()
non_consistent_node_reference = non_consistent_node.getReference()
# Use the same dict as monitoring so we stay consistent while writing
# notification messages
compute_node_error_dict = all_node_error_dict[non_consistent_node_relative_url]
error_dict = {
'should_notify': True,
'ticket_title': "%s has missing allocation supplies." % non_consistent_node_title,
'ticket_description': None,
'notification_message_reference': 'slapos-crm-compute_node_check_allocation_supply_state.notification',
'compute_node_title': non_consistent_node_title,
'compute_node_id': non_consistent_node_reference,
'last_contact': None,
'issue_document_reference': None,
'message': None,
'compute_node_error_dict': compute_node_error_dict
}
""" % context.getPortalType()
message = """The following contains instances that has Software Releases/Types that are missing on this %s's Allocation Supply configuration:
# It includes instance nodes lacking supplies
error_dict_len = len(error_dict['compute_node_error_dict'])
for compute_node in error_dict['compute_node_error_dict']:
compute_node_error_dict = error_dict['compute_node_error_dict'][compute_node]
compute_node_value = portal.restrictedTraverse(compute_node)
if error_dict_len > 1 or compute_node_value.getPortalType() == 'Instance Node':
# Highlight better where it comes from, it may include instance nodes
# lacking supplies.
message += """
%s %s (%s)
""" % (compute_node_value.getTitle(),
compute_node_value.getReference(),
compute_node_value.getPortalType())
""" % non_consistent_node.getPortalType()
for sofware_release_url in compute_node_error_dict:
message += """ * Software Release: %s
""" % sofware_release_url
for sofware_type, value_list in six.iteritems(compute_node_error_dict[sofware_release_url]):
for sofware_type, instance in six.iteritems(compute_node_error_dict[sofware_release_url]):
message += """ * Software Type: %s (ie: %s)
""" % (sofware_type, value_list[0].getTitle())
error_dict['message'] = message
support_request = project.Project_createTicketWithCausality(
ticket_portal_type,
error_dict['ticket_title'],
error_dict['ticket_description'],
causality=context.getRelativeUrl(),
destination_decision=project.getDestination()
)
""" % (sofware_type, instance.getTitle())
if support_request is not None:
support_request.Ticket_createProjectEvent(
error_dict['ticket_title'], 'outgoing', 'Web Message',
portal.service_module.slapos_crm_information.getRelativeUrl(),
text_content=error_dict['message'],
content_type='text/plain',
notification_message=error_dict['notification_message_reference'],
#language=XXX,
substitution_method_parameter_dict=error_dict
error_dict['message'] = message
support_request = project.Project_createTicketWithCausality(
ticket_portal_type,
error_dict['ticket_title'],
error_dict['ticket_description'],
causality=non_consistent_node.getRelativeUrl(),
destination_decision=project.getDestination()
)
return support_request
if support_request is not None:
support_request.Ticket_createProjectEvent(
error_dict['ticket_title'], 'outgoing', 'Web Message',
portal.service_module.slapos_crm_information.getRelativeUrl(),
text_content=error_dict['message'],
content_type='text/plain',
notification_message=error_dict['notification_message_reference'],
#language=XXX,
substitution_method_parameter_dict=error_dict
)
ticket_list.append(support_request)
if not ticket_list:
return
if len(ticket_list) == 1:
return ticket_list[0]
return ticket_list
......@@ -42,30 +42,34 @@ for instance in instance_list:
software_product, software_release, software_type = instance_tree_context.InstanceTree_getSoftwareProduct()
allocable_compute_node, allocation_cell_list = instance_tree_context.InstanceTree_getNodeAndAllocationSupplyCellList(
software_product=software_product,
software_release=software_release,
software_type=software_type)
if software_product is None:
allocable_compute_node, allocation_cell_list = compute_node, []
else:
allocable_compute_node, allocation_cell_list = instance_tree_context.InstanceTree_getNodeAndAllocationSupplyCellList(
software_product=software_product,
software_release=software_release,
software_type=software_type)
if allocable_compute_node is None:
# Such a case is not expected
raise ValueError('No allocable_compute_node found for %s' % instance_tree_context.getRelativeUrl())
if not allocation_cell_list:
# Sampling of the structure
# error_dict = {
# compute_node or instance_node or remote_node : {
# software_release_url: {
# software_type: (sample_instance, compute_node)
# software_type: sample_instance
# }
# }
# }
if allocable_compute_node is None:
value = (instance, compute_node)
else:
value = (instance, allocable_compute_node)
compute_node_url = value[1].getRelativeUrl()
if compute_node_url not in error_dict:
error_dict[compute_node_url] = {}
if instance_software_release_url not in error_dict:
error_dict[compute_node_url][instance_software_release_url] = {}
if instance_software_type not in error_dict[compute_node_url][instance_software_release_url]:
error_dict[compute_node_url][instance.getUrlString()][instance_software_type] = value
# value = (instance, allocable_compute_node)
allocable_compute_node_url = allocable_compute_node.getRelativeUrl()
if allocable_compute_node_url not in error_dict:
error_dict[allocable_compute_node_url] = {}
if instance_software_release_url not in error_dict[allocable_compute_node_url]:
error_dict[allocable_compute_node_url][instance_software_release_url] = {}
if instance_software_type not in error_dict[allocable_compute_node_url][instance_software_release_url]:
error_dict[allocable_compute_node_url][instance_software_release_url][instance_software_type] = instance
return error_dict
......@@ -481,7 +481,10 @@ class TestSlapOSPDMCreateUpgradeDecisionSkins(SlapOSTestCaseMixin):
is_slave_on_same_instance_tree_allocable=True, disable_alarm=True)
self.tic()
self.assertEqual(None, instance_tree.InstanceTree_createUpgradeDecision())
self.checkCreatedUpgradeDecision(
instance_tree.InstanceTree_createUpgradeDecision(),
instance_tree, software_product, new_release_variation, type_variation
)
##########################################################################
# Allocated on Remote Node
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment