Commit a403cff3 authored by Alain Takoudjou's avatar Alain Takoudjou Committed by Alain Takoudjou

Introduce new garbage collector for instances ignored by buildout

Buildout cannot request the destruction of a sub-instance when the user
removes it from the instance parameters. Example: request a KVM cluster with
instances kvm1 and kvm2, then edit the parameters and remove kvm2 from the
list. Buildout will simply ignore that instance, and it will be removed only
when the hosting subscription is destroyed.

                 -- kvm1
root instance --|
                 -X- kvm2

So when processing a partition, we send the master the list of requested
sub-instances. If the previous list was ['kvm1', 'kvm2'], then after removing
kvm2 the list will be ['kvm1'] (for the root instance). Committing this list to
the master removes the link between the root instance and kvm2 (predecessor
category).

A new alarm will search for unlinked instances (instances that have no parent)
and destroy them, along with their subtrees.
parent 4675c289
......@@ -1681,6 +1681,123 @@ class TestSlapOSSlapToolInstanceAccess(TestSlapOSSlapToolMixin):
if os.path.exists(self.instance_request_simulator):
os.unlink(self.instance_request_simulator)
def test_updateInstancePredecessorList(self):
    """
    Request two sub-instances, then check that the predecessor list is kept
    when the same titles are sent back and shrinks when one title is omitted.
    """
    self._makeComplexComputer()
    root_instance = self.start_requested_software_instance
    partition_id = root_instance.getAggregateValue(
        portal_type='Computer Partition').getReference()
    self.login(root_instance.getReference())

    def send_requested_list(instance_reference_xml):
        # Push the list of requested titles to the slap tool and let
        # pending activities run.
        self.portal_slap.updateComputerPartitionRelatedInstanceList(
            computer_id=self.computer_id,
            computer_partition_id=partition_id,
            instance_reference_xml=instance_reference_xml)
        self.tic()

    # Attach two software instances which differ only by their title
    shared_kw = dict(
        software_release='http://a.release',
        software_type='type',
        instance_xml=self.generateSafeXml(),
        sla_xml=self.generateSafeXml(),
        shared=False,
        state='started',
    )
    for title in ('Instance0', 'Instance1'):
        root_instance.requestInstance(software_title=title, **shared_kw)
    self.tic()

    self.assertEqual(len(root_instance.getPredecessorList()), 2)
    self.assertSameSet(['Instance0', 'Instance1'],
                       root_instance.getPredecessorTitleList())

    # Update with no changes
    send_requested_list("""
<marshal>
<list id="i2"><string>Instance0</string><string>Instance1</string></list>
</marshal>""")
    self.assertSameSet(['Instance0', 'Instance1'],
                       root_instance.getPredecessorTitleList())

    # Update: Instance0 was not requested
    send_requested_list("""
<marshal>
<list id="i2"><string>Instance1</string></list>
</marshal>""")
    self.assertSameSet(['Instance1'],
                       root_instance.getPredecessorTitleList())
def test_updateInstancePredecessorList_one_child(self):
    """
    Request a single sub-instance, then send an empty list of requested
    titles and check that the predecessor list becomes empty.
    """
    self._makeComplexComputer()
    root_instance = self.start_requested_software_instance
    partition_id = root_instance.getAggregateValue(
        portal_type='Computer Partition').getReference()
    self.login(root_instance.getReference())

    # Attach one software instance
    root_instance.requestInstance(
        software_release='http://a.release',
        software_type='type',
        instance_xml=self.generateSafeXml(),
        sla_xml=self.generateSafeXml(),
        shared=False,
        software_title='Instance0',
        state='started',
    )
    self.tic()

    self.assertEqual(len(root_instance.getPredecessorList()), 1)
    self.assertSameSet(['Instance0'],
                       root_instance.getPredecessorTitleList())

    # Send an empty list: nothing is requested anymore
    empty_list_xml = '<marshal><list id="i2" /></marshal>'
    self.portal_slap.updateComputerPartitionRelatedInstanceList(
        computer_id=self.computer_id,
        computer_partition_id=partition_id,
        instance_reference_xml=empty_list_xml)
    self.tic()

    self.assertEqual([], root_instance.getPredecessorTitleList())
def test_updateInstancePredecessorList_no_child(self):
    """
    With no sub-instance requested, check that the predecessor list stays
    empty after sending an empty list and after sending an unknown title.
    """
    self._makeComplexComputer()
    root_instance = self.start_requested_software_instance
    partition_id = root_instance.getAggregateValue(
        portal_type='Computer Partition').getReference()
    self.login(root_instance.getReference())

    self.assertEqual([], root_instance.getPredecessorTitleList())

    candidate_xml_list = (
        # Empty list of requested instances
        '<marshal><list id="i2" /></marshal>',
        # Try with something that doesn't exist
        """
<marshal>
<list id="i2"><string>instance0</string></list>
</marshal>""",
    )
    for instance_list_xml in candidate_xml_list:
        self.portal_slap.updateComputerPartitionRelatedInstanceList(
            computer_id=self.computer_id,
            computer_partition_id=partition_id,
            instance_reference_xml=instance_list_xml)
        self.tic()
        self.assertEqual([], root_instance.getPredecessorTitleList())
def test_availableComputerPartition(self):
self._makeComplexComputer()
partition_id = self.start_requested_software_instance.getAggregateValue(
......
......@@ -518,6 +518,18 @@ class SlapTool(BaseTool):
connection_xml,
slave_reference)
security.declareProtected(Permissions.AccessContentsInformation,
'updateComputerPartitionRelatedInstanceList')
def updateComputerPartitionRelatedInstanceList(
        self, computer_id, computer_partition_id, instance_reference_xml):
    """
    Update the Software Instance predecessor list.

    Thin entry point: forwards its three arguments, unchanged and in order,
    to _updateComputerPartitionRelatedInstanceList and returns its result.
    """
    update = self._updateComputerPartitionRelatedInstanceList
    return update(computer_id, computer_partition_id, instance_reference_xml)
security.declareProtected(Permissions.AccessContentsInformation,
'supplySupply')
def supplySupply(self, url, computer_id, state='available'):
......@@ -1365,6 +1377,44 @@ class SlapTool(BaseTool):
software_instance._instance_guid = instance_guid
return xml_marshaller.xml_marshaller.dumps(software_instance)
@UnrestrictedMethod
def _updateComputerPartitionRelatedInstanceList(
        self, computer_id, computer_partition_id, instance_reference_xml):
    """
    Make the Software Instance predecessor list match the given list.

    instance_reference_xml -- marshalled list of the titles of the
    sub-instances requested by the instance of this computer partition.

    Predecessors whose title is absent from that list are removed from the
    predecessor list. Per the original author's note, an instance unlinked
    this way is later trashed by the garbage collector.

    The last XML processed is remembered per instance, so an identical
    payload is ignored.
    """
    instance = self._getSoftwareInstanceForComputerPartition(
        computer_id, computer_partition_id)
    cache_reference = '%s-PREDLIST' % instance.getReference()

    if self._getLastData(cache_reference) == instance_reference_xml:
        # Same payload as the last one processed: nothing to do.
        return

    requested_title_list = xml_marshaller.xml_marshaller.loads(
        instance_reference_xml)
    predecessor_value_list = instance.getPredecessorValueList(
        portal_type=['Software Instance', 'Slave Instance'])
    linked_title_list = [
        predecessor.getTitle() for predecessor in predecessor_value_list]

    # Only edit the document if at least one link has to be removed
    if any(title not in requested_title_list for title in linked_title_list):
        kept_url_list = []
        for predecessor in predecessor_value_list:
            if predecessor.getTitle() in requested_title_list:
                kept_url_list.append(predecessor.getRelativeUrl())

        message = '%s, %s: Updating predecessor list to %s' % (
            computer_id, computer_partition_id, kept_url_list)
        LOG('SlapTool', INFO, message, error=False)
        instance.edit(
            predecessor_list=kept_url_list,
            comment='predecessor_list edited to unlink non commited instances')

    self._storeLastData(cache_reference, instance_reference_xml)
####################################################
# Internals methods
####################################################
......
......@@ -50,6 +50,7 @@ from lxml import etree
from slapos.slap.slap import NotFoundError
from slapos.slap.slap import ServerError
from slapos.slap.slap import COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME
from slapos.util import mkdir_p, chownDirectory, string_to_boolean
from slapos.grid.exception import BuildoutFailedError
from slapos.grid.SlapObject import Software, Partition
......@@ -666,6 +667,19 @@ stderr_logfile_backups=1
if not promise_present:
self.logger.info("No promise.")
def _endInstallationTransaction(self, computer_partition):
    """
    Send the references recorded in the partition's transaction file.

    The file is looked up in the partition directory under
    self.instance_root. If it exists, every non-empty line is passed, as a
    list, to computer_partition.setComputerPartitionRelatedInstanceList.
    If it does not exist, nothing is sent.
    """
    partition_id = computer_partition.getId()
    transaction_file_path = os.path.join(
        self.instance_root,
        partition_id,
        COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME % partition_id)

    if not os.path.exists(transaction_file_path):
        return

    with open(transaction_file_path, 'r') as transaction_file:
        line_list = transaction_file.read().split('\n')
        # Drop empty entries, such as the one after the trailing newline
        reference_list = [line for line in line_list if line]
        computer_partition.setComputerPartitionRelatedInstanceList(
            reference_list)
def _addFirewallRule(self, rule_command):
"""
"""
......@@ -904,6 +918,14 @@ stderr_logfile_backups=1
self.logger.debug('Check if %s requires processing...' % computer_partition_id)
instance_path = os.path.join(self.instance_root, computer_partition_id)
os.environ['SLAPGRID_INSTANCE_ROOT'] = self.instance_root
# If a transaction file already exists for this partition, remove it so that
# it is regenerated during this new transaction
transaction_file_name = COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME % computer_partition_id
transaction_file_path = os.path.join(instance_path, transaction_file_name)
if os.path.exists(transaction_file_path):
os.unlink(transaction_file_path)
# Try to get partition timestamp (last modification date)
timestamp_path = os.path.join(
......@@ -1027,6 +1049,7 @@ stderr_logfile_backups=1
local_partition.install()
computer_partition.available()
local_partition.start()
self._endInstallationTransaction(computer_partition)
if self.firewall_conf:
self._setupComputerPartitionFirewall(computer_partition,
partition_ip_list)
......
......@@ -337,6 +337,14 @@ class IComputerPartition(IBuildoutController, IRequester):
computer partition.
"""
def setComputerPartitionRelatedInstanceList(instance_reference_list):
    """
    Set the relation between this Instance and all of its children.

    instance_reference_list -- list of instances requested by this Computer
    Partition.
    """
class IComputer(Interface):
"""
Computer interface specification
......
......@@ -36,6 +36,7 @@ __all__ = ["slap", "ComputerPartition", "Computer", "SoftwareRelease",
"Supply", "OpenOrder", "NotFoundError",
"ResourceNotReady", "ServerError", "ConnectionError"]
import os
import json
import logging
import re
......@@ -67,6 +68,7 @@ fallback_logger.addHandler(fallback_handler)
DEFAULT_SOFTWARE_TYPE = 'RootSoftwareInstance'
COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME = '.slapos-request-transaction-%s'
class SlapDocument:
def __init__(self, connection_helper=None, hateoas_navigator=None):
......@@ -81,6 +83,7 @@ class SlapRequester(SlapDocument):
"""
Abstract class that allow to factor method for subclasses that use "request()"
"""
def _requestComputerPartition(self, request_dict):
try:
xml = self._connection_helper.POST('requestComputerPartition', data=request_dict)
......@@ -406,9 +409,38 @@ class ComputerPartition(SlapRequester):
self._partition_id = partition_id
self._request_dict = request_dict
# Just create an empty file (for nothing requested yet)
self._updateTransactionFile(partition_reference=None)
def __getinitargs__(self):
return (self._computer_id, self._partition_id, )
def _updateTransactionFile(self, partition_reference=None):
"""
Store reference to all Instances requested by this Computer Parition
"""
# Environ variable set by Slapgrid while processing this partition
instance_root = os.environ.get('SLAPGRID_INSTANCE_ROOT', '')
if not instance_root or not self._partition_id:
return
transaction_file_name = COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME % self._partition_id
transaction_file_path = os.path.join(instance_root, self._partition_id,
transaction_file_name)
try:
if partition_reference is None:
if os.access(os.path.join(instance_root, self._partition_id), os.W_OK):
if os.path.exists(transaction_file_path):
return
transac_file = open(transaction_file_path, 'w')
transac_file.close()
else:
with open(transaction_file_path, 'a') as transac_file:
transac_file.write('%s\n' % partition_reference)
except OSError:
return
def request(self, software_release, software_type, partition_reference,
shared=False, partition_parameter_kw=None, filter_kw=None,
state=None):
......@@ -440,6 +472,7 @@ class ComputerPartition(SlapRequester):
'filter_xml': xml_marshaller.dumps(filter_kw),
'state': xml_marshaller.dumps(state),
}
self._updateTransactionFile(partition_reference)
return self._requestComputerPartition(request_dict)
def building(self):
......@@ -635,6 +668,15 @@ class ComputerPartition(SlapRequester):
)
return xml_marshaller.loads(xml)
def setComputerPartitionRelatedInstanceList(self, instance_reference_list):
    """
    POST the marshalled instance_reference_list, together with this
    partition's computer id and partition id, to
    'updateComputerPartitionRelatedInstanceList'.
    """
    post = self._connection_helper.POST
    payload = {
        'computer_id': self._computer_id,
        'computer_partition_id': self._partition_id,
        'instance_reference_xml': xml_marshaller.dumps(instance_reference_list),
    }
    post('updateComputerPartitionRelatedInstanceList', data=payload)
def _addIpv6Brackets(url):
# if master_url contains an ipv6 without bracket, add it
# Note that this is mostly to limit specific issues with
......
......@@ -29,6 +29,7 @@ import logging
import os
import unittest
import urlparse
import tempfile
import httmock
......@@ -53,6 +54,8 @@ class SlapMixin(unittest.TestCase):
print 'Testing against SLAP server %r' % self.server_url
self.slap = slapos.slap.slap()
self.partition_id = 'PARTITION_01'
if os.environ.has_key('SLAPGRID_INSTANCE_ROOT'):
del os.environ['SLAPGRID_INSTANCE_ROOT']
def tearDown(self):
    """Nothing to clean up."""
    return None
......@@ -786,6 +789,84 @@ class TestComputerPartition(SlapMixin):
# request was done works correctly
self.assertEqual(requested_partition_id, requested_partition.getId())
def test_request_with_slapgrid_request_transaction(self):
    """
    Check the transaction file written by the slap library when
    SLAPGRID_INSTANCE_ROOT is set: registering a partition creates it empty
    and does not override an existing one, and every request() appends the
    requested partition reference to it.
    """
    import shutil
    from slapos.slap.slap import COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME

    partition_id = 'PARTITION_01'
    instance_root = tempfile.mkdtemp()
    # Do not leak the temporary tree nor the environment variable into
    # other tests.
    self.addCleanup(shutil.rmtree, instance_root, True)
    partition_root = os.path.join(instance_root, partition_id)
    os.mkdir(partition_root)
    os.environ['SLAPGRID_INSTANCE_ROOT'] = instance_root
    self.addCleanup(os.environ.pop, 'SLAPGRID_INSTANCE_ROOT', None)
    transaction_file_name = COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME % partition_id
    transaction_file_path = os.path.join(partition_root, transaction_file_name)

    def handler(url, req):
        qs = urlparse.parse_qs(url.query)
        if (url.path == '/registerComputerPartition'
                and 'computer_reference' in qs
                and 'computer_partition_reference' in qs):
            slap_partition = slapos.slap.ComputerPartition(
                qs['computer_reference'][0],
                qs['computer_partition_reference'][0])
            return {
                'status_code': 200,
                'content': xml_marshaller.xml_marshaller.dumps(slap_partition)
            }
        elif (url.path == '/getComputerInformation'
                and 'computer_id' in qs):
            slap_computer = slapos.slap.Computer(qs['computer_id'][0])
            slap_computer._software_release_list = []
            slap_partition = slapos.slap.ComputerPartition(
                qs['computer_id'][0],
                partition_id)
            slap_computer._computer_partition_list = [slap_partition]
            return {
                'status_code': 200,
                'content': xml_marshaller.xml_marshaller.dumps(slap_computer)
            }
        elif url.path == '/requestComputerPartition':
            raise RequestWasCalled
        else:
            return {
                'status_code': 404
            }

    def read_reference_list():
        # One reference per line; ignore the trailing newline.
        with open(transaction_file_path, 'r') as f:
            return f.read().strip().split('\n')

    with httmock.HTTMock(handler):
        self.computer_guid = self._getTestComputerId()
        self.slap = slapos.slap.slap()
        self.slap.initializeConnection(self.server_url)
        computer_partition = self.slap.registerComputerPartition(
            self.computer_guid, partition_id)

        # Registering creates an empty transaction file
        self.assertTrue(os.path.exists(transaction_file_path))
        with open(transaction_file_path, 'r') as f:
            content = f.read()
        self.assertEqual(content, '')

        self.assertRaises(RequestWasCalled,
                          computer_partition.request,
                          'http://server/new/' + self._getTestComputerId(),
                          'software_type', 'myref')
        self.assertTrue(os.path.exists(transaction_file_path))
        self.assertEqual(read_reference_list(), ['myref'])

        # Registering again must not override the existing file
        computer_partition = self.slap.registerComputerPartition(
            self.computer_guid, partition_id)
        self.assertTrue(os.path.exists(transaction_file_path))
        self.assertEqual(read_reference_list(), ['myref'])

        # Request a second instance
        self.assertRaises(RequestWasCalled,
                          computer_partition.request,
                          'http://server/new/' + self._getTestComputerId(),
                          'software_type', 'mysecondref')
        # Sort the de-duplicated references: iteration order of a set is
        # not guaranteed, so comparing list(set(...)) directly is fragile.
        self.assertEqual(sorted(set(read_reference_list())),
                         ['myref', 'mysecondref'])
def _test_new_computer_partition_state(self, state):
"""
Helper method to automate assertions of failing states on new Computer
......
......@@ -51,6 +51,7 @@ from slapos.grid.utils import md5digest
from slapos.grid.watchdog import Watchdog
from slapos.grid import SlapObject
from slapos.grid.SlapObject import WATCHDOG_MARK
from slapos.slap.slap import COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME
import slapos.grid.SlapObject
import httmock
......@@ -107,6 +108,8 @@ class BasicMixin(object):
self._tempdir = tempfile.mkdtemp()
self.software_root = os.path.join(self._tempdir, 'software')
self.instance_root = os.path.join(self._tempdir, 'instance')
if os.environ.has_key('SLAPGRID_INSTANCE_ROOT'):
del os.environ['SLAPGRID_INSTANCE_ROOT']
logging.basicConfig(level=logging.DEBUG)
self.setSlapgrid()
......@@ -338,6 +341,8 @@ class ComputerForTest(object):
return {'status_code': 200}
if url.path == '/softwareInstanceBang':
return {'status_code': 200}
if url.path == "/updateComputerPartitionRelatedInstanceList":
return {'status_code': 200}
if url.path == '/softwareInstanceError':
instance.error_log = '\n'.join(
[
......@@ -1647,16 +1652,21 @@ class TestSlapgridUsageReport(MasterMixin, unittest.TestCase):
# Then run usage report and see if it is still working
computer.sequence = []
self.assertEqual(self.grid.agregateAndSendUsage(), slapgrid.SLAPGRID_SUCCESS)
# registerComputerPartition will create one more file:
from slapos.slap.slap import COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME
request_list_file = COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME % instance.name
self.assertInstanceDirectoryListEqual(['0'])
self.assertItemsEqual(os.listdir(instance.partition_path),
['.slapgrid', '.0_wrapper.log', 'buildout.cfg',
'etc', 'software_release', 'worked', '.slapos-retention-lock-delay'])
'etc', 'software_release', 'worked',
'.slapos-retention-lock-delay', request_list_file])
wrapper_log = os.path.join(instance.partition_path, '.0_wrapper.log')
self.assertLogContent(wrapper_log, 'Working')
self.assertInstanceDirectoryListEqual(['0'])
self.assertItemsEqual(os.listdir(instance.partition_path),
['.slapgrid', '.0_wrapper.log', 'buildout.cfg',
'etc', 'software_release', 'worked', '.slapos-retention-lock-delay'])
'etc', 'software_release', 'worked',
'.slapos-retention-lock-delay', request_list_file])
wrapper_log = os.path.join(instance.partition_path, '.0_wrapper.log')
self.assertLogContent(wrapper_log, 'Working')
self.assertEqual(computer.sequence,
......@@ -2313,3 +2323,18 @@ class TestSlapgridCPWithFirewall(MasterMixin, unittest.TestCase):
rules_list = json.loads(frules.read())
self.checkRuleFromIpSource(ip, [source_ip[1]], rules_list)
class TestSlapgridCPWithTransaction(MasterMixin, unittest.TestCase):
    """Checks on the per-partition request transaction file."""

    def test_one_partition(self):
        """
        A transaction file present before processing the partition list
        must no longer exist once processing has succeeded.
        """
        computer = ComputerForTest(self.software_root, self.instance_root)
        with httmock.HTTMock(computer.request_handler):
            instance = computer.instance_list[0]
            file_name = COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME % instance.name
            partition = os.path.join(self.instance_root, '0')
            request_list_file = os.path.join(partition, file_name)

            # Simulate a file left over by a previous run
            with open(request_list_file, 'w') as stale_file:
                stale_file.write('some partition')

            status = self.grid.processComputerPartitionList()
            self.assertEqual(status, slapgrid.SLAPGRID_SUCCESS)
            self.assertInstanceDirectoryListEqual(['0'])
            self.assertFalse(os.path.exists(request_list_file))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment