Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
slapos.core
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
21
Merge Requests
21
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Jobs
Commits
Open sidebar
nexedi
slapos.core
Commits
c4872a03
Commit
c4872a03
authored
Oct 06, 2022
by
Alain Takoudjou
Browse files
Options
Browse Files
Download
Plain Diff
slapos node boot: Start computer partitions without connecting to master
See merge request
nexedi/slapos.core!431
parents
f85280ef
cada4581
Pipeline
#23994
failed with stage
in 0 seconds
Changes
6
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
133 additions
and
17 deletions
+133
-17
slapos/cli/boot.py
slapos/cli/boot.py
+60
-7
slapos/cli/format.py
slapos/cli/format.py
+6
-0
slapos/format.py
slapos/format.py
+3
-2
slapos/grid/slapgrid.py
slapos/grid/slapgrid.py
+8
-0
slapos/tests/test_cli.py
slapos/tests/test_cli.py
+12
-1
slapos/tests/test_slapgrid.py
slapos/tests/test_slapgrid.py
+44
-7
No files found.
slapos/cli/boot.py
View file @
c4872a03
...
...
@@ -31,6 +31,7 @@ from __future__ import print_function
import
subprocess
from
six.moves.urllib.parse
import
urlparse
from
six.moves
import
xmlrpc_client
as
xmlrpclib
from
time
import
sleep
import
glob
import
os
...
...
@@ -41,6 +42,11 @@ from slapos.cli.command import check_root_user
from
slapos.cli.entry
import
SlapOSApp
from
slapos.cli.config
import
ConfigCommand
from
slapos.format
import
isGlobalScopeAddress
from
slapos.grid.slapgrid
import
(
COMPUTER_PARTITION_REQUESTED_STATE_FILENAME
,
COMPUTER_PARTITION_STARTED_STATE
)
from
slapos.grid.svcbackend
import
(
_getSupervisordSocketPath
,
getSupervisorRPC
,
launchSupervisord
)
from
slapos.util
import
string_to_boolean
import
argparse
import
logging
...
...
@@ -59,6 +65,48 @@ def _removeTimestamp(instancehome, partition_base_name):
logger
.
info
(
"Removing %s"
,
timestamp_path
)
os
.
remove
(
timestamp_path
)
def _startComputerPartition(partition_id, supervisord_socket):
  """
  Ask supervisord to start the process group of one computer partition.

  partition_id is used as the supervisord process group name and
  supervisord_socket is handed to getSupervisorRPC to reach supervisord.

  An xmlrpclib.Fault whose faultString starts with 'BAD_NAME:' is treated
  as "nothing to start" and only logged; any other fault is re-raised.
  """
  try:
    with getSupervisorRPC(supervisord_socket) as supervisor:
      # Second argument is False; presumably supervisor's "wait" flag, so
      # the call should not block until processes run -- TODO confirm.
      supervisor.startProcessGroup(partition_id, False)
  except xmlrpclib.Fault as fault:
    if not fault.faultString.startswith('BAD_NAME:'):
      raise
    logger.info("Nothing to start on %s...", partition_id)
    return
  logger.info("Requested start of %s...", partition_id)
def _startComputerPartitionList(instance_root, partition_base_name):
  """
  Start the services of every partition whose requested state is 'started'.

  Supervisord is launched first, then each path under instance_root whose
  name begins with partition_base_name is inspected: when it contains a
  requested-state file whose content equals COMPUTER_PARTITION_STARTED_STATE,
  _startComputerPartition is called with the directory name as partition id.
  Partitions without that file, or with any other content, are left alone.
  """
  pattern = os.path.join(instance_root, "%s*" % partition_base_name)
  launchSupervisord(instance_root=instance_root, logger=logger)
  for partition_dir in glob.glob(pattern):
    state_file = os.path.join(
      partition_dir, COMPUTER_PARTITION_REQUESTED_STATE_FILENAME)
    socket_path = _getSupervisordSocketPath(instance_root, logger)
    if not os.path.exists(state_file):
      # No requested state was recorded for this partition.
      continue
    with open(state_file) as state_fd:
      requested_state = state_fd.read()
    if requested_state != COMPUTER_PARTITION_STARTED_STATE:
      continue
    # Strip a trailing '/' so basename yields the directory name.
    partition_id = os.path.basename(partition_dir.rstrip('/'))
    _startComputerPartition(partition_id, socket_path)
def
_runBang
(
app
):
"""
...
...
@@ -76,7 +124,9 @@ def _runFormat(app):
Launch slapos node format.
"""
logger
.
info
(
"[BOOT] Invoking slapos node format..."
)
result
=
app
.
run
([
'node'
,
'format'
,
'--now'
,
'--verbose'
])
# The '--local' parameter prevents the node format command from posting data
# to the master, so this command can work without internet access and still set up partition IPs.
result
=
app
.
run
([
'node'
,
'format'
,
'--now'
,
'--local'
,
'--verbose'
])
if
result
==
1
:
return
0
return
1
...
...
@@ -196,6 +246,15 @@ class BootCommand(ConfigCommand):
if
ipv6_interface
is
not
None
:
_waitIpv6Ready
(
ipv6_interface
)
app
=
SlapOSApp
()
# Make sure slapos node format returns ok
while
not
_runFormat
(
app
):
logger
.
error
(
"[BOOT] Fail to format, try again in 15 seconds..."
)
sleep
(
15
)
# Start computer partition services
_startComputerPartitionList
(
instance_root
,
partition_base_name
)
# Check that node can ping master
if
valid_ipv4
(
master_hostname
):
_test_ping
(
master_hostname
)
...
...
@@ -205,12 +264,6 @@ class BootCommand(ConfigCommand):
# hostname
_ping_hostname
(
master_hostname
)
app
=
SlapOSApp
()
# Make sure slapos node format returns ok
while
not
_runFormat
(
app
):
logger
.
error
(
"[BOOT] Fail to format, try again in 15 seconds..."
)
sleep
(
15
)
# Make sure slapos node bang returns ok
while
not
_runBang
(
app
):
logger
.
error
(
"[BOOT] Fail to bang, try again in 15 seconds..."
)
...
...
slapos/cli/format.py
View file @
c4872a03
...
...
@@ -83,6 +83,12 @@ class FormatCommand(ConfigCommand):
help
=
'Launch slapformat without delay'
' (default: %(default)s)'
)
ap
.
add_argument
(
'--local'
,
default
=
False
,
# can have a default as it is not in .cfg
action
=
"store_true"
,
help
=
'Keep format data locally, do not post xml to master'
' (default: %(default)s)'
)
ap
.
add_argument
(
'-n'
,
'--dry_run'
,
default
=
False
,
# can have a default as it is not in .cfg
action
=
"store_true"
,
...
...
slapos/format.py
View file @
c4872a03
...
...
@@ -1408,6 +1408,7 @@ def do_format(conf):
computer
.
dump
(
path_to_xml
=
conf
.
computer_xml
,
path_to_json
=
conf
.
computer_json
,
logger
=
conf
.
logger
)
if
not
conf
.
local
:
conf
.
logger
.
info
(
'Posting information to %r'
%
conf
.
master_url
)
computer
.
send
(
conf
)
conf
.
logger
.
info
(
'slapos successfully prepared the computer.'
)
...
...
slapos/grid/slapgrid.py
View file @
c4872a03
...
...
@@ -90,6 +90,7 @@ SLAPGRID_PROMISE_FAIL = 2
PROMISE_TIMEOUT
=
20
COMPUTER_PARTITION_TIMESTAMP_FILENAME
=
'.timestamp'
COMPUTER_PARTITION_REQUESTED_STATE_FILENAME
=
'.requested_state'
COMPUTER_PARTITION_LATEST_BANG_TIMESTAMP_FILENAME
=
'.slapos_latest_bang_timestamp'
COMPUTER_PARTITION_INSTALL_ERROR_FILENAME
=
'.slapgrid-%s-error.log'
COMPUTER_PARTITION_WAIT_LIST_FILENAME
=
'.slapos-report-wait-service-list'
...
...
@@ -1125,6 +1126,10 @@ stderr_logfile_backups=1
instance_path
,
COMPUTER_PARTITION_TIMESTAMP_FILENAME
)
partition_state_path
=
os
.
path
.
join
(
instance_path
,
COMPUTER_PARTITION_REQUESTED_STATE_FILENAME
)
parameter_dict
=
computer_partition
.
getInstanceParameterDict
()
timestamp
=
parameter_dict
.
get
(
'timestamp'
)
...
...
@@ -1225,6 +1230,7 @@ stderr_logfile_backups=1
return
os
.
remove
(
timestamp_path
)
os
.
remove
(
partition_state_path
)
# Include Partition Logging
log_folder_path
=
"%s/.slapgrid/log"
%
instance_path
...
...
@@ -1339,6 +1345,8 @@ stderr_logfile_backups=1
if
timestamp
:
with
open
(
timestamp_path
,
'w'
)
as
f
:
f
.
write
(
str
(
timestamp
))
with
open
(
partition_state_path
,
'w'
)
as
f
:
f
.
write
(
str
(
computer_partition_state
))
def
FilterComputerPartitionList
(
self
,
computer_partition_list
):
"""
...
...
slapos/tests/test_cli.py
View file @
c4872a03
...
...
@@ -415,8 +415,13 @@ class TestCliBoot(CliMixin):
os
.
mkdir
(
os
.
path
.
join
(
instance_root
,
partition_base_name
+
'1'
))
timestamp
=
os
.
path
.
join
(
instance_root
,
partition_base_name
+
'1'
,
'.timestamp'
)
requested_state_path
=
os
.
path
.
join
(
instance_root
,
partition_base_name
+
'1'
,
'.requested_state'
)
with
open
(
timestamp
,
'w'
)
as
f
:
f
.
write
(
"1578552471"
)
with
open
(
requested_state_path
,
'w'
)
as
f
:
f
.
write
(
"started"
)
# make a config file using this instance root
with
tempfile
.
NamedTemporaryFile
(
mode
=
'w'
)
as
slapos_conf
:
...
...
@@ -441,17 +446,21 @@ class TestCliBoot(CliMixin):
patch
(
'slapos.cli.boot.netifaces.ifaddresses'
,
return_value
=
{
socket
.
AF_INET6
:
({
'addr'
:
'2000::1'
},),},)
as
ifaddresses
,
\
patch
(
'slapos.cli.boot._startComputerPartition'
,
return_value
=
None
)
as
start_partition
,
\
patch
(
'slapos.cli.boot.launchSupervisord'
,
return_value
=
None
),
\
patch
(
'slapos.cli.boot._ping_hostname'
,
return_value
=
1
)
as
_ping_hostname
:
app
.
run
((
'node'
,
'boot'
))
# boot command runs as root
check_root_user
.
assert_called_once
()
# Computer partition was started during boot
start_partition
.
assert_called_once
()
# it waits for interface to have an IPv6 address
ifaddresses
.
assert_called_once_with
(
'interface_name_from_config'
)
# then ping master hostname to wait for connectivity
_ping_hostname
.
assert_called_once_with
(
'slap.vifib.com'
)
# then format and bang
SlapOSApp
().
run
.
assert_any_call
([
'node'
,
'format'
,
'--now'
,
'--verbose'
])
SlapOSApp
().
run
.
assert_any_call
([
'node'
,
'format'
,
'--now'
,
'--
local'
,
'--
verbose'
])
SlapOSApp
().
run
.
assert_any_call
([
'node'
,
'bang'
,
'-m'
,
'Reboot'
])
# timestamp files have been removed
...
...
@@ -473,6 +482,7 @@ class TestCliBoot(CliMixin):
patch
(
'slapos.cli.boot.netifaces.ifaddresses'
,
side_effect
=
[
net1
,
net2
,
net3
]),
\
patch
(
'slapos.cli.boot._ping_hostname'
,
return_value
=
0
),
\
patch
(
'slapos.cli.boot._startComputerPartitionList'
,
return_value
=
None
)
as
start_partition
,
\
patch
(
'slapos.cli.format.check_root_user'
,
return_value
=
True
),
\
patch
(
'slapos.cli.format.logging.FileHandler'
,
return_value
=
logging
.
NullHandler
()),
\
patch
(
'slapos.cli.bang.check_root_user'
,
return_value
=
True
),
\
...
...
@@ -482,6 +492,7 @@ class TestCliBoot(CliMixin):
app
.
run
((
'node'
,
'boot'
))
check_root_user
.
assert_called_once
()
start_partition
.
assert_called_once
()
self
.
assertEqual
(
do_format
.
call_count
,
3
)
self
.
assertEqual
(
do_bang
.
call_count
,
3
)
...
...
slapos/tests/test_slapgrid.py
View file @
c4872a03
...
...
@@ -1401,7 +1401,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self
.
assertInstanceDirectoryListEqual
([
'0'
])
partition
=
os
.
path
.
join
(
self
.
instance_root
,
'0'
)
six
.
assertCountEqual
(
self
,
os
.
listdir
(
partition
),
[
'.slapgrid'
,
'.timestamp'
,
'buildout.cfg'
,
'software_release'
,
'worked'
,
'.slapos-retention-lock-delay'
])
[
'.slapgrid'
,
'.timestamp'
,
'
.requested_state'
,
'
buildout.cfg'
,
'software_release'
,
'worked'
,
'.slapos-retention-lock-delay'
])
six
.
assertCountEqual
(
self
,
os
.
listdir
(
self
.
software_root
),
[
instance
.
software
.
software_hash
])
timestamp_path
=
os
.
path
.
join
(
instance
.
partition_path
,
'.timestamp'
)
self
.
setSlapgrid
()
...
...
@@ -1422,7 +1422,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self
.
assertInstanceDirectoryListEqual
([
'0'
])
partition
=
os
.
path
.
join
(
self
.
instance_root
,
'0'
)
six
.
assertCountEqual
(
self
,
os
.
listdir
(
partition
),
[
'.slapgrid'
,
'.timestamp'
,
'buildout.cfg'
,
[
'.slapgrid'
,
'.timestamp'
,
'
.requested_state'
,
'
buildout.cfg'
,
'software_release'
,
'worked'
,
'.slapos-retention-lock-delay'
])
six
.
assertCountEqual
(
self
,
os
.
listdir
(
self
.
software_root
),
[
instance
.
software
.
software_hash
])
...
...
@@ -1445,7 +1445,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self
.
assertInstanceDirectoryListEqual
([
'0'
])
partition
=
os
.
path
.
join
(
self
.
instance_root
,
'0'
)
six
.
assertCountEqual
(
self
,
os
.
listdir
(
partition
),
[
'.slapgrid'
,
'.timestamp'
,
'buildout.cfg'
,
'software_release'
,
'worked'
,
'.slapos-retention-lock-delay'
])
[
'.slapgrid'
,
'.timestamp'
,
'
.requested_state'
,
'
buildout.cfg'
,
'software_release'
,
'worked'
,
'.slapos-retention-lock-delay'
])
six
.
assertCountEqual
(
self
,
os
.
listdir
(
self
.
software_root
),
[
instance
.
software
.
software_hash
])
instance
.
timestamp
=
str
(
int
(
timestamp
)
-
1
)
self
.
assertEqual
(
self
.
launchSlapgrid
(),
slapgrid
.
SLAPGRID_SUCCESS
)
...
...
@@ -1463,7 +1463,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self
.
assertInstanceDirectoryListEqual
([
'0'
])
partition
=
os
.
path
.
join
(
self
.
instance_root
,
'0'
)
six
.
assertCountEqual
(
self
,
os
.
listdir
(
partition
),
[
'.slapgrid'
,
'.timestamp'
,
'buildout.cfg'
,
'software_release'
,
'worked'
,
'.slapos-retention-lock-delay'
])
[
'.slapgrid'
,
'.timestamp'
,
'
.requested_state'
,
'
buildout.cfg'
,
'software_release'
,
'worked'
,
'.slapos-retention-lock-delay'
])
six
.
assertCountEqual
(
self
,
os
.
listdir
(
self
.
software_root
),
[
instance
.
software
.
software_hash
])
instance
.
timestamp
=
str
(
int
(
timestamp
)
+
1
)
self
.
assertEqual
(
self
.
launchSlapgrid
(),
slapgrid
.
SLAPGRID_SUCCESS
)
...
...
@@ -1491,7 +1491,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self
.
assertInstanceDirectoryListEqual
([
'0'
])
partition
=
os
.
path
.
join
(
self
.
instance_root
,
'0'
)
six
.
assertCountEqual
(
self
,
os
.
listdir
(
partition
),
[
'.slapgrid'
,
'.timestamp'
,
'buildout.cfg'
,
'software_release'
,
'worked'
,
'.slapos-retention-lock-delay'
])
[
'.slapgrid'
,
'.timestamp'
,
'
.requested_state'
,
'
buildout.cfg'
,
'software_release'
,
'worked'
,
'.slapos-retention-lock-delay'
])
six
.
assertCountEqual
(
self
,
os
.
listdir
(
self
.
software_root
),
[
instance
.
software
.
software_hash
])
instance
.
timestamp
=
None
...
...
@@ -1523,7 +1523,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self
.
launchSlapgrid
()
partition
=
os
.
path
.
join
(
self
.
instance_root
,
'0'
)
six
.
assertCountEqual
(
self
,
os
.
listdir
(
partition
),
[
'.slapgrid'
,
'.timestamp'
,
'buildout.cfg'
,
[
'.slapgrid'
,
'.timestamp'
,
'
.requested_state'
,
'
buildout.cfg'
,
'software_release'
,
'worked'
,
'.slapos-retention-lock-delay'
])
time
.
sleep
(
2
)
...
...
@@ -1533,7 +1533,7 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self
.
launchSlapgrid
()
six
.
assertCountEqual
(
self
,
os
.
listdir
(
partition
),
[
'.slapgrid'
,
'.timestamp'
,
'buildout.cfg'
,
[
'.slapgrid'
,
'.timestamp'
,
'
.requested_state'
,
'
buildout.cfg'
,
'software_release'
,
'worked'
,
'.slapos-retention-lock-delay'
])
def
test_one_partition_periodicity_from_file_does_not_disturb_others
(
self
):
...
...
@@ -1710,6 +1710,43 @@ class TestSlapgridCPPartitionProcessing(MasterMixin, unittest.TestCase):
self
.
launchSlapgrid
()
self
.
assertEqual
(
mock_method
.
call_count
,
2
)
def test_partition_requested_state_created(self):
  """
  A successful partition processing run leaves a '.requested_state' file in
  the partition, holding slapgrid.COMPUTER_PARTITION_STOPPED_STATE.
  """
  computer_class = self.getTestComputerClass()
  computer = computer_class(self.software_root, self.instance_root)
  with httmock.HTTMock(computer.request_handler):
    instance = computer.instance_list[0]
    instance.timestamp = str(int(time.time()))

    status = self.grid.processComputerPartitionList()
    self.assertEqual(status, slapgrid.SLAPGRID_SUCCESS)
    self.assertInstanceDirectoryListEqual(['0'])

    # The partition directory must now contain the requested state file
    # next to the other files written during processing.
    partition = os.path.join(self.instance_root, '0')
    expected_partition_content = [
      '.slapgrid',
      '.timestamp',
      '.requested_state',
      'buildout.cfg',
      'software_release',
      'worked',
      '.slapos-retention-lock-delay',
    ]
    six.assertCountEqual(
      self, os.listdir(partition), expected_partition_content)
    six.assertCountEqual(
      self,
      os.listdir(self.software_root),
      [instance.software.software_hash])

    state_path = os.path.join(instance.partition_path, '.requested_state')
    with open(state_path) as state_file:
      self.assertEqual(
        state_file.read(), slapgrid.COMPUTER_PARTITION_STOPPED_STATE)
    self.assertEqual(instance.sequence, ['/stoppedComputerPartition'])
def test_partition_requested_state_not_created_if_failed(self):
  """
  When partition processing fails, no '.requested_state' file is written.

  The failure is provoked by replacing the software's buildout with a shell
  script that exits with status 3.
  """
  computer_class = self.getTestComputerClass()
  computer = computer_class(self.software_root, self.instance_root)
  with httmock.HTTMock(computer.request_handler):
    instance = computer.instance_list[0]
    instance.timestamp = str(int(time.time()))
    instance.software.setBuildout("""#!/bin/sh
exit 3""")

    status = self.grid.processComputerPartitionList()
    self.assertEqual(status, slapgrid.SLAPGRID_FAIL)
    self.assertInstanceDirectoryListEqual(['0'])
    self.assertEqual(instance.sequence, ['/softwareInstanceError'])

    # The requested state must not be recorded for a failed partition.
    state_path = os.path.join(instance.partition_path, '.requested_state')
    self.assertFalse(os.path.exists(state_path))
def
test_one_partition_buildout_fail_does_not_disturb_others
(
self
):
"""
1. We set up two instance one using a corrupted buildout
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment