Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
N
neoppod
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Vincent Pelletier
neoppod
Commits
c681f666
Commit
c681f666
authored
Aug 16, 2019
by
Julien Muchembled
Browse files
Options
Browse Files
Download
Plain Diff
Bump protocol version
parents
2b9e14e8
c156f11a
Changes
29
Show whitespace changes
Inline
Side-by-side
Showing
29 changed files
with
626 additions
and
83 deletions
+626
-83
neo/admin/app.py
neo/admin/app.py
+250
-6
neo/admin/handler.py
neo/admin/handler.py
+116
-13
neo/client/app.py
neo/client/app.py
+2
-2
neo/lib/bootstrap.py
neo/lib/bootstrap.py
+3
-4
neo/lib/config.py
neo/lib/config.py
+22
-4
neo/lib/node.py
neo/lib/node.py
+1
-1
neo/lib/protocol.py
neo/lib/protocol.py
+18
-5
neo/lib/util.py
neo/lib/util.py
+7
-11
neo/master/app.py
neo/master/app.py
+16
-4
neo/master/backup_app.py
neo/master/backup_app.py
+8
-1
neo/master/handlers/__init__.py
neo/master/handlers/__init__.py
+1
-1
neo/master/handlers/administration.py
neo/master/handlers/administration.py
+6
-0
neo/master/handlers/backup.py
neo/master/handlers/backup.py
+8
-1
neo/master/handlers/identification.py
neo/master/handlers/identification.py
+4
-4
neo/master/handlers/master.py
neo/master/handlers/master.py
+1
-1
neo/master/pt.py
neo/master/pt.py
+1
-1
neo/neoctl/app.py
neo/neoctl/app.py
+20
-4
neo/neoctl/handler.py
neo/neoctl/handler.py
+1
-0
neo/neoctl/neoctl.py
neo/neoctl/neoctl.py
+6
-0
neo/storage/app.py
neo/storage/app.py
+1
-1
neo/storage/checker.py
neo/storage/checker.py
+1
-1
neo/storage/handlers/identification.py
neo/storage/handlers/identification.py
+1
-1
neo/storage/replicator.py
neo/storage/replicator.py
+1
-1
neo/tests/functional/testCluster.py
neo/tests/functional/testCluster.py
+21
-1
neo/tests/master/testMasterPT.py
neo/tests/master/testMasterPT.py
+3
-3
neo/tests/protocol
neo/tests/protocol
+5
-1
neo/tests/threaded/__init__.py
neo/tests/threaded/__init__.py
+40
-4
neo/tests/threaded/testReplication.py
neo/tests/threaded/testReplication.py
+59
-4
tools/stress
tools/stress
+3
-3
No files found.
neo/admin/app.py
View file @
c681f666
# -*- coding: utf-8 -*-
#
# Copyright (C) 2006-2019 Nexedi SA
#
...
...
@@ -14,17 +15,89 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import
getpass
,
os
,
smtplib
from
collections
import
Counter
from
email.mime.text
import
MIMEText
from
email.utils
import
formataddr
,
formatdate
from
time
import
time
from
traceback
import
format_exc
from
neo.lib
import
logging
from
neo.lib.app
import
BaseApplication
,
buildOptionParser
from
neo.lib.connection
import
ListeningConnection
from
neo.lib.connection
import
ClientConnection
,
ListeningConnection
,
\
ConnectionClosed
from
neo.lib.exception
import
PrimaryFailure
from
.handler
import
AdminEventHandler
,
MasterEventHandler
from
.handler
import
AdminEventHandler
,
BackupHandler
,
MasterEventHandler
,
\
UpstreamAdminHandler
,
NOT_CONNECTED_MESSAGE
from
neo.lib.bootstrap
import
BootstrapManager
from
neo.lib.protocol
import
ClusterStates
,
Errors
,
NodeTypes
,
Packets
from
neo.lib.logger
import
INF
from
neo.lib.protocol
import
\
CellStates
,
ClusterStates
,
Errors
,
NodeTypes
,
Packets
from
neo.lib.debug
import
register
as
registerLiveDebugger
from
neo.lib.util
import
add64
,
datetimeFromTID
,
dump
class Monitor(object):
    """Monitoring state of one cluster, and the code to summarize it.

    Besides the attributes initialized in __init__, the methods below read
    cluster_state, ltid, backup_tid, lagging and max_lag. None of these is
    assigned by __init__: they must be provided by a subclass or set on the
    instance by callers before the corresponding method is used.
    """

    def __init__(self):
        # Reported as "DOWN=%s" by formatSummary when non-zero.
        self.down = 0
        # Flag raised when a monitored value changed (see formatSummary).
        self.monitor_changed = False
        # None, or a mapping whose items are formatted as "%s=%s".
        self.pt_summary = None

    def askLastIds(self, conn,
            _askLastTransaction=Packets.AskLastTransaction(),
            _askRecovery=Packets.AskRecovery()):
        """Ask `conn` for the last transaction.

        When the cluster state is BACKINGUP, a recovery request is sent
        first. The two packet defaults are evaluated once, at definition
        time, and the same objects are passed to conn.ask on every call.
        """
        if self.cluster_state == ClusterStates.BACKINGUP:
            conn.ask(_askRecovery)
        conn.ask(_askLastTransaction)

    @property
    def operational(self):
        """True if the cluster state is BACKINGUP or RUNNING."""
        return self.cluster_state in (ClusterStates.BACKINGUP,
                                      ClusterStates.RUNNING)

    @property
    def severity(self):
        """Return 2, 1 or 0.

        2: `down` is non-zero or the cluster is not operational.
        1: the keys of pt_summary are not exactly [UP_TO_DATE], or this is
           a Backup instance whose cluster state is not BACKINGUP.
        0: otherwise.
        """
        # `or` binds looser than `and`: the isinstance test only applies to
        # the cluster_state comparison.
        return (2 if self.down or not self.operational else
                1 if list(self.pt_summary) != [CellStates.UP_TO_DATE] or
                     isinstance(self, Backup) and
                     self.cluster_state != ClusterStates.BACKINGUP else
                0)

    def formatSummary(self, upstream=None):
        """Build a one-line textual summary of this cluster.

        upstream -- None, or a pair indexed as (datetime, tid); it has the
                    shape of the first element returned by this method.

        Return (None, summary) when the cluster is not operational, else
        ((datetime of tid, tid), summary), where tid is backup_tid in
        BACKINGUP state and ltid otherwise.

        When `upstream` is given and the state is BACKINGUP, the lag is
        computed and, if the resulting lagging status differs from
        self.lagging, self.lagging is updated and monitor_changed is set.
        """
        summary = self.pt_summary
        summary = '%s; %s' % (self.cluster_state,
            ', '.join('%s=%s' % pt for pt in sorted(summary.iteritems()))
            ) if summary else str(self.cluster_state)
        if self.down:
            summary += '; DOWN=%s' % self.down
        if self.operational:
            backup = self.cluster_state == ClusterStates.BACKINGUP
            tid = self.backup_tid if backup else self.ltid
            x = datetimeFromTID(tid)
            if upstream and backup:
                lag = (upstream[0] - x).total_seconds()
                if lag or tid >= upstream[1]:
                    lagging = self.max_lag < lag
                else:
                    # Computed lag is exactly 0 although our tid is lower
                    # than upstream's: display a symbol rather than 0.
                    lag = 'ε'
                    lagging = self.max_lag <= 0
                extra = '; lag=%s' % lag
                if self.lagging != lagging:
                    self.lagging = lagging
                    self.monitor_changed = True
            else:
                extra = ' (%s)' % x
            return (x, tid), '%s; ltid=%s%s' % (summary, dump(tid), extra)
        return None, summary
class Backup(Monitor):
    """Monitor whose state attributes have class-level defaults.

    Note that nothing here calls Monitor.__init__ explicitly; a bare
    Backup() relies on the inherited constructor.
    """

    # Defaults for attributes that Monitor's methods read but that
    # Monitor.__init__ does not assign.
    cluster_state = None
    # Compared by identity against a connection in the handler code.
    conn = None
    lagging = False
    # Threshold compared with the lag (in seconds) by formatSummary.
    max_lag = 0
@
buildOptionParser
class
Application
(
BaseApplication
):
class
Application
(
BaseApplication
,
Monitor
):
"""The storage node application."""
@
classmethod
...
...
@@ -33,12 +106,19 @@ class Application(BaseApplication):
_
.
description
=
"NEO Admin node"
cls
.
addCommonServerOptions
(
'admin'
,
'127.0.0.1:9999'
)
hint
=
' (the option can be repeated)'
_
=
_
.
group
(
'admin'
)
_
(
'monitor-email'
,
multiple
=
True
,
help
=
'recipient email for notifications'
+
hint
)
_
(
'monitor-backup'
,
multiple
=
True
,
help
=
'name of backup cluster to monitor'
+
hint
)
_
(
'smtp'
,
metavar
=
'HOST[:PORT]'
,
help
=
'SMTP for email notifications'
)
_
.
int
(
'i'
,
'nid'
,
help
=
"specify an NID to use for this process (testing purpose)"
)
def
__init__
(
self
,
config
):
super
(
Application
,
self
).
__init__
(
BaseApplication
.
__init__
(
self
,
config
.
get
(
'ssl'
),
config
.
get
(
'dynamic_master_list'
))
for
address
in
config
[
'masters'
]:
self
.
nm
.
createMaster
(
address
=
address
)
...
...
@@ -46,6 +126,23 @@ class Application(BaseApplication):
self
.
name
=
config
[
'cluster'
]
self
.
server
=
config
[
'bind'
]
self
.
backup_dict
=
{
x
:
Backup
()
for
x
in
config
.
get
(
'monitor_backup'
,
())}
self
.
email_list
=
config
.
get
(
'monitor_email'
,
())
if
self
.
email_list
:
self
.
smtp
=
smtplib
.
SMTP
()
self
.
smtp_host
=
config
.
get
(
'smtp'
)
or
'localhost'
email_from
=
os
.
getenv
(
'EMAIL'
)
if
not
email_from
:
try
:
email_from
=
getpass
.
getuser
()
except
Exception
:
email_from
=
None
self
.
email_from
=
formataddr
((
"NEO "
+
self
.
name
,
email_from
))
self
.
smtp_exc
=
None
self
.
smtp_retry
=
INF
self
.
notifying
=
set
()
logging
.
debug
(
'IP address is %s, port is %d'
,
*
self
.
server
)
# The partition table is initialized after getting the number of
...
...
@@ -53,8 +150,11 @@ class Application(BaseApplication):
self
.
pt
=
None
self
.
uuid
=
config
.
get
(
'nid'
)
logging
.
node
(
self
.
name
,
self
.
uuid
)
self
.
backup_handler
=
BackupHandler
(
self
)
self
.
master_event_handler
=
MasterEventHandler
(
self
)
self
.
upstream_admin_handler
=
UpstreamAdminHandler
(
self
)
self
.
cluster_state
=
None
self
.
upstream_admin
=
self
.
upstream_admin_conn
=
None
self
.
reset
()
registerLiveDebugger
(
on_log
=
self
.
log
)
...
...
@@ -63,6 +163,8 @@ class Application(BaseApplication):
super
(
Application
,
self
).
close
()
def
reset
(
self
):
Monitor
.
__init__
(
self
)
self
.
asking_monitor_information
=
[]
self
.
master_conn
=
None
self
.
master_node
=
None
...
...
@@ -112,13 +214,155 @@ class Application(BaseApplication):
"""
self
.
cluster_state
=
None
# search, find, connect and identify to the primary master
bootstrap
=
BootstrapManager
(
self
,
NodeTypes
.
ADMIN
,
self
.
server
)
bootstrap
=
BootstrapManager
(
self
,
NodeTypes
.
ADMIN
,
self
.
server
,
backup
=
list
(
self
.
backup_dict
))
self
.
master_node
,
self
.
master_conn
=
bootstrap
.
getPrimaryConnection
()
# passive handler
self
.
master_conn
.
setHandler
(
self
.
master_event_handler
)
self
.
master_conn
.
ask
(
Packets
.
AskClusterState
())
def connectToUpstreamAdmin(self):
    """Open a client connection to the upstream admin and identify to it.

    Nothing is done when there is no listening connection, i.e. when the
    application is not running.
    """
    if not self.listening_conn:
        return
    upstream = ClientConnection(
        self, self.upstream_admin_handler, self.upstream_admin)
    self.upstream_admin_conn = upstream
    identification = Packets.RequestIdentification(
        NodeTypes.ADMIN, None, None, self.name, None, {})
    upstream.ask(identification)
def partitionTableUpdated(self):
    """Recompute `down` and `pt_summary` from the partition table.

    `down` is the number of distinct nodes owning at least one cell and
    not running; `pt_summary` maps each cell state to its number of
    cells. Nothing happens while the partition table is missing/empty.
    """
    table = self.pt
    if not table:
        return
    stopped = set()
    states = Counter()
    for partition in xrange(table.np):
        for cell in table.getCellList(partition):
            owner = cell.getNode()
            if not owner.isRunning():
                stopped.add(owner)
            states[cell.getState()] += 1
    self.updateMonitorInformation(
        None, down=len(stopped), pt_summary=dict(states))
def askMonitorInformation(self, conn):
    """Queue a monitor information request coming from `conn`.

    The (connection, peer id) pair is always recorded. A notification
    round is only started if, before this request, no other request was
    queued and no round was in progress.
    """
    queue = self.asking_monitor_information
    # Must be evaluated before the request is appended to the queue.
    idle = not (queue or self.notifying)
    queue.append((conn, conn.getPeerId()))
    if idle:
        self._notify(self.operational)
def updateMonitorInformation(self, name, **kw):
    """Store new monitored values and trigger a notification if needed.

    name -- None for this cluster, else a key of self.backup_dict
    kw   -- attribute names and values to set on the selected monitor

    Values equal to the current ones are ignored; if nothing is left,
    the call is a no-op. Changes about this cluster itself are forwarded
    to the upstream admin connection, if any.
    """
    if name is None:
        target = self
    else:
        target = self.backup_dict[name]
    delta = {}
    for key, value in kw.iteritems():
        if value != getattr(target, key):
            delta[key] = value
    if delta:
        target.monitor_changed = True
        target.__dict__.update(delta)
        if name is None and self.upstream_admin_conn:
            self.upstream_admin_conn.send(
                Packets.NotifyMonitorInformation(delta))
        if not self.notifying:
            self.em.setTimeout(None, None)
            self._notify(self.operational)
def _notify(self, ask_ids=True,
        _askLastTransaction=Packets.AskLastTransaction(),
        _askRecovery=Packets.AskRecovery()):
    """Collect monitoring data, then email it and/or answer neoctl.

    ask_ids -- if true, first request fresh last ids from the master and
               from every operational backup, and record in
               self.notifying from whom an answer is expected.

    The two packet parameters are not referenced in this body.

    The method returns early while answers are still expected, or while
    connected to a master whose cluster state is still unknown.
    """
    if ask_ids:
        self.askLastIds(self.master_conn)
        # None stands for this cluster; backups are identified by name.
        self.notifying = notifying = {None}
        for name, monitor in self.backup_dict.iteritems():
            if monitor.operational:
                monitor.askLastIds(monitor.conn)
                notifying.add(name)
    # Chained comparison: cluster_state is None and master_conn is not None.
    if self.notifying or self.cluster_state is None is not self.master_conn:
        return
    # Cluster names grouped by severity level (index 0, 1 or 2).
    severity = [], [], []
    my_severity = self.severity
    severity[my_severity].append(self.name)
    changed = set()
    if self.monitor_changed:
        self.monitor_changed = False
        changed.add(self.name)
    if self.master_conn is None:
        body = NOT_CONNECTED_MESSAGE
    else:
        upstream, body = self.formatSummary()
        body = [body]
        for name, backup in self.backup_dict.iteritems():
            body += '', name, ' ' + backup.formatSummary(upstream)[1]
            # `lagging` is a boolean: a lagging backup with severity 0
            # is filed under index 1.
            severity[backup.severity or backup.lagging].append(name)
            if backup.monitor_changed:
                backup.monitor_changed = False
                changed.add(name)
        body = '\n'.join(body)
    if changed or self.smtp_retry < time():
        logging.debug('monitor notification')
        email_list = self.email_list
        while email_list: # not a loop
            msg = MIMEText(body + (self.smtp_exc or ''))
            msg['Date'] = formatdate()
            # clusters: names at level 1; x: names at level 2.
            clusters, x = severity[1:]
            # Breakable block (runs once) computing the subject suffix.
            while 1:
                if x:
                    clusters = clusters + x
                    x = 'PROBLEM'
                elif clusters:
                    x = 'WARNING'
                else:
                    x = 'OK'
                    break
                # Only name the non-OK clusters that changed.
                clusters = changed.intersection(clusters)
                if clusters:
                    x += ' (%s)' % ', '.join(sorted(clusters))
                break
            msg['Subject'] = 'NEO monitoring: ' + x
            msg['From'] = self.email_from
            msg['To'] = ', '.join(email_list)
            s = self.smtp
            try:
                s.connect(self.smtp_host)
                s.sendmail(None, email_list, msg.as_string())
            except Exception:
                x = format_exc()
                logging.error(x)
                # Keep the oldest failure unless something changed since.
                if changed or not self.smtp_exc:
                    self.smtp_exc = (
                        "\n\nA notification could not be sent at %s:\n\n%s"
                        % (msg['Date'], x))
                retry = self.smtp_retry = time() + 600
            else:
                self.smtp_exc = None
                self.smtp_retry = INF
                # This `break` leaves the enclosing `while email_list`
                # block (after `finally`), skipping setTimeout below.
                if not (self.operational and any(monitor.operational
                        for monitor in self.backup_dict.itervalues())):
                    break
                retry = time() + 600
            finally:
                s.close()
            # Presumably schedules another _notify call at `retry`
            # -- TODO confirm against the event manager.
            self.em.setTimeout(retry, self._notify)
            break
    neoctl = self.asking_monitor_information
    if neoctl:
        # Replace our own name (appended first above) by None, possibly
        # at a higher level if the last email could not be sent.
        del severity[my_severity][0]
        if self.smtp_exc:
            my_severity = 2
            body += self.smtp_exc
        severity[1].sort()
        severity[2].sort()
        severity[my_severity].insert(0, None)
        p = Packets.AnswerMonitorInformation(severity[1], severity[2], body)
        for conn, msg_id in neoctl:
            try:
                conn.send(p, msg_id)
            except ConnectionClosed:
                pass
        del self.asking_monitor_information[:]
def maybeNotify(self, name):
    """Record that the answer expected from `name` has arrived.

    If `name` was in self.notifying, it is removed and _notify is called
    without asking for ids again; otherwise nothing happens.
    """
    pending = self.notifying
    try:
        pending.remove(name)
    except KeyError:
        pass
    else:
        self._notify(False)
def
sendPartitionTable
(
self
,
conn
,
min_offset
,
max_offset
,
uuid
):
pt
=
self
.
pt
if
max_offset
==
0
:
...
...
neo/admin/handler.py
View file @
c681f666
...
...
@@ -14,19 +14,19 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from
neo.lib
import
logging
,
protocol
from
neo.lib
import
logging
from
neo.lib.handler
import
EventHandler
from
neo.lib.protocol
import
uuid_str
,
Packets
from
neo.lib.protocol
import
uuid_str
,
\
NodeTypes
,
NotReadyError
,
Packets
,
ProtocolError
from
neo.lib.pt
import
PartitionTable
from
neo.lib.exception
import
PrimaryFailure
NOT_CONNECTED_MESSAGE
=
'Not connected to a primary master.'
def
AdminEventHandlerType
(
name
,
bases
,
d
):
def
check_primary_master
(
func
):
def
wrapper
(
self
,
*
args
,
**
kw
):
if
self
.
app
.
master_conn
is
not
None
:
return
func
(
self
,
*
args
,
**
kw
)
raise
protocol
.
NotReadyError
(
'Not connected to a primary master.'
)
return
wrapper
def
check_connection
(
func
):
return
lambda
self
,
conn
,
*
args
,
**
kw
:
\
self
.
_checkConnection
(
conn
)
and
func
(
self
,
conn
,
*
args
,
**
kw
)
def
forward_ask
(
klass
):
return
lambda
self
,
conn
,
*
args
:
self
.
app
.
master_conn
.
ask
(
...
...
@@ -47,7 +47,7 @@ def AdminEventHandlerType(name, bases, d):
Packets
.
TweakPartitionTable
,
):
d
[
x
.
handler_method_name
]
=
forward_ask
(
x
)
return
type
(
name
,
bases
,
{
k
:
v
if
k
[
0
]
==
'_'
else
check_
primary_master
(
v
)
return
type
(
name
,
bases
,
{
k
:
v
if
k
[
0
]
==
'_'
else
check_
connection
(
v
)
for
k
,
v
in
d
.
iteritems
()})
class
AdminEventHandler
(
EventHandler
):
...
...
@@ -55,6 +55,26 @@ class AdminEventHandler(EventHandler):
__metaclass__
=
AdminEventHandlerType
def _checkConnection(self, conn):
    """Return True if the application is connected to a primary master.

    Raise NotReadyError otherwise. `conn` is not used here.
    """
    connected = self.app.master_conn is not None
    if connected:
        return True
    raise NotReadyError(NOT_CONNECTED_MESSAGE)
def requestIdentification(self, conn, node_type, uuid, address, name, *_):
    """Accept the admin node of a monitored backup cluster.

    The peer must be an admin node whose cluster name is a key of
    app.backup_dict and for which no connection is registered yet;
    otherwise ProtocolError is raised. On success, the connection is
    attached to the backup entry and switched to the backup handler.
    """
    if node_type != NodeTypes.ADMIN:
        raise ProtocolError("reject non-admin node")
    app = self.app
    monitored = app.backup_dict
    if name not in monitored:
        raise ProtocolError("unknown backup cluster %r" % name)
    backup = monitored[name]
    if backup.conn is not None:
        raise ProtocolError("already connected")
    backup.conn = conn
    conn.setHandler(app.backup_handler)
    accepted = Packets.AcceptIdentification(NodeTypes.ADMIN, None, None)
    conn.answer(accepted)
def
askPartitionList
(
self
,
conn
,
min_offset
,
max_offset
,
uuid
):
logging
.
info
(
"ask partition list from %s to %s for %s"
,
min_offset
,
max_offset
,
uuid_str
(
uuid
))
...
...
@@ -83,6 +103,9 @@ class AdminEventHandler(EventHandler):
self
.
app
.
master_conn
.
send
(
Packets
.
FlushLog
())
super
(
AdminEventHandler
,
self
).
flushLog
(
conn
)
def askMonitorInformation(self, conn):
    """Delegate the request to the application."""
    application = self.app
    application.askMonitorInformation(conn)
class
MasterEventHandler
(
EventHandler
):
""" This class is just used to dispatch message to right handler"""
...
...
@@ -104,13 +127,93 @@ class MasterEventHandler(EventHandler):
forward
.
send
(
packet
,
kw
[
'msg_id'
])
def
answerClusterState
(
self
,
conn
,
state
):
self
.
app
.
cluster_state
=
state
self
.
app
.
updateMonitorInformation
(
None
,
cluster_state
=
state
)
notifyClusterInformation
=
answerClusterState
def
sendPartitionTable
(
self
,
conn
,
ptid
,
num_replicas
,
row_list
):
pt
=
self
.
app
.
pt
=
object
.
__new__
(
PartitionTable
)
pt
.
load
(
ptid
,
num_replicas
,
row_list
,
self
.
app
.
nm
)
app
=
self
.
app
app
.
pt
=
object
.
__new__
(
PartitionTable
)
app
.
pt
.
load
(
ptid
,
num_replicas
,
row_list
,
app
.
nm
)
app
.
partitionTableUpdated
()
def
notifyPartitionChanges
(
self
,
conn
,
ptid
,
num_replicas
,
cell_list
):
self
.
app
.
pt
.
update
(
ptid
,
num_replicas
,
cell_list
,
self
.
app
.
nm
)
app
=
self
.
app
app
.
pt
.
update
(
ptid
,
num_replicas
,
cell_list
,
app
.
nm
)
app
.
partitionTableUpdated
()
def notifyNodeInformation(self, *args):
    """Apply the node changes, then refresh the monitored information."""
    parent = super(MasterEventHandler, self)
    parent.notifyNodeInformation(*args)
    # The set of non-running nodes may have changed.
    self.app.partitionTableUpdated()
def notifyUpstreamAdmin(self, conn, addr):
    """Record the address of the upstream admin and (re)connect to it.

    Nothing is done if the known upstream admin already has this
    address. An existing connection is closed rather than replaced
    directly; otherwise a new connection is attempted.
    """
    app = self.app
    upstream = app.upstream_admin
    if upstream is None:
        upstream = app.nm.createAdmin()
        app.upstream_admin = upstream
    elif upstream.getAddress() == addr:
        return
    upstream.setAddress(addr)
    link = app.upstream_admin_conn
    if link:
        link.close()
    else:
        app.connectToUpstreamAdmin()
def answerLastTransaction(self, conn, ltid):
    """Store the last TID of this cluster and resume notification."""
    application = self.app
    application.ltid = ltid
    # None identifies this cluster, as opposed to a named backup.
    application.maybeNotify(None)
def answerRecovery(self, name, ptid, backup_tid, truncate_tid):
    """Store the backup TID of this cluster; other values are ignored."""
    # NOTE(review): other handlers of this class take the connection as
    # first argument after self, so `name` presumably receives the
    # connection here -- confirm before relying on it.
    application = self.app
    application.backup_tid = backup_tid
def monitor(func):
    """Decorator replacing the connection argument by a backup name.

    The wrapped method is called with the key of app.backup_dict whose
    entry is bound to the given connection, instead of the connection
    itself. AssertionError is raised if no entry uses this connection.
    """
    def wrapper(self, conn, *args, **kw):
        for cluster, entry in self.app.backup_dict.iteritems():
            if entry.conn is not conn:
                continue
            return func(self, cluster, *args, **kw)
        raise AssertionError
    return wrapper
class BackupHandler(EventHandler):
    """Handler for connections from admin nodes of monitored backups.

    Every method is wrapped by `monitor`, so it receives the name of the
    backup cluster in place of the connection.
    """

    @monitor
    def connectionClosed(self, name):
        application = self.app
        backups = application.backup_dict
        # Forget everything known about this backup, connection included.
        backups[name] = backups[name].__class__()
        application.maybeNotify(name)

    @monitor
    def notifyMonitorInformation(self, name, info):
        application = self.app
        application.updateMonitorInformation(name, **info)

    @monitor
    def answerRecovery(self, name, ptid, backup_tid, truncate_tid):
        backup = self.app.backup_dict[name]
        backup.backup_tid = backup_tid

    @monitor
    def answerLastTransaction(self, name, ltid):
        application = self.app
        backup = application.backup_dict[name]
        backup.ltid = ltid
        application.maybeNotify(name)
class UpstreamAdminHandler(AdminEventHandler):
    """Handler for the connection to the admin of the upstream cluster."""

    def _checkConnection(self, conn):
        current = self.app.upstream_admin_conn
        assert conn is current
        parent = super(UpstreamAdminHandler, self)
        return parent._checkConnection(conn)

    def connectionClosed(self, conn):
        # Only reconnect if the lost connection is still the current one.
        application = self.app
        if conn is not application.upstream_admin_conn:
            return
        application.connectToUpstreamAdmin()

    connectionFailed = connectionClosed

    def _acceptIdentification(self, node):
        # Send our whole monitored state once identified.
        application = self.app
        state = {
            'cluster_state': application.cluster_state,
            'down': application.down,
            'pt_summary': application.pt_summary,
        }
        node.send(Packets.NotifyMonitorInformation(state))
neo/client/app.py
View file @
c681f666
...
...
@@ -228,7 +228,7 @@ class Application(ThreadedApplication):
node
=
node
,
dispatcher
=
self
.
dispatcher
)
p
=
Packets
.
RequestIdentification
(
NodeTypes
.
CLIENT
,
self
.
uuid
,
None
,
self
.
name
,
None
,
(),
()
)
self
.
uuid
,
None
,
self
.
name
,
None
,
{}
)
try
:
ask
(
conn
,
p
,
handler
=
handler
)
except
ConnectionClosed
:
...
...
@@ -270,7 +270,7 @@ class Application(ThreadedApplication):
conn
=
MTClientConnection
(
self
,
self
.
storage_event_handler
,
node
,
dispatcher
=
self
.
dispatcher
)
p
=
Packets
.
RequestIdentification
(
NodeTypes
.
CLIENT
,
self
.
uuid
,
None
,
self
.
name
,
self
.
id_timestamp
,
(),
()
)
self
.
uuid
,
None
,
self
.
name
,
self
.
id_timestamp
,
{}
)
try
:
self
.
_ask
(
conn
,
p
,
handler
=
self
.
storage_bootstrap_handler
)
except
ConnectionClosed
:
...
...
neo/lib/bootstrap.py
View file @
c681f666
...
...
@@ -26,15 +26,14 @@ class BootstrapManager(EventHandler):
Manage the bootstrap stage, lookup for the primary master then connect to it
"""
def
__init__
(
self
,
app
,
node_type
,
server
=
None
,
devpath
=
(),
new_nid
=
()
):
def
__init__
(
self
,
app
,
node_type
,
server
=
None
,
**
extra
):
"""
Manage the bootstrap stage of a non-master node, it lookup for the
primary master node, connect to it then returns when the master node
is ready.
"""
self
.
server
=
server
self
.
devpath
=
devpath
self
.
new_nid
=
new_nid
self
.
extra
=
extra
self
.
node_type
=
node_type
app
.
nm
.
reset
()
...
...
@@ -43,7 +42,7 @@ class BootstrapManager(EventHandler):
def
connectionCompleted
(
self
,
conn
):
EventHandler
.
connectionCompleted
(
self
,
conn
)
conn
.
ask
(
Packets
.
RequestIdentification
(
self
.
node_type
,
self
.
uuid
,
self
.
server
,
self
.
app
.
name
,
None
,
self
.
devpath
,
self
.
new_nid
))
self
.
server
,
self
.
app
.
name
,
None
,
self
.
extra
))
def
connectionFailed
(
self
,
conn
):
EventHandler
.
connectionFailed
(
self
,
conn
)
...
...
neo/lib/config.py
View file @
c681f666
...
...
@@ -18,6 +18,15 @@ import argparse, os, sys
from
functools
import
wraps
from
ConfigParser
import
SafeConfigParser
class _DefaultList(list):
    """
    List used as default value of argparse 'append' actions.

    Copying it gives an empty plain list, so that values given on the
    command-line replace the default ones instead of extending them.
    """

    def __copy__(self):
        return list()
class
_Required
(
object
):
...
...
@@ -30,6 +39,8 @@ class _Required(object):
class
_Option
(
object
):
multiple
=
False
def
__init__
(
self
,
*
args
,
**
kw
):
if
len
(
args
)
>
1
:
self
.
short
,
self
.
name
=
args
...
...
@@ -51,7 +62,12 @@ class _Option(object):
action
.
required
=
_Required
(
option_list
,
self
.
name
)
def
fromConfigFile
(
self
,
cfg
,
section
):
return
self
(
cfg
.
get
(
section
,
self
.
name
.
replace
(
'-'
,
'_'
)))
value
=
cfg
.
get
(
section
,
self
.
name
.
replace
(
'-'
,
'_'
))
if
self
.
multiple
:
return
[
self
(
value
)
for
value
in
value
.
splitlines
()
if
value
]
return
self
(
value
)
@
staticmethod
def
parse
(
value
):
...
...
@@ -81,6 +97,11 @@ class Option(_Option):
kw
[
x
]
=
getattr
(
self
,
x
)
except
AttributeError
:
pass
if
self
.
multiple
:
kw
[
'action'
]
=
'append'
default
=
kw
.
get
(
'default'
)
if
default
:
kw
[
'default'
]
=
_DefaultList
(
default
)
return
kw
@
staticmethod
...
...
@@ -132,9 +153,6 @@ class OptionGroup(object):
class
Argument
(
Option
):
def
__init__
(
self
,
name
,
**
kw
):
super
(
Argument
,
self
).
__init__
(
name
,
**
kw
)
def
_asArgparse
(
self
,
parser
,
option_list
):
kw
=
{
'help'
:
self
.
help
,
'type'
:
self
}
for
x
in
'default'
,
'metavar'
,
'nargs'
,
'choices'
:
...
...
neo/lib/node.py
View file @
c681f666
...
...
@@ -28,7 +28,7 @@ class Node(object):
_connection
=
None
_identified
=
False
devpath
=
()
extra
=
{}
id_timestamp
=
None
def
__init__
(
self
,
manager
,
address
=
None
,
uuid
=
None
,
state
=
NodeStates
.
DOWN
):
...
...
neo/lib/protocol.py
View file @
c681f666
...
...
@@ -20,7 +20,7 @@ from msgpack import packb
# The protocol version must be increased whenever upgrading a node may require
# to upgrade other nodes.
PROTOCOL_VERSION
=
0
PROTOCOL_VERSION
=
1
# By encoding the handshake packet with msgpack, the whole NEO stream can be
# decoded with msgpack. The first byte is 0x92, which is different from TLS
# Handshake (0x16).
...
...
@@ -312,6 +312,8 @@ class Packet(object):
class
PacketRegistryFactory
(
dict
):
_next_code
=
0
def
__call__
(
self
,
name
,
base
,
d
):
for
k
,
v
in
d
.
items
():
if
isinstance
(
v
,
type
)
and
issubclass
(
v
,
Packet
):
...
...
@@ -323,10 +325,9 @@ class PacketRegistryFactory(dict):
def
register
(
self
,
doc
,
ignore_when_closed
=
None
,
request
=
False
,
error
=
False
,
_base
=
(
Packet
,),
**
kw
):
""" Register a packet in the packet registry """
code
=
len
(
self
)
if
doc
is
None
:
self
[
code
]
=
None
return
# None registered only to skip a code number (for compatibility)
code
=
self
.
_next_code
assert
code
<
RESPONSE_MASK
self
.
_next_code
=
code
+
1
if
error
and
not
request
:
assert
not
code
code
=
RESPONSE_MASK
...
...
@@ -826,6 +827,18 @@ class Packets(dict):
:nodes: ctl -> A -> M -> *
"""
)
AskMonitorInformation
,
AnswerMonitorInformation
=
request
(
"""
:nodes: ctl -> A
"""
)
NotifyMonitorInformation
=
notify
(
"""
:nodes: A -> A
"""
)
NotifyUpstreamAdmin
=
notify
(
"""
:nodes: M -> A
"""
)
del
notify
,
request
...
...
neo/lib/util.py
View file @
c681f666
...
...
@@ -39,7 +39,8 @@ nextafter()
TID_LOW_OVERFLOW
=
2
**
32
TID_LOW_MAX
=
TID_LOW_OVERFLOW
-
1
SECOND_PER_TID_LOW
=
60.0
/
TID_LOW_OVERFLOW
SECOND_FROM_UINT32
=
60.
/
TID_LOW_OVERFLOW
MICRO_FROM_UINT32
=
1e6
/
TID_LOW_OVERFLOW
TID_CHUNK_RULES
=
(
(
-
1900
,
0
),
(
-
1
,
12
),
...
...
@@ -52,7 +53,7 @@ def tidFromTime(tm):
gmt
=
gmtime
(
tm
)
return
packTID
(
(
gmt
.
tm_year
,
gmt
.
tm_mon
,
gmt
.
tm_mday
,
gmt
.
tm_hour
,
gmt
.
tm_min
),
int
((
gmt
.
tm_sec
+
(
tm
-
int
(
tm
)))
/
SECOND_
PER_TID_LOW
))
int
((
gmt
.
tm_sec
+
(
tm
-
int
(
tm
)))
/
SECOND_
FROM_UINT32
))
def
packTID
(
higher
,
lower
):
"""
...
...
@@ -95,15 +96,10 @@ def unpackTID(ptid):
higher
.
reverse
()
return
(
tuple
(
higher
),
lower
)
def
timeStringFromTID
(
ptid
):
"""
Return a string in the format "yyyy-mm-dd hh:mm:ss.ssssss" from a TID
"""
higher
,
lower
=
unpackTID
(
ptid
)
seconds
=
lower
*
SECOND_PER_TID_LOW
return
'%04d-%02d-%02d %02d:%02d:%09.6f'
%
(
higher
[
0
],
higher
[
1
],
higher
[
2
],
higher
[
3
],
higher
[
4
],
seconds
)
def datetimeFromTID(tid):
    """Return the datetime object corresponding to the given TID."""
    date_fields, low = unpackTID(tid)
    # The lower part counts fractions of a minute: split it into whole
    # seconds and a remainder converted to microseconds.
    whole_seconds, remainder = divmod(low * 60, TID_LOW_OVERFLOW)
    microseconds = int(remainder * MICRO_FROM_UINT32)
    return datetime(*(date_fields + (whole_seconds, microseconds)))
def
addTID
(
ptid
,
offset
):
"""
...
...
neo/master/app.py
View file @
c681f666
...
...
@@ -182,12 +182,15 @@ class Application(BaseApplication):
self
.
playPrimaryRole
()
self
.
playSecondaryRole
()
def
getNodeInformation
Dict
(
self
,
node_list
):
def
getNodeInformation
Getter
(
self
,
node_list
):
node_dict
=
defaultdict
(
list
)
admin_dict
=
defaultdict
(
list
)
# group modified nodes by destination node type
for
node
in
node_list
:
node_info
=
node
.
asTuple
()
if
node
.
isAdmin
():
for
backup
in
node
.
extra
.
get
(
'backup'
,
()):
admin_dict
[
backup
].
append
(
node_info
)
continue
node_dict
[
NodeTypes
.
ADMIN
].
append
(
node_info
)
node_dict
[
NodeTypes
.
STORAGE
].
append
(
node_info
)
...
...
@@ -197,18 +200,27 @@ class Application(BaseApplication):
if
node
.
isStorage
():
continue
node_dict
[
NodeTypes
.
MASTER
].
append
(
node_info
)
return
node_dict
def
getNodeListFor
(
node
):
node_list
=
node_dict
.
get
(
node
.
getType
())
if
node
.
isClient
():
admin_list
=
admin_dict
.
get
(
node
.
extra
.
get
(
'backup'
))
if
admin_list
:
if
node_list
:
return
node_list
+
admin_list
return
admin_list
return
node_list
return
getNodeListFor
def
broadcastNodesInformation
(
self
,
node_list
):
"""
Broadcast changes for a set a nodes
Send only one packet per connection to reduce bandwidth
"""
node_dict
=
self
.
getNodeInformationDict
(
node_list
)
getNodeListFor
=
self
.
getNodeInformationGetter
(
node_list
)
now
=
monotonic_time
()
# send at most one non-empty notification packet per node
for
node
in
self
.
nm
.
getIdentifiedList
():
node_list
=
node_dict
.
get
(
node
.
getType
()
)
node_list
=
getNodeListFor
(
node
)
# We don't skip pending storage nodes because we don't send them
# the full list of nodes when they're added, and it's also quite
# useful to notify them about new masters.
...
...
neo/master/backup_app.py
View file @
c681f666
...
...
@@ -99,7 +99,8 @@ class BackupApplication(object):
pt
=
app
.
pt
while
True
:
app
.
changeClusterState
(
ClusterStates
.
STARTING_BACKUP
)
bootstrap
=
BootstrapManager
(
self
,
NodeTypes
.
CLIENT
)
bootstrap
=
BootstrapManager
(
self
,
NodeTypes
.
CLIENT
,
backup
=
app
.
name
)
# {offset -> node}
self
.
primary_partition_dict
=
{}
# [[tid]]
...
...
@@ -367,3 +368,9 @@ class BackupApplication(object):
uuid_str
(
cell
.
getUUID
()),
offset
,
dump
(
tid
),
uuid_str
(
node
.
getUUID
()))
cell
.
getNode
().
send
(
p
)
def notifyUpstreamAdmin(self, addr):
    """Send the address of the upstream admin to one of our admins.

    Among the identified admin nodes, the one with the lowest UUID is
    notified. Nothing is sent if there is none.
    """
    admins = self.app.nm.getAdminList(only_identified=True)
    if not admins:
        return
    chosen = min(admins, key=lambda admin: admin.getUUID())
    chosen.send(Packets.NotifyUpstreamAdmin(addr))
neo/master/handlers/__init__.py
View file @
c681f666
...
...
@@ -52,7 +52,7 @@ class MasterHandler(EventHandler):
node_list
=
app
.
nm
.
getList
()
node_list
.
remove
(
node
)
node_list
=
([
node
.
asTuple
()]
# for id_timestamp
+
app
.
getNodeInformation
Dict
(
node_list
)[
node
.
getType
()]
)
+
app
.
getNodeInformation
Getter
(
node_list
)(
node
)
)
conn
.
send
(
Packets
.
NotifyNodeInformation
(
monotonic_time
(),
node_list
))
def
handlerSwitched
(
self
,
conn
,
new
):
...
...
neo/master/handlers/administration.py
View file @
c681f666
...
...
@@ -58,6 +58,12 @@ class AdministrationHandler(MasterHandler):
def
handlerSwitched
(
self
,
conn
,
new
):
assert
new
super
(
AdministrationHandler
,
self
).
handlerSwitched
(
conn
,
new
)
app
=
self
.
app
.
backup_app
if
app
is
not
None
:
for
node
in
app
.
nm
.
getAdminList
():
if
node
.
isRunning
():
app
.
notifyUpstreamAdmin
(
node
.
getAddress
())
break
def
connectionLost
(
self
,
conn
,
new_state
):
node
=
self
.
app
.
nm
.
getByUUID
(
conn
.
getUUID
())
...
...
neo/master/handlers/backup.py
View file @
c681f666
...
...
@@ -16,7 +16,7 @@
from
neo.lib.exception
import
PrimaryFailure
from
neo.lib.handler
import
EventHandler
from
neo.lib.protocol
import
ZERO_TID
from
neo.lib.protocol
import
NodeTypes
,
NodeStates
,
Packets
,
ZERO_TID
from
neo.lib.pt
import
PartitionTable
class
BackupHandler
(
EventHandler
):
...
...
@@ -36,6 +36,13 @@ class BackupHandler(EventHandler):
def
notifyPartitionChanges
(
self
,
conn
,
ptid
,
num_replicas
,
cell_list
):
self
.
app
.
pt
.
update
(
ptid
,
num_replicas
,
cell_list
,
self
.
app
.
nm
)
def notifyNodeInformation(self, conn, timestamp, node_list):
    """Apply node changes and report running upstream admin nodes."""
    parent = super(BackupHandler, self)
    parent.notifyNodeInformation(conn, timestamp, node_list)
    for node_type, addr, _, state, _ in node_list:
        if node_type != NodeTypes.ADMIN:
            continue
        if state == NodeStates.RUNNING:
            self.app.notifyUpstreamAdmin(addr)
def
answerLastTransaction
(
self
,
conn
,
tid
):
app
=
self
.
app
prev_tid
=
app
.
app
.
getLastTransaction
()
...
...
neo/master/handlers/identification.py
View file @
c681f666
...
...
@@ -24,7 +24,7 @@ from ..app import monotonic_time
class
IdentificationHandler
(
EventHandler
):
def
requestIdentification
(
self
,
conn
,
node_type
,
uuid
,
address
,
name
,
id_timestamp
,
devpath
,
new_nid
):
address
,
name
,
id_timestamp
,
extra
):
app
=
self
.
app
self
.
checkClusterName
(
name
)
if
address
==
app
.
server
:
...
...
@@ -60,6 +60,7 @@ class IdentificationHandler(EventHandler):
# cloned/evil/buggy node connecting to us
raise
ProtocolError
(
'already connected'
)
new_nid
=
extra
.
pop
(
'new_nid'
,
None
)
state
=
NodeStates
.
RUNNING
if
node_type
==
NodeTypes
.
CLIENT
:
if
app
.
cluster_state
==
ClusterStates
.
RUNNING
:
...
...
@@ -111,8 +112,7 @@ class IdentificationHandler(EventHandler):
uuid
=
uuid
,
address
=
address
)
else
:
node
.
setUUID
(
uuid
)
if
devpath
:
node
.
devpath
=
tuple
(
devpath
)
node
.
extra
=
extra
node
.
id_timestamp
=
monotonic_time
()
node
.
setState
(
state
)
app
.
broadcastNodesInformation
([
node
])
...
...
@@ -135,7 +135,7 @@ class IdentificationHandler(EventHandler):
class
SecondaryIdentificationHandler
(
EventHandler
):
def
requestIdentification
(
self
,
conn
,
node_type
,
uuid
,
address
,
name
,
id_timestamp
,
devpath
,
new_nid
):
address
,
name
,
id_timestamp
,
extra
):
app
=
self
.
app
self
.
checkClusterName
(
name
)
if
address
==
app
.
server
:
...
...
neo/master/handlers/master.py
View file @
c681f666
...
...
@@ -40,7 +40,7 @@ class ElectionHandler(SecondaryHandler):
super
(
ElectionHandler
,
self
).
connectionCompleted
(
conn
)
app
=
self
.
app
conn
.
ask
(
Packets
.
RequestIdentification
(
NodeTypes
.
MASTER
,
app
.
uuid
,
app
.
server
,
app
.
name
,
app
.
election
,
(),
()
))
app
.
uuid
,
app
.
server
,
app
.
name
,
app
.
election
,
{}
))
def
connectionFailed
(
self
,
conn
):
super
(
ElectionHandler
,
self
).
connectionFailed
(
conn
)
...
...
neo/master/pt.py
View file @
c681f666
...
...
@@ -250,7 +250,7 @@ class PartitionTable(neo.lib.pt.PartitionTable):
devpath_max
=
[]
devpaths
=
[()]
*
node_count
if
repeats
>
1
:
_devpaths
=
[
x
[
0
].
devpath
for
x
in
node_list
]
_devpaths
=
[
x
[
0
].
extra
.
get
(
'devpath'
,
())
for
x
in
node_list
]
max_depth
=
min
(
map
(
len
,
_devpaths
))
depth
=
0
while
1
:
...
...
neo/neoctl/app.py
View file @
c681f666
...
...
@@ -14,11 +14,11 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import
sys
import
json
,
sys
from
.neoctl
import
NeoCTL
,
NotReadyException
from
neo.lib.node
import
NodeManager
from
neo.lib.pt
import
PartitionTable
from
neo.lib.util
import
p64
,
u64
,
tidFromTime
,
timeStringFromTID
from
neo.lib.util
import
p64
,
u64
,
datetimeFromTID
,
tidFromTime
from
neo.lib.protocol
import
uuid_str
,
formatNodeList
,
\
ClusterStates
,
NodeStates
,
NodeTypes
,
UUID_NAMESPACES
,
ZERO_TID
...
...
@@ -29,6 +29,7 @@ action_dict = {
'node'
:
'getNodeList'
,
'cluster'
:
'getClusterState'
,
'primary'
:
'getPrimary'
,
'summary'
:
'getSummary'
,
},
'set'
:
{
'cluster'
:
'setClusterState'
,
...
...
@@ -100,12 +101,12 @@ class TerminalNeoCTL(object):
if
backup_tid
:
ltid
=
self
.
neoctl
.
getLastTransaction
()
r
=
"backup_tid = 0x%x (%s)"
%
(
u64
(
backup_tid
),
timeString
FromTID
(
backup_tid
))
datetime
FromTID
(
backup_tid
))
else
:
loid
,
ltid
=
self
.
neoctl
.
getLastIds
()
r
=
"last_oid = 0x%x"
%
(
u64
(
loid
))
return
r
+
"
\
n
last_tid = 0x%x (%s)
\
n
last_ptid = %s"
%
\
(
u64
(
ltid
),
timeString
FromTID
(
ltid
),
ptid
)
(
u64
(
ltid
),
datetime
FromTID
(
ltid
),
ptid
)
def
getPartitionRowList
(
self
,
params
):
"""
...
...
@@ -159,6 +160,21 @@ class TerminalNeoCTL(object):
assert
len
(
params
)
==
1
return
self
.
neoctl
.
setClusterState
(
self
.
asClusterState
(
params
[
0
]))
def getSummary(self, params):
    """
    Get a summary of the health of this cluster and backups.
    The first line reports severities: it is a commented json dump of
      {severity: [backup_name | null]}
    where severity is either "warning" or "problem"
    and null refers to this cluster
    """
    assert len(params) == 0
    warning, problem, summary = self.neoctl.getMonitorInformation()
    # Only severities that actually have something to report are dumped.
    severities = {}
    for level, names in (('warning', warning), ('problem', problem)):
        if names:
            severities[level] = names
    header = json.dumps(severities)
    return "# %s\n%s" % (header, summary)
def
setNumReplicas
(
self
,
params
):
"""
Set number of replicas.
...
...
neo/neoctl/handler.py
View file @
c681f666
...
...
@@ -64,3 +64,4 @@ class CommandEventHandler(EventHandler):
answerLastTransaction
=
__answer
(
Packets
.
AnswerLastTransaction
)
answerRecovery
=
__answer
(
Packets
.
AnswerRecovery
)
answerTweakPartitionTable
=
__answer
(
Packets
.
AnswerTweakPartitionTable
)
answerMonitorInformation
=
__answer
(
Packets
.
AnswerMonitorInformation
)
neo/neoctl/neoctl.py
View file @
c681f666
...
...
@@ -216,3 +216,9 @@ class NeoCTL(BaseApplication):
conn
.
send
(
Packets
.
FlushLog
())
while
conn
.
pending
():
self
.
em
.
poll
(
1
)
def getMonitorInformation(self):
    """Ask for the monitor information and return the answer's payload.

    Sends Packets.AskMonitorInformation and returns every element of the
    response after the first one (callers in this commit unpack it as
    warning, problem, summary).

    Raises RuntimeError, carrying the whole response, when the first
    element of the response is not Packets.AnswerMonitorInformation.
    """
    answer = self.__ask(Packets.AskMonitorInformation())
    answer_type = answer[0]
    if answer_type != Packets.AnswerMonitorInformation:
        raise RuntimeError(answer)
    payload = answer[1:]
    return payload
neo/storage/app.py
View file @
c681f666
...
...
@@ -252,7 +252,7 @@ class Application(BaseApplication):
# search, find, connect and identify to the primary master
bootstrap
=
BootstrapManager
(
self
,
NodeTypes
.
STORAGE
,
None
if
self
.
new_nid
else
self
.
server
,
self
.
devpath
,
self
.
new_nid
)
devpath
=
self
.
devpath
,
new_nid
=
self
.
new_nid
)
self
.
master_node
,
self
.
master_conn
=
bootstrap
.
getPrimaryConnection
()
self
.
dm
.
setUUID
(
self
.
uuid
)
...
...
neo/storage/checker.py
View file @
c681f666
...
...
@@ -51,7 +51,7 @@ class Checker(object):
else
:
conn
=
ClientConnection
(
app
,
StorageOperationHandler
(
app
),
node
)
conn
.
ask
(
Packets
.
RequestIdentification
(
NodeTypes
.
STORAGE
,
uuid
,
app
.
server
,
name
,
app
.
id_timestamp
,
(),
()
))
uuid
,
app
.
server
,
name
,
app
.
id_timestamp
,
{}
))
self
.
conn_dict
[
conn
]
=
node
.
isIdentified
()
conn_set
=
set
(
self
.
conn_dict
)
conn_set
.
discard
(
None
)
...
...
neo/storage/handlers/identification.py
View file @
c681f666
...
...
@@ -32,7 +32,7 @@ class IdentificationHandler(EventHandler):
return
self
.
app
.
nm
def
requestIdentification
(
self
,
conn
,
node_type
,
uuid
,
address
,
name
,
id_timestamp
,
devpath
,
new_nid
):
id_timestamp
,
extra
):
self
.
checkClusterName
(
name
)
app
=
self
.
app
# reject any incoming connections if not ready
...
...
neo/storage/replicator.py
View file @
c681f666
...
...
@@ -350,7 +350,7 @@ class Replicator(object):
try
:
conn
.
ask
(
Packets
.
RequestIdentification
(
NodeTypes
.
STORAGE
,
None
if
name
else
app
.
uuid
,
app
.
server
,
name
or
app
.
name
,
app
.
id_timestamp
,
(),
()
))
app
.
id_timestamp
,
{}
))
except
ConnectionClosed
:
if
previous_node
is
self
.
current_node
:
return
...
...
neo/tests/functional/testCluster.py
View file @
c681f666
...
...
@@ -14,12 +14,21 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from
functools
import
partial
import
unittest
import
transaction
from
neo.lib.protocol
import
NodeStates
from
neo.neoctl.app
import
TerminalNeoCTL
from
.
import
NEOCluster
,
NEOFunctionalTest
class TerminalNeoCTL(TerminalNeoCTL):
    """Test flavour of the neoctl CLI front-end, bound to a test cluster.

    It reuses the `neoctl` object of the given cluster instead of creating
    its own.
    """

    def __init__(self, cluster):
        # Deliberately does not call the parent constructor.
        self.neoctl = cluster.neoctl

    def __del__(self):
        """Do nothing.

        NOTE(review): presumably overrides parent cleanup so that the
        cluster's shared neoctl is left untouched -- confirm against
        neo.neoctl.app.
        """
class
ClusterTests
(
NEOFunctionalTest
):
def
_tearDown
(
self
,
success
):
...
...
@@ -118,12 +127,20 @@ class ClusterTests(NEOFunctionalTest):
self
.
neo
.
start
()
self
.
neo
.
expectClusterRunning
()
self
.
neo
.
expectOudatedCells
(
0
)
# check neoctl cli
getSummary
=
partial
(
TerminalNeoCTL
(
self
.
neo
).
getSummary
,
())
ok_empty
=
'# {}
\
n
RUNNING;'
\
' UP_TO_DATE=1; ltid=0000000000000000 (1900-01-01 00:00:00)'
self
.
assertEqual
(
getSummary
(),
ok_empty
)
# connect a client and check it's known
db
,
conn
=
self
.
neo
.
getZODBConnection
()
self
.
assertEqual
(
len
(
self
.
neo
.
getClientlist
()),
1
)
# drop the storage, the cluster is no longer operational...
self
.
neo
.
getStorageProcessList
()[
0
].
stop
()
self
.
neo
.
expectClusterRecovering
()
# check severity returned by the cli
self
.
assertEqual
(
getSummary
(),
'# {"problem": [null]}
\
n
RECOVERING; UP_TO_DATE=1; DOWN=1'
)
# ...and the client gets disconnected
self
.
assertEqual
(
len
(
self
.
neo
.
getClientlist
()),
0
)
# restart storage so that the cluster is operational again
...
...
@@ -134,6 +151,9 @@ class ClusterTests(NEOFunctionalTest):
conn
.
root
()[
'plop'
]
=
1
transaction
.
commit
()
self
.
assertEqual
(
len
(
self
.
neo
.
getClientlist
()),
1
)
summary
=
getSummary
()
self
.
assertTrue
(
summary
.
startswith
(
'# {}
\
n
RUNNING;'
),
summary
)
self
.
assertNotEqual
(
summary
,
ok_empty
)
def
testStorageLostDuringRecovery
(
self
):
"""
...
...
neo/tests/master/testMasterPT.py
View file @
c681f666
...
...
@@ -325,7 +325,7 @@ class MasterPartitionTableTests(NeoUnitTestBase):
pt
.
make
(
sn
)
pt
.
log
()
for
i
,
s
in
enumerate
(
sn
,
sn_count
):
s
.
devpath
=
tuple
(
bin
(
i
)[
3
:
-
1
])
s
.
extra
=
{
'devpath'
:
tuple
(
bin
(
i
)[
3
:
-
1
])}
self
.
assertEqual
(
Counter
(
x
[
2
]
for
x
in
self
.
tweak
(
pt
)),
{
CellStates
.
OUT_OF_DATE
:
96
,
CellStates
.
FEEDING
:
96
,
...
...
@@ -360,12 +360,12 @@ class MasterPartitionTableTests(NeoUnitTestBase):
assert
len
(
topo
)
<=
sn_count
sn2
=
sn
[:
len
(
topo
)]
for
s
in
sn2
:
s
.
devpath
=
()
s
.
extra
=
{}
k
=
(
1
,
7
)[
even
]
pt
=
PartitionTable
(
np
*
k
,
i
)
pt
.
make
(
sn2
)
for
devpath
,
s
in
zip
(
topo
,
sn2
):
s
.
devpath
=
tuple
(
devpath
)
s
.
extra
=
{
'devpath'
:
tuple
(
devpath
)}
if
type
(
expected
)
is
tuple
:
self
.
assertTrue
(
self
.
tweak
(
pt
))
self
.
update
(
pt
)
...
...
neo/tests/protocol
View file @
c681f666
...
...
@@ -16,6 +16,7 @@ AnswerInformationLocked(p64)
AnswerLastIDs(?p64,?p64)
AnswerLastTransaction(p64)
AnswerLockedTransactions({p64:?p64})
AnswerMonitorInformation([?bin],[?bin],bin)
AnswerNewOIDs([p64])
AnswerNodeList([(NodeTypes,?(bin,int),?int,NodeStates,?float)])
AnswerObject(p64,p64,?p64,?int,bin,bin,?p64)
...
...
@@ -50,6 +51,7 @@ AskLastIDs()
AskLastTransaction()
AskLockInformation(p64,p64)
AskLockedTransactions()
AskMonitorInformation()
AskNewOIDs(int)
AskNodeList(NodeTypes)
AskObject(p64,?p64,?p64)
...
...
@@ -77,6 +79,7 @@ InvalidateObjects(p64,[p64])
NotPrimaryMaster(?int,[(bin,int)])
NotifyClusterInformation(ClusterStates)
NotifyDeadlock(p64,p64)
NotifyMonitorInformation({bin:any})
NotifyNodeInformation(float,[(NodeTypes,?(bin,int),?int,NodeStates,?float)])
NotifyPartitionChanges(int,int,[(int,int,CellStates)])
NotifyPartitionCorrupted(int,[int])
...
...
@@ -85,11 +88,12 @@ NotifyRepair(bool)
NotifyReplicationDone(int,p64)
NotifyTransactionFinished(p64,p64)
NotifyUnlockInformation(p64)
NotifyUpstreamAdmin((bin,int))
Ping()
Pong()
Repair([int],bool)
Replicate(p64,bin,{int:?(bin,int)})
RequestIdentification(NodeTypes,?int,?(bin,int),bin,?float,
any,[int]
)
RequestIdentification(NodeTypes,?int,?(bin,int),bin,?float,
{bin:any}
)
SendPartitionTable(?int,int,[[(int,CellStates)]])
SetClusterState(ClusterStates)
SetNodeState(int,NodeStates)
...
...
neo/tests/threaded/__init__.py
View file @
c681f666
...
...
@@ -20,6 +20,7 @@ import os, random, select, socket, sys, tempfile
import
thread
,
threading
,
time
,
traceback
,
weakref
from
collections
import
deque
from
contextlib
import
contextmanager
from
email
import
message_from_string
from
itertools
import
count
from
functools
import
partial
,
wraps
from
zlib
import
decompress
...
...
@@ -301,6 +302,14 @@ class TestSerialized(Serialized):
return
self
.
_epoll
.
poll
(
timeout
)
class FakeSMTP(list):
    """List-based stand-in exposing connect/close/sendmail.

    Nothing is sent: every sendmail() call is recorded by appending the
    tuple of its positional arguments to the list itself, so tests can
    inspect (and pop) what would have been mailed.
    """

    def sendmail(self, *args):
        self.append(args)

    # Connection management is a no-op accepting any arguments.
    connect = close = lambda *_: None
class
Node
(
object
):
def
getConnectionList
(
self
,
*
peers
):
...
...
@@ -421,7 +430,11 @@ class ServerNode(Node):
self
.
em
.
wakeup
(
thread
.
exit
)
class AdminApplication(ServerNode, neo.admin.app.Application):
    """Admin node for threaded tests, with its `smtp` attribute faked."""

    def __setattr__(self, name, value):
        # Whatever is assigned to `smtp` is replaced by a fresh FakeSMTP,
        # so that assignments to this attribute always store a recorder.
        super(AdminApplication, self).__setattr__(
            name, FakeSMTP() if name == 'smtp' else value)
class
MasterApplication
(
ServerNode
,
neo
.
master
.
app
.
Application
):
pass
...
...
@@ -691,6 +704,9 @@ class NEOCluster(object):
self
.
_resource_dict
[
result
]
=
self
return
result
[
1
]
def
_allocateName
(
self
,
_new
=
lambda
:
random
.
randint
(
0
,
100
)):
return
'neo_%s'
%
self
.
_allocate
(
'name'
,
_new
)
@
staticmethod
def
_patch
():
cls
=
NEOCluster
...
...
@@ -717,10 +733,10 @@ class NEOCluster(object):
def
__init__
(
self
,
master_count
=
1
,
partitions
=
1
,
replicas
=
0
,
upstream
=
None
,
adapter
=
os
.
getenv
(
'NEO_TESTS_ADAPTER'
,
'SQLite'
),
storage_count
=
None
,
db_list
=
None
,
clear_databases
=
True
,
compress
=
True
,
compress
=
True
,
backup_count
=
0
,
importer
=
None
,
autostart
=
None
,
dedup
=
False
,
name
=
None
):
self
.
name
=
name
or
'neo_%s'
%
self
.
_allocate
(
'name'
,
lambda
:
random
.
randint
(
0
,
100
))
self
.
name
=
name
or
self
.
_allocateName
()
self
.
backup_list
=
[
self
.
_allocateName
()
for
x
in
xrange
(
backup_count
)]
self
.
compress
=
compress
self
.
num_partitions
=
partitions
master_list
=
[
MasterApplication
.
newAddress
()
...
...
@@ -759,6 +775,9 @@ class NEOCluster(object):
kw
[
'wait'
]
=
0
self
.
storage_list
=
[
StorageApplication
(
database
=
db
(
x
),
**
kw
)
for
x
in
db_list
]
kw
[
'monitor_email'
]
=
self
.
name
,
if
backup_count
:
kw
[
'monitor_backup'
]
=
self
.
backup_list
self
.
admin_list
=
[
AdminApplication
(
**
kw
)]
def
__repr__
(
self
):
...
...
@@ -1133,6 +1152,23 @@ class NEOThreadedTest(NeoTestBase):
ob
.
_p_activate
()
ob
.
_p_jar
.
readCurrent
(
ob
)
def assertNoMonitorInformation(self, cluster):
    """Assert that cluster.admin.smtp is empty (falsy)."""
    outbox = cluster.admin.smtp
    self.assertFalse(outbox)
def assertMonitor(self, cluster, severity, summary, *backups):
    """Consume the oldest recorded mail of the cluster's admin and check it.

    cluster  -- object whose admin.smtp holds recorded sendmail() argument
                tuples; the first one is popped and its third item is
                parsed as an email message
    severity -- index into ('OK', 'WARNING', 'PROBLEM'); the selected word
                must appear in the subject
    summary  -- expected prefix of the first line of the body
    backups  -- (backup, status) pairs; for each of them the body must
                contain an empty line, a line equal to backup.name and a
                line starting with ' %s' % status. Every pair must be
                matched.
    """
    def checkPrefix(line, prefix):
        self.assertTrue(line.startswith(prefix), (line, prefix))

    mail = message_from_string(cluster.admin.smtp.pop(0)[2])
    label = ('OK', 'WARNING', 'PROBLEM')[severity]
    self.assertIn(label, mail['subject'])
    lines = mail.get_payload().splitlines()
    checkPrefix(lines.pop(0), summary)
    pending = dict((backup.name, status) for backup, status in backups)
    while lines:
        # Each backup section: blank separator, backup name, status line.
        self.assertFalse(lines.pop(0))
        status = pending.pop(lines.pop(0))
        checkPrefix(lines.pop(0), ' %s' % status)
    # No expected backup may be left unreported.
    self.assertFalse(pending)
class
ThreadId
(
list
):
...
...
neo/tests/threaded/testReplication.py
View file @
c681f666
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2019 Nexedi SA
#
...
...
@@ -41,10 +42,14 @@ from .test import PCounter, PCounterWithResolution # XXX
def
backup_test
(
partitions
=
1
,
upstream_kw
=
{},
backup_kw
=
{}):
def
decorator
(
wrapped
):
def
wrapper
(
self
):
with
NEOCluster
(
partitions
=
partitions
,
**
upstream_kw
)
as
upstream
:
with
NEOCluster
(
partitions
=
partitions
,
backup_count
=
1
,
**
upstream_kw
)
as
upstream
:
upstream
.
start
()
name
,
=
upstream
.
backup_list
with
NEOCluster
(
partitions
=
partitions
,
upstream
=
upstream
,
**
backup_kw
)
as
backup
:
name
=
name
,
**
backup_kw
)
as
backup
:
self
.
assertMonitor
(
upstream
,
2
,
'RECOVERING'
,
(
backup
,
None
))
backup
.
start
()
backup
.
neoctl
.
setClusterState
(
ClusterStates
.
STARTING_BACKUP
)
self
.
tic
()
...
...
@@ -321,6 +326,10 @@ class ReplicationTests(NEOThreadedTest):
delay
=
f
.
delayNotifyUnlockInformation
()
t1
.
commit
()
self
.
tic
()
warning
,
problem
,
msg
=
upstream
.
neoctl
.
getMonitorInformation
()
self
.
assertEqual
(
warning
,
(
backup
.
name
,))
self
.
assertFalse
(
problem
)
self
.
assertTrue
(
msg
.
endswith
(
'lag=ε'
),
msg
)
def
storeObject
(
orig
,
*
args
,
**
kw
):
p
.
revert
()
f
.
remove
(
delay
)
...
...
@@ -331,6 +340,10 @@ class ReplicationTests(NEOThreadedTest):
t1
.
begin
()
self
.
assertEqual
(
5
,
ob
.
value
)
self
.
assertEqual
(
1
,
self
.
checkBackup
(
backup
))
warning
,
problem
,
msg
=
upstream
.
neoctl
.
getMonitorInformation
()
self
.
assertFalse
(
warning
)
self
.
assertFalse
(
problem
)
self
.
assertTrue
(
msg
.
endswith
(
'lag=0.0'
),
msg
)
@
with_cluster
()
def
testBackupEarlyInvalidation
(
self
,
upstream
):
...
...
@@ -761,6 +774,22 @@ class ReplicationTests(NEOThreadedTest):
@
backup_test
(
2
,
backup_kw
=
dict
(
replicas
=
1
))
def
testResumingBackupReplication
(
self
,
backup
):
upstream
=
backup
.
upstream
for
monitor
in
'RECOVERING'
,
'VERIFYING'
,
'RUNNING'
:
monitor
+=
'; UP_TO_DATE=2'
self
.
assertMonitor
(
upstream
,
2
,
monitor
,
(
backup
,
None
))
self
.
assertMonitor
(
upstream
,
0
,
monitor
,
(
backup
,
'BACKINGUP; UP_TO_DATE=4;'
))
def
checkMonitor
():
self
.
assertMonitor
(
upstream
,
2
,
monitor
,
(
backup
,
'BACKINGUP; OUT_OF_DATE=2, UP_TO_DATE=2; DOWN=1;'
))
self
.
assertNoMonitorInformation
(
upstream
)
warning
,
problem
,
_
=
upstream
.
neoctl
.
getMonitorInformation
()
self
.
assertFalse
(
warning
)
self
.
assertEqual
(
problem
,
(
backup
.
name
,))
warning
,
problem
,
_
=
backup
.
neoctl
.
getMonitorInformation
()
self
.
assertFalse
(
warning
)
self
.
assertEqual
(
problem
,
(
None
,))
t
,
c
=
upstream
.
getTransaction
()
r
=
c
.
root
()
r
[
1
]
=
PCounter
()
...
...
@@ -789,11 +818,18 @@ class ReplicationTests(NEOThreadedTest):
return
x
.
pop
(
conn
.
getUUID
(),
1
)
newTransaction
()
self
.
assertEqual
(
getBackupTid
(),
tids
[
1
])
self
.
assertNoMonitorInformation
(
upstream
)
primary
.
stop
()
backup
.
join
((
primary
,))
primary
.
resetNode
()
checkMonitor
()
primary
.
start
()
self
.
tic
()
self
.
assertMonitor
(
upstream
,
1
,
monitor
,
(
backup
,
'BACKINGUP; OUT_OF_DATE=2, UP_TO_DATE=2; ltid='
))
warning
,
problem
,
_
=
backup
.
neoctl
.
getMonitorInformation
()
self
.
assertEqual
(
warning
,
(
None
,))
self
.
assertFalse
(
problem
)
primary
,
slave
=
slave
,
primary
self
.
assertEqual
(
tids
,
getTIDList
(
slave
))
self
.
assertEqual
(
tids
[:
1
],
getTIDList
(
primary
))
...
...
@@ -803,6 +839,11 @@ class ReplicationTests(NEOThreadedTest):
self
.
assertEqual
(
4
,
self
.
checkBackup
(
backup
))
self
.
assertEqual
(
getBackupTid
(
min
),
tids
[
1
])
self
.
assertMonitor
(
upstream
,
1
,
monitor
,
(
backup
,
'BACKINGUP; OUT_OF_DATE=1, UP_TO_DATE=3; ltid='
))
self
.
assertMonitor
(
upstream
,
0
,
monitor
,
(
backup
,
'BACKINGUP; UP_TO_DATE=4;'
))
# Check that replication resumes from the maximum possible tid
# (for UP_TO_DATE cells of a backup cluster). More precisely:
# - cells are handled independently (done here by blocking replication
...
...
@@ -811,6 +852,7 @@ class ReplicationTests(NEOThreadedTest):
# we interrupt replication of obj in the middle of a transaction)
slave
.
stop
()
backup
.
join
((
slave
,))
checkMonitor
()
ask
=
[]
def
delayReplicate
(
conn
,
packet
):
if
isinstance
(
packet
,
Packets
.
AskFetchObjects
):
...
...
@@ -820,16 +862,28 @@ class ReplicationTests(NEOThreadedTest):
return
ask
.
append
(
packet
.
_args
)
conn
,
=
upstream
.
master
.
getConnectionList
(
backup
.
master
)
admins
=
upstream
.
admin
,
backup
.
admin
with
ConnectionFilter
()
as
f
,
Patch
(
replicator
.
Replicator
,
_nextPartitionSortKey
=
lambda
orig
,
self
,
offset
:
offset
):
f
.
add
(
delayReplicate
)
delayReconnect
=
f
.
delayAskLastTransaction
()
delayReconnect
=
f
.
delayAskLastTransaction
(
lambda
conn
:
self
.
getConnectionApp
(
conn
)
not
in
admins
)
# Without the following delay, the upstream admin may be notified
# that the backup is back in BACKINGUP state before getting the
# last tid (from the upstream master); note that in such case,
# we would have 2 consecutive identical notifications.
delayMonitor
=
f
.
delayNotifyMonitorInformation
(
lambda
_
,
x
=
iter
((
0
,)):
next
(
x
,
1
))
conn
.
close
()
newTransaction
()
self
.
assertMonitor
(
upstream
,
2
,
monitor
,
(
backup
,
'STARTING_BACKUP; OUT_OF_DATE=2, UP_TO_DATE=2; DOWN=1'
))
f
.
remove
(
delayMonitor
)
newTransaction
()
checkMonitor
()
newTransaction
()
self
.
assertFalse
(
ask
)
self
.
assertEqual
(
f
.
filtered_count
,
1
)
self
.
assertEqual
(
f
.
filtered_count
,
2
)
with
Patch
(
replicator
,
FETCH_COUNT
=
1
):
f
.
remove
(
delayReconnect
)
self
.
tic
()
...
...
@@ -859,6 +913,7 @@ class ReplicationTests(NEOThreadedTest):
])
self
.
tic
()
self
.
assertEqual
(
2
,
self
.
checkBackup
(
backup
))
checkMonitor
()
@
with_cluster
(
start_cluster
=
0
,
replicas
=
1
)
def
testStoppingDuringReplication
(
self
,
cluster
):
...
...
tools/stress
View file @
c681f666
...
...
@@ -17,7 +17,7 @@ from neo.lib.connector import SocketConnector
from
neo.lib.debug
import
PdbSocket
from
neo.lib.node
import
Node
from
neo.lib.protocol
import
NodeTypes
from
neo.lib.util
import
timeString
FromTID
,
p64
,
u64
from
neo.lib.util
import
datetime
FromTID
,
p64
,
u64
from
neo.storage.app
import
DATABASE_MANAGER_DICT
,
\
Application
as
StorageApplication
from
neo.tests
import
getTempDirectory
,
mysql_pool
...
...
@@ -533,7 +533,7 @@ class Application(StressApplication):
ltid
=
self
.
ltid
stdscr
.
addstr
(
y
,
0
,
'last oid: 0x%x
\
n
last tid: 0x%x (%s)
\
n
clients: '
%
(
u64
(
self
.
loid
),
u64
(
ltid
),
timeString
FromTID
(
ltid
)))
%
(
u64
(
self
.
loid
),
u64
(
ltid
),
datetime
FromTID
(
ltid
)))
before
=
after
=
0
for
i
,
p
in
enumerate
(
self
.
cluster
.
process_dict
[
Client
]):
if
i
:
...
...
@@ -708,7 +708,7 @@ def main():
ok
=
tid
finally
:
conn
.
close
()
print
(
'bad: 0x%x (%s)'
%
(
u64
(
bad
),
timeString
FromTID
(
bad
)))
print
(
'bad: 0x%x (%s)'
%
(
u64
(
bad
),
datetime
FromTID
(
bad
)))
finally
:
db
.
close
()
finally
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment