Commit 2b321c02 authored by Julien Muchembled's avatar Julien Muchembled

WIP: Make admin node a web-app

The goal is to get rid off the neoctl command-line tool, and to manage the
cluster via a web browser, or tools like 'wget'. Then, it will be possible to
provide an web user interface to connect to the underlying DB of any storage
node, usually a SQL client.

The design of admin app is finished:
- it's threaded like clients
- it's a WSGI app

I also hacked a HTTP API as quickly as possible to make all tests pass.

TODO:
- define a better HTTP API
- there's no UI at all yet
- remove all unused packets from the protocol (those that were only used
  between neoctl and admin node)

There's currently no UI implemented.

There are a few dead files, not deleted yet, in case that they contain a few
pieces of useful code:
 neo/neoctl/app.py
 neo/neoctl/handler.py
 neo/scripts/neoctl.py
parent 9bd14bf2
This diff is collapsed.
...@@ -14,73 +14,18 @@ ...@@ -14,73 +14,18 @@
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
from neo.lib import logging, protocol from bottle import HTTPError
from neo.lib.handler import EventHandler from neo.lib import logging
from neo.lib.protocol import uuid_str, Packets from neo.lib.handler import AnswerBaseHandler, EventHandler, MTEventHandler
from neo.lib.exception import PrimaryFailure from neo.lib.exception import PrimaryFailure
def check_primary_master(func):
def wrapper(self, *args, **kw):
if self.app.bootstrapped:
return func(self, *args, **kw)
raise protocol.NotReadyError('Not connected to a primary master.')
return wrapper
def forward_ask(klass):
return check_primary_master(lambda self, conn, *args, **kw:
self.app.master_conn.ask(klass(*args, **kw),
conn=conn, msg_id=conn.getPeerId()))
class AdminEventHandler(EventHandler):
"""This class deals with events for administrating cluster."""
@check_primary_master
def askPartitionList(self, conn, min_offset, max_offset, uuid):
logging.info("ask partition list from %s to %s for %s",
min_offset, max_offset, uuid_str(uuid))
self.app.sendPartitionTable(conn, min_offset, max_offset, uuid)
@check_primary_master
def askNodeList(self, conn, node_type):
if node_type is None:
node_type = 'all'
node_filter = None
else:
node_filter = lambda n: n.getType() is node_type
logging.info("ask list of %s nodes", node_type)
node_list = self.app.nm.getList(node_filter)
node_information_list = [node.asTuple() for node in node_list ]
p = Packets.AnswerNodeList(node_information_list)
conn.answer(p)
@check_primary_master
def askClusterState(self, conn):
conn.answer(Packets.AnswerClusterState(self.app.cluster_state))
@check_primary_master
def askPrimary(self, conn):
master_node = self.app.master_node
conn.answer(Packets.AnswerPrimary(master_node.getUUID()))
askLastIDs = forward_ask(Packets.AskLastIDs)
askLastTransaction = forward_ask(Packets.AskLastTransaction)
addPendingNodes = forward_ask(Packets.AddPendingNodes)
tweakPartitionTable = forward_ask(Packets.TweakPartitionTable)
setClusterState = forward_ask(Packets.SetClusterState)
setNodeState = forward_ask(Packets.SetNodeState)
checkReplicas = forward_ask(Packets.CheckReplicas)
class MasterEventHandler(EventHandler): class MasterEventHandler(EventHandler):
""" This class is just used to dispacth message to right handler"""
def _connectionLost(self, conn): def _connectionLost(self, conn):
app = self.app conn.cancelRequests("connection to master lost")
if app.listening_conn: # if running self.app.nm.getByUUID(conn.getUUID()).setUnknown()
assert app.master_conn in (conn, None) if self.app.master_conn is not None:
conn.cancelRequests("connection to master lost") assert self.app.master_conn is conn
app.reset()
app.uuid = None
raise PrimaryFailure raise PrimaryFailure
def connectionFailed(self, conn): def connectionFailed(self, conn):
...@@ -89,18 +34,6 @@ class MasterEventHandler(EventHandler): ...@@ -89,18 +34,6 @@ class MasterEventHandler(EventHandler):
def connectionClosed(self, conn): def connectionClosed(self, conn):
self._connectionLost(conn) self._connectionLost(conn)
def dispatch(self, conn, packet, kw={}):
if 'conn' in kw:
# expected answer
if packet.isResponse():
packet.setId(kw['msg_id'])
kw['conn'].answer(packet)
else:
self.app.request_handler.dispatch(conn, packet, kw)
else:
# unexpected answers and notifications
super(MasterEventHandler, self).dispatch(conn, packet, kw)
def answerClusterState(self, conn, state): def answerClusterState(self, conn, state):
self.app.cluster_state = state self.app.cluster_state = state
...@@ -109,23 +42,22 @@ class MasterEventHandler(EventHandler): ...@@ -109,23 +42,22 @@ class MasterEventHandler(EventHandler):
# implemented for factorize code (as done for bootstrap) # implemented for factorize code (as done for bootstrap)
logging.debug("answerNodeInformation") logging.debug("answerNodeInformation")
def notifyPartitionChanges(self, conn, ptid, cell_list): def notifyNodeInformation(self, conn, node_list):
self.app.pt.update(ptid, cell_list, self.app.nm) self.app.nm.update(node_list)
def answerPartitionTable(self, conn, ptid, row_list): def answerPartitionTable(self, conn, ptid, row_list):
self.app.pt.load(ptid, row_list, self.app.nm) self.app.pt.load(ptid, row_list, self.app.nm)
self.app.bootstrapped = True
def sendPartitionTable(self, conn, ptid, row_list): class MasterNotificationsHandler(MasterEventHandler, MTEventHandler):
if self.app.bootstrapped:
self.app.pt.load(ptid, row_list, self.app.nm)
def notifyClusterInformation(self, conn, cluster_state): notifyClusterInformation = MasterEventHandler.answerClusterState.im_func
self.app.cluster_state = cluster_state sendPartitionTable = MasterEventHandler.answerPartitionTable.im_func
def notifyNodeInformation(self, conn, node_list): def notifyPartitionChanges(self, conn, ptid, cell_list):
self.app.nm.update(node_list) self.app.pt.update(ptid, cell_list, self.app.nm)
class MasterRequestEventHandler(EventHandler): class PrimaryAnswersHandler(AnswerBaseHandler):
""" This class handle all answer from primary master node""" """ This class handle all answer from primary master node"""
# XXX: to be deleted ?
def protocolError(self, conn, message):
raise HTTPError(400, message)
...@@ -592,6 +592,11 @@ class ClientConnection(Connection): ...@@ -592,6 +592,11 @@ class ClientConnection(Connection):
handler.connectionStarted(self) handler.connectionStarted(self)
self._connect() self._connect()
def convertToMT(self, dispatcher):
assert self.__class__ is ClientConnection, self
self.__class__ = MTClientConnection
self._initMT(dispatcher)
def _connect(self): def _connect(self):
try: try:
connected = self.connector.makeClientConnection() connected = self.connector.makeClientConnection()
...@@ -688,11 +693,14 @@ class MTClientConnection(ClientConnection): ...@@ -688,11 +693,14 @@ class MTClientConnection(ClientConnection):
return wrapper return wrapper
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
self.lock = lock = RLock() self._initMT(kwargs.pop('dispatcher'))
self.dispatcher = kwargs.pop('dispatcher') with self.lock:
with lock:
super(MTClientConnection, self).__init__(*args, **kwargs) super(MTClientConnection, self).__init__(*args, **kwargs)
def _initMT(self, dispatcher):
self.lock = RLock()
self.dispatcher = dispatcher
def ask(self, packet, timeout=CRITICAL_TIMEOUT, on_timeout=None, def ask(self, packet, timeout=CRITICAL_TIMEOUT, on_timeout=None,
queue=None, **kw): queue=None, **kw):
with self.lock: with self.lock:
......
...@@ -135,10 +135,16 @@ class ThreadedApplication(BaseApplication): ...@@ -135,10 +135,16 @@ class ThreadedApplication(BaseApplication):
handler.dispatch(conn, packet, kw) handler.dispatch(conn, packet, kw)
def _ask(self, conn, packet, handler=None, **kw): def _ask(self, conn, packet, handler=None, **kw):
self.setHandlerData(None) # The following line is more than optimization. If an admin node sends
queue = self._thread_container.queue # a packet that causes the master to disconnect (e.g. stop a cluster),
msg_id = conn.ask(packet, queue=queue, **kw) # we want at least to return the answer for this request, even if the
get = queue.get # polling thread already exited and cleared self.__dict__: returning
# the result of getHandlerData() would raise an AttributeError.
# This is tested by testShutdown (neo.tests.threaded.test.Test).
thread_container = self._thread_container
thread_container.answer = None
msg_id = conn.ask(packet, queue=thread_container.queue, **kw)
get = thread_container.queue.get
_handlePacket = self._handlePacket _handlePacket = self._handlePacket
while True: while True:
qconn, qpacket, kw = get(True) qconn, qpacket, kw = get(True)
...@@ -152,7 +158,6 @@ class ThreadedApplication(BaseApplication): ...@@ -152,7 +158,6 @@ class ThreadedApplication(BaseApplication):
raise ValueError, 'ForgottenPacket for an ' \ raise ValueError, 'ForgottenPacket for an ' \
'explicitely expected packet.' 'explicitely expected packet.'
_handlePacket(qconn, qpacket, kw, handler) _handlePacket(qconn, qpacket, kw, handler)
break return thread_container.answer # see above comment
if not is_forgotten and qpacket is not None: if not is_forgotten and qpacket is not None:
_handlePacket(qconn, qpacket, kw) _handlePacket(qconn, qpacket, kw)
return self.getHandlerData()
...@@ -14,157 +14,100 @@ ...@@ -14,157 +14,100 @@
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
from neo.lib.app import BaseApplication import json, socket
from neo.lib.connection import ClientConnection from urllib import URLopener, urlencode
from neo.lib.protocol import ClusterStates, NodeStates, ErrorCodes, Packets from neo.lib.protocol import CellStates, ClusterStates, NodeTypes, NodeStates, \
from .handler import CommandEventHandler ZERO_TID
from neo.lib.util import u64
class NotReadyException(Exception): class NotReadyException(Exception):
pass pass
class NeoCTL(BaseApplication): class NeoCTL(object):
connection = None def __init__(self, address):
connected = False host, port = address
if ":" in host:
def __init__(self, address, **kw): host = "[%s]" % host
super(NeoCTL, self).__init__(**kw) self.base_url = "http://%s:%s/" % (host, port)
self.server = self.nm.createAdmin(address=address) self._open = URLopener().open
self.handler = CommandEventHandler(self)
self.response_queue = [] def _ask(self, path, **kw):
if kw:
def __getConnection(self): path += "?" + urlencode(sorted(x for x in kw.iteritems()
if not self.connected: if '' is not x[1] is not None))
self.connection = ClientConnection(self, self.handler, self.server) try:
# Never delay reconnection to master. This speeds up unit tests return self._open(self.base_url + path).read()
# and it should not change anything for normal use. except IOError, e:
self.connection.setReconnectionNoDelay() e0 = e[0]
while not self.connected: if e0 == 'socket error' or e0 == 'http error' and e[1] == 503:
self.em.poll(1) raise NotReadyException
if self.connection is None: raise
raise NotReadyException('not connected')
return self.connection
def __ask(self, packet):
# TODO: make thread-safe
connection = self.__getConnection()
connection.ask(packet)
response_queue = self.response_queue
assert len(response_queue) == 0
while self.connected:
self.em.poll(1)
if response_queue:
break
else:
raise NotReadyException, 'Connection closed'
response = response_queue.pop()
if response[0] == Packets.Error and \
response[1] == ErrorCodes.NOT_READY:
raise NotReadyException(response[2])
return response
def enableStorageList(self, uuid_list): def enableStorageList(self, uuid_list):
""" """
Put all given storage nodes in "running" state. Put all given storage nodes in "running" state.
""" """
packet = Packets.AddPendingNodes(uuid_list) self._ask('enableStorageList', node_list=','.join(map(str, uuid_list)))
response = self.__ask(packet)
if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
raise RuntimeError(response)
return response[2]
def tweakPartitionTable(self, uuid_list=()): def tweakPartitionTable(self, uuid_list=()):
response = self.__ask(Packets.TweakPartitionTable(uuid_list)) self._ask('tweakPartitionTable', node_list=','.join(map(str, uuid_list)))
if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
raise RuntimeError(response)
return response[2]
def setClusterState(self, state): def setClusterState(self, state):
""" """
Set cluster state. Set cluster state.
""" """
packet = Packets.SetClusterState(state) self._ask('setClusterState', state=state)
response = self.__ask(packet)
if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
raise RuntimeError(response)
return response[2]
def _setNodeState(self, node, state):
"""
Kill node, or remove it permanently
"""
response = self.__ask(Packets.SetNodeState(node, state))
if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
raise RuntimeError(response)
return response[2]
def getClusterState(self): def getClusterState(self):
""" """
Get cluster state. Get cluster state.
""" """
packet = Packets.AskClusterState() state = self._ask('getClusterState')
response = self.__ask(packet) if state:
if response[0] != Packets.AnswerClusterState: return getattr(ClusterStates, state)
raise RuntimeError(response)
return response[1]
def getLastIds(self):
response = self.__ask(Packets.AskLastIDs())
if response[0] != Packets.AnswerLastIDs:
raise RuntimeError(response)
return response[1:]
def getLastTransaction(self):
response = self.__ask(Packets.AskLastTransaction())
if response[0] != Packets.AnswerLastTransaction:
raise RuntimeError(response)
return response[1]
def getNodeList(self, node_type=None): def getNodeList(self, node_type=None):
""" """
Get a list of nodes, filtering with given type. Get a list of nodes, filtering with given type.
""" """
packet = Packets.AskNodeList(node_type) node_list = json.loads(self._ask('getNodeList', node_type=node_type))
response = self.__ask(packet) return ((getattr(NodeTypes, node_type), address and tuple(address),
if response[0] != Packets.AnswerNodeList: uuid, getattr(NodeStates, state))
raise RuntimeError(response) for node_type, address, uuid, state in node_list)
return response[1] # node_list
def getPartitionRowList(self, min_offset=0, max_offset=0, node=None): def getPartitionRowList(self, min_offset=0, max_offset=0, node=None):
""" """
Get a list of partition rows, bounded by min & max and involving Get a list of partition rows, bounded by min & max and involving
given node. given node.
""" """
packet = Packets.AskPartitionList(min_offset, max_offset, node) ptid, row_list = json.loads(self._ask('getPartitionRowList',
response = self.__ask(packet) min_offset=min_offset, max_offset=max_offset, node=node))
if response[0] != Packets.AnswerPartitionList: return ptid, [(offset, [(node, getattr(CellStates, state))
raise RuntimeError(response) for node, state in row])
return response[1:3] # ptid, row_list for offset, row in row_list]
def startCluster(self): def startCluster(self):
""" """
Set cluster into "verifying" state. Set cluster into "verifying" state.
""" """
return self.setClusterState(ClusterStates.VERIFYING) self._ask('startCluster')
def killNode(self, node): def killNode(self, node):
return self._setNodeState(node, NodeStates.UNKNOWN) self._ask('killNode', node=node)
def dropNode(self, node): def dropNode(self, node):
return self._setNodeState(node, NodeStates.DOWN) self._ask('dropNode', node=node)
def getPrimary(self): def getPrimary(self):
""" """
Return the primary master UUID. Return the primary master UUID.
""" """
packet = Packets.AskPrimary() return int(self._ask('getPrimary'))
response = self.__ask(packet)
if response[0] != Packets.AnswerPrimary: def checkReplicas(self, partition_dict, min_tid=ZERO_TID, max_tid=None):
raise RuntimeError(response) kw = {'pt': ','.join('%s:%s' % (k, '' if v is None else v)
return response[1] for k, v in partition_dict.iteritems())}
if max_tid is not None:
def checkReplicas(self, *args): kw['max_tid'] = u64(max_tid)
response = self.__ask(Packets.CheckReplicas(*args)) self._ask('checkReplicas', min_tid=u64(min_tid), **kw)
if response[0] != Packets.Error or response[1] != ErrorCodes.ACK:
raise RuntimeError(response)
return response[2]
...@@ -29,6 +29,7 @@ defaults = dict( ...@@ -29,6 +29,7 @@ defaults = dict(
masters = '127.0.0.1:10000', masters = '127.0.0.1:10000',
) )
def main(args=None): def main(args=None):
# build configuration dict from command line options # build configuration dict from command line options
(options, args) = parser.parse_args(args=args) (options, args) = parser.parse_args(args=args)
...@@ -39,6 +40,5 @@ def main(args=None): ...@@ -39,6 +40,5 @@ def main(args=None):
# and then, load and run the application # and then, load and run the application
from neo.admin.app import Application from neo.admin.app import Application
app = Application(config) host, port = config.getBind()
app.run() Application(config).serve(host=host, port=port)
...@@ -363,7 +363,7 @@ class NEOCluster(object): ...@@ -363,7 +363,7 @@ class NEOCluster(object):
pending_count += 1 pending_count += 1
if pending_count == target[0]: if pending_count == target[0]:
neoctl.startCluster() neoctl.startCluster()
except (NotReadyException, RuntimeError): except (NotReadyException, IOError):
pass pass
if not pdb.wait(test, MAX_START_TIME): if not pdb.wait(test, MAX_START_TIME):
raise AssertionError('Timeout when starting cluster') raise AssertionError('Timeout when starting cluster')
......
...@@ -47,7 +47,7 @@ class MasterTests(NEOFunctionalTest): ...@@ -47,7 +47,7 @@ class MasterTests(NEOFunctionalTest):
break break
neoctl.killNode(uuid) neoctl.killNode(uuid)
self.neo.expectDead(master) self.neo.expectDead(master)
self.assertRaises(RuntimeError, neoctl.killNode, primary_uuid) self.assertRaises(IOError, neoctl.killNode, primary_uuid)
def testStoppingPrimaryWithTwoSecondaries(self): def testStoppingPrimaryWithTwoSecondaries(self):
# Wait for masters to stabilize # Wait for masters to stabilize
......
...@@ -173,7 +173,7 @@ class StorageTests(NEOFunctionalTest): ...@@ -173,7 +173,7 @@ class StorageTests(NEOFunctionalTest):
self.neo.expectOudatedCells(2) self.neo.expectOudatedCells(2)
self.neo.expectClusterRunning() self.neo.expectClusterRunning()
self.assertRaises(RuntimeError, self.neo.neoctl.killNode, self.assertRaises(IOError, self.neo.neoctl.killNode,
started[1].getUUID()) started[1].getUUID())
started[1].stop() started[1].stop()
# Cluster not operational anymore. Only cells of second storage that # Cluster not operational anymore. Only cells of second storage that
...@@ -324,7 +324,7 @@ class StorageTests(NEOFunctionalTest): ...@@ -324,7 +324,7 @@ class StorageTests(NEOFunctionalTest):
self.neo.expectStorageNotKnown(started[0]) self.neo.expectStorageNotKnown(started[0])
self.neo.expectAssignedCells(started[0], 0) self.neo.expectAssignedCells(started[0], 0)
self.neo.expectAssignedCells(started[1], 10) self.neo.expectAssignedCells(started[1], 10)
self.assertRaises(RuntimeError, self.neo.neoctl.dropNode, self.assertRaises(IOError, self.neo.neoctl.dropNode,
started[1].getUUID()) started[1].getUUID())
self.neo.expectClusterRunning() self.neo.expectClusterRunning()
......
This diff is collapsed.
...@@ -534,9 +534,8 @@ class Test(NEOThreadedTest): ...@@ -534,9 +534,8 @@ class Test(NEOThreadedTest):
# tell admin to shutdown the cluster # tell admin to shutdown the cluster
cluster.neoctl.setClusterState(ClusterStates.STOPPING) cluster.neoctl.setClusterState(ClusterStates.STOPPING)
# all nodes except clients should exit # all nodes except clients should exit
cluster.join(cluster.master_list cluster.join(cluster.master_list + cluster.storage_list,
+ cluster.storage_list (cluster.admin,))
+ cluster.admin_list)
finally: finally:
cluster.stop() cluster.stop()
cluster.reset() # reopen DB to check partition tables cluster.reset() # reopen DB to check partition tables
......
...@@ -26,7 +26,7 @@ if not os.path.exists('mock.py'): ...@@ -26,7 +26,7 @@ if not os.path.exists('mock.py'):
zodb_require = ['ZODB3>=3.10', 'ZODB3<3.11dev'] zodb_require = ['ZODB3>=3.10', 'ZODB3<3.11dev']
extras_require = { extras_require = {
'admin': [], 'admin': ['bottle'],
'client': zodb_require, 'client': zodb_require,
'ctl': [], 'ctl': [],
'master': [], 'master': [],
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment