Commit 2e6c8705 authored by Aurel's avatar Aurel

Implement a minimal version of the admin node, which just connects to the

primary master node (PMN) and waits for notifications.


git-svn-id: https://svn.erp5.org/repos/neo/branches/prototype3@433 71dcc9de-d417-0410-9af5-da40c76e7ee4
parent 64ebcd65
#
# Copyright (C) 2006-2009 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import logging
import os
from time import time
from struct import unpack
from collections import deque
from neo.config import ConfigurationManager
from neo.protocol import TEMPORARILY_DOWN_STATE, DOWN_STATE, BROKEN_STATE, \
INVALID_UUID, INVALID_PTID, partition_cell_states
from neo.node import NodeManager, MasterNode, StorageNode, ClientNode, AdminNode
from neo.event import EventManager
from neo.connection import ListeningConnection, ClientConnection
from neo.exception import OperationFailure, PrimaryFailure
from neo.admin.handler import MonitoringEventHandler, AdminEventHandler
from neo.connector import getConnectorHandler
class Application(object):
    """The admin node application.

    It reads its configuration, opens a listening connection, connects to
    the primary master node and then keeps polling the event manager.
    """

    def __init__(self, file, section):
        """Read the configuration and set up the internal state.

        file -- configuration file handed over to ConfigurationManager
        section -- configuration section handed over to ConfigurationManager
        """
        config = ConfigurationManager(file, section)

        # Unknown until a master node tells us (the monitoring handler
        # fills them in when the node identification is accepted).
        self.num_partitions = None
        self.num_replicas = None
        self.name = config.getName()
        logging.debug('the name is %s', self.name)
        self.connector_handler = getConnectorHandler(config.getConnector())

        self.server = config.getServer()
        logging.debug('IP address is %s, port is %d', *(self.server))

        self.master_node_list = config.getMasterNodeList()
        logging.debug('master nodes are %s', self.master_node_list)

        # Internal attributes.
        self.em = EventManager()
        self.nm = NodeManager()
        # The partition table is initialized after getting the number of
        # partitions.
        self.pt = None
        self.uuid = INVALID_UUID
        self.primary_master_node = None
        # Read by the monitoring handler, so make sure it always exists,
        # even before connectToPrimaryMaster() has run.
        self.trying_master_node = None

    def run(self):
        """Make sure that the status is sane and start a loop.

        Raises RuntimeError when the number of partitions is known but not
        positive, or when the cluster name is empty.
        """
        if self.num_partitions is not None and self.num_partitions <= 0:
            raise RuntimeError('partitions must be more than zero')
        if not self.name:
            raise RuntimeError('cluster name must be non-empty')

        for server in self.master_node_list:
            self.nm.add(MasterNode(server = server))

        # Make a listening port.
        ListeningConnection(self.em, None, addr = self.server,
                            connector_handler = self.connector_handler)

        # Connect to a primary master node, then poll for events. This
        # cycle is executed permanently: whenever the primary master is
        # lost, a new one is searched for.
        while True:
            self.connectToPrimaryMaster()
            try:
                while True:
                    self.em.poll(1)
            except PrimaryFailure:
                logging.error('primary master is down')

    def connectToPrimaryMaster(self):
        """Find a primary master node, and connect to it.

        If a primary master node is not elected or ready, repeat
        the attempt of a connection periodically.

        Note that I do not accept any connection from non-master nodes
        at this stage.

        Returns once a non-listening, non-server connection whose UUID
        resolves to the primary master node exists.
        """
        logging.info('connecting to a primary master node')

        handler = MonitoringEventHandler(self)
        em = self.em
        nm = self.nm

        # First of all, make sure that I have no connection.
        for conn in em.getConnectionList():
            if not conn.isListeningConnection():
                conn.close()

        index = 0
        self.trying_master_node = None
        self.primary_master_node = None

        # Time of the last connection attempt; 0 forces an immediate try.
        t = 0
        while True:
            em.poll(1)
            if self.primary_master_node is not None:
                # If I know which is a primary master node, check if
                # I have a connection to it already.
                for conn in em.getConnectionList():
                    if conn.isListeningConnection() or conn.isServerConnection():
                        continue
                    uuid = conn.getUUID()
                    if uuid is None:
                        continue
                    node = nm.getNodeByUUID(uuid)
                    if node is self.primary_master_node:
                        logging.info('connected to primary master node %s:%d',
                                     *node.getServer())
                        # Yes, I have.
                        return

            # Start at most one new attempt per second, and only when no
            # attempt is pending.
            if self.trying_master_node is None and t + 1 < time():
                # Choose a master node to connect to.
                if self.primary_master_node is not None:
                    # If I know a primary master node, pinpoint it.
                    self.trying_master_node = self.primary_master_node
                else:
                    # Otherwise, check one by one, wrapping around at the
                    # end of the list. An empty list still raises
                    # IndexError here.
                    master_list = nm.getMasterNodeList()
                    try:
                        self.trying_master_node = master_list[index]
                    except IndexError:
                        index = 0
                        self.trying_master_node = master_list[0]
                    index += 1

                server = self.trying_master_node.getServer()
                logging.info('connecting to %s:%d', *server)
                ClientConnection(em, handler,
                                 addr = server,
                                 connector_handler = self.connector_handler)
                t = time()
#
# Copyright (C) 2009 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import logging
from neo.handler import EventHandler
from neo.protocol import INVALID_UUID, RUNNING_STATE, BROKEN_STATE, \
MASTER_NODE_TYPE, STORAGE_NODE_TYPE, CLIENT_NODE_TYPE, \
ADMIN_NODE_TYPE
from neo.node import MasterNode, StorageNode, ClientNode
from neo.connection import ClientConnection
from neo.protocol import Packet
from neo.pt import PartitionTable
from neo.exception import PrimaryFailure
from neo.util import dump
class BaseEventHandler(EventHandler):
    """Base handler for the admin node."""

    def connectionAccepted(self, conn, s, addr):
        """Called when a connection is accepted.

        conn -- the connection the event is reported for
        s, addr -- supplied by the caller, not used here

        A server connection is handed over to an AdminEventHandler; any
        other connection is logged and closed.
        """
        # we only accept connection from command tool
        # The address is an (ip, port) pair, so it must be unpacked to match
        # the two format specifiers.
        logging.info("accepted a connection from %s:%d", *conn.getAddress())
        if conn.isServerConnection():
            # setHandler needs a handler instance, not the class itself.
            conn.setHandler(AdminEventHandler())
        else:
            # XXX why do we get there ?
            # No packet exists at this point, so there is nothing to report
            # as unexpected: drop the connection instead.
            logging.error('unexpected non-server connection accepted, closing it')
            conn.close()
class AdminEventHandler(EventHandler):
    """Handler for the events related to the administration of the cluster.

    Nothing is overridden yet: every callback is inherited from EventHandler.
    """
class MonitoringEventHandler(EventHandler):
    """Handler for the connections opened towards master nodes.

    It identifies the admin node to the master it is trying, asks which
    master is the primary one, and raises PrimaryFailure when a connection
    carrying the UUID of the primary master fails.
    """

    def __init__(self, app):
        """Remember the application whose state the callbacks update."""
        self.app = app
        EventHandler.__init__(self)

    def _checkPrimaryFailure(self, conn):
        """Raise PrimaryFailure if conn carries the primary master's UUID."""
        primary = self.app.primary_master_node
        if primary and conn.getUUID() == primary.getUUID():
            raise PrimaryFailure

    def _resetTryingMaster(self):
        """Forget the master being tried, and the primary if it was that one."""
        app = self.app
        if app.trying_master_node is app.primary_master_node:
            # The node I tried was the one I believed to be the primary
            # master, so I should not rely on it any longer.
            app.primary_master_node = None
        app.trying_master_node = None

    def connectionCompleted(self, conn):
        """Send a node identification request as an admin node."""
        app = self.app
        if app.trying_master_node is None:
            # Should not happen.
            raise RuntimeError('connection completed while not trying to connect')

        request = Packet()
        request_id = conn.getNextId()
        request.requestNodeIdentification(request_id, ADMIN_NODE_TYPE,
                                          app.uuid, app.server[0],
                                          app.server[1], app.name)
        conn.addPacket(request)
        conn.expectMessage(request_id)

        EventHandler.connectionCompleted(self, conn)

    def connectionFailed(self, conn):
        """Give up the current attempt after a failed connection."""
        self._checkPrimaryFailure(conn)
        if self.app.trying_master_node is None:
            # Should not happen.
            raise RuntimeError('connection failed while not trying to connect')
        self._resetTryingMaster()
        EventHandler.connectionFailed(self, conn)

    def timeoutExpired(self, conn):
        """Give up the current attempt after a timeout."""
        self._checkPrimaryFailure(conn)
        self._resetTryingMaster()
        EventHandler.timeoutExpired(self, conn)

    def connectionClosed(self, conn):
        """Give up the current attempt after the connection was closed."""
        self._checkPrimaryFailure(conn)
        self._resetTryingMaster()
        EventHandler.connectionClosed(self, conn)

    def peerBroken(self, conn):
        """Give up the current attempt after the peer got broken."""
        self._checkPrimaryFailure(conn)
        self._resetTryingMaster()
        EventHandler.peerBroken(self, conn)

    def handleNotReady(self, conn, packet, message):
        """Drop the current attempt and close the connection."""
        app = self.app
        if app.trying_master_node is not None:
            app.trying_master_node = None
        conn.close()

    def handleAcceptNodeIdentification(self, conn, packet, node_type,
                                       uuid, ip_address, port,
                                       num_partitions, num_replicas, your_uuid):
        """Validate the peer, record the cluster layout, ask for the primary.

        Raises RuntimeError when the announced number of partitions or of
        replicas differs from the values already known.
        """
        app = self.app
        peer_address = conn.getAddress()
        node = app.nm.getNodeByServer(peer_address)

        if node_type != MASTER_NODE_TYPE:
            # The peer is not a master node!
            logging.error('%s:%d is not a master node', ip_address, port)
            app.nm.remove(node)
            conn.close()
            return

        if peer_address != (ip_address, port):
            # The server address is different! Then why was
            # the connection successful?
            logging.error('%s:%d is waiting for %s:%d',
                          peer_address[0], peer_address[1],
                          ip_address, port)
            app.nm.remove(node)
            conn.close()
            return

        if app.num_partitions is not None:
            # The layout is already known: it must not change.
            if app.num_partitions != num_partitions:
                raise RuntimeError('the number of partitions is inconsistent')
            if app.num_replicas != num_replicas:
                raise RuntimeError('the number of replicas is inconsistent')
        else:
            # First answer: adopt the layout and create the partition table.
            app.num_partitions = num_partitions
            app.num_replicas = num_replicas
            app.pt = PartitionTable(num_partitions, num_replicas)

        conn.setUUID(uuid)
        node.setUUID(uuid)

        if your_uuid != INVALID_UUID:
            # got an uuid from the primary master
            app.uuid = your_uuid

        # Ask a primary master.
        request_id = conn.getNextId()
        conn.addPacket(Packet().askPrimaryMaster(request_id))
        conn.expectMessage(request_id)

    def handleAnswerPrimaryMaster(self, conn, packet, primary_uuid,
                                  known_master_list):
        """Register the announced masters and track the primary one."""
        app = self.app

        # Register new master nodes.
        for ip_address, port, uuid in known_master_list:
            address = (ip_address, port)
            master = app.nm.getNodeByServer(address)
            if master is None:
                master = MasterNode(server = address)
                app.nm.add(master)
            if uuid == INVALID_UUID:
                continue
            # If I don't know the UUID yet, believe what the peer
            # told me at the moment.
            if master.getUUID() is None or master.getUUID() != uuid:
                master.setUUID(uuid)

        if primary_uuid == INVALID_UUID:
            if app.primary_master_node is not None:
                # The primary master node is not a primary master node
                # any longer.
                app.primary_master_node = None
            app.trying_master_node = None
            conn.close()
            return

        primary_node = app.nm.getNodeByUUID(primary_uuid)
        if primary_node is None:
            # I don't know such a node. Probably this information
            # is old. So ignore it.
            return

        app.primary_master_node = primary_node
        if app.trying_master_node is not primary_node:
            # Connected to a secondary master: drop this connection so that
            # the primary one can be tried.
            app.trying_master_node = None
            conn.close()
        else:
            # I am connected to the right one.
            logging.info('connected to a primary master node')

    def handleSendPartitionTable(self, conn, packet, ptid, row_list):
        """Not implemented yet: only log that the packet was received."""
        logging.warning("handleSendPartitionTable")

    def handleNotifyPartitionChanges(self, conn, packet, ptid, cell_list):
        """Not implemented yet: only log that the packet was received."""
        logging.warning("handleNotifyPartitionChanges")

    def handleNotifyNodeInformation(self, conn, packet, node_list):
        """Not implemented yet: only log that the packet was received."""
        logging.warning("handleNotifyNodeInformation")
#! /usr/bin/env python
#
# neoadmin - run an administrator node of NEO
#
# Copyright (C) 2009 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from optparse import OptionParser
from neo.admin.app import Application
import logging
# Command-line interface of the neoadmin script.
parser = OptionParser()
parser.add_option('-v', '--verbose', action='store_true',
                  help='print verbose messages')
parser.add_option('-c', '--config', help='specify a configuration file')
parser.add_option('-s', '--section', help='specify a configuration section')
parser.add_option('-l', '--logfile', help='specify a logging file')
options, args = parser.parse_args()

# Fall back to the defaults for everything not given on the command line.
config = options.config or 'neo.conf'
section = options.section or 'admin'
logfile = options.logfile or None

# Verbose mode lowers the threshold from WARNING to INFO.
level = logging.WARNING
if options.verbose:
    level = logging.INFO
logging.basicConfig(filename=logfile, level=level)

# Build the admin node application and run it.
app = Application(config, section)
app.run()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment