Commit 8945183f authored by mskold/marty@linux.site

Merge mskold@bk-internal.mysql.com:/home/bk/mysql-5.0-ndb

into  mysql.com:/windows/Linux_space/MySQL/mysql-5.0-ndb
parents c41a4472 ad88a849
use test;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
Connected to Management Server at: :
Waiting for completed, this may take several minutes
Backup started from node
Backup started from node completed
StartGCP: StopGCP:
#Records: #LogRecords:
Data: bytes Log: bytes
create table t1
(pk int key
,a1 BIT(1), a2 BIT(5), a3 BIT(33), a4 BIT(63), a5 BIT(64)
,b1 TINYINT, b2 TINYINT UNSIGNED
,c1 SMALLINT, c2 SMALLINT UNSIGNED
,d1 INT, d2 INT UNSIGNED
,e1 BIGINT, e2 BIGINT UNSIGNED
,f1 CHAR(1) BINARY, f2 CHAR(32) BINARY, f3 CHAR(255) BINARY
,g1 VARCHAR(32) BINARY, g2 VARCHAR(255) BINARY, g3 VARCHAR(1000) BINARY
,h1 BINARY(1), h2 BINARY(8), h3 BINARY(255)
,i1 VARBINARY(32), i2 VARBINARY(255), i3 VARBINARY(1000)
) engine ndb;
insert into t1 values
(1
,0x1, 0x17, 0x789a, 0x789abcde, 0xfedc0001
,127, 255
,32767, 65535
,2147483647, 4294967295
,9223372036854775807, 18446744073709551615
,'1','12345678901234567890123456789012','123456789'
,'1','12345678901234567890123456789012','123456789'
,0x12,0x123456789abcdef0, 0x012345
,0x12,0x123456789abcdef0, 0x00123450
);
insert into t1 values
(2
,0, 0, 0, 0, 0
,-128, 0
,-32768, 0
,-2147483648, 0
,-9223372036854775808, 0
,'','',''
,'','',''
,0x0,0x0,0x0
,0x0,0x0,0x0
);
insert into t1 values
(3
,NULL,NULL,NULL,NULL,NULL
,NULL,NULL
,NULL,NULL
,NULL,NULL
,NULL,NULL
,NULL,NULL,NULL
,NULL,NULL,NULL
,NULL,NULL,NULL
,NULL,NULL,NULL
);
Connected to Management Server at: :
Waiting for completed, this may take several minutes
Backup started from node
Backup started from node completed
StartGCP: StopGCP:
#Records: #LogRecords:
Data: bytes Log: bytes
# Checks the text that "start backup" prints through the ndb_mgm client,
# once on an empty test database and once after a table has been created
# and filled.
#
# Prerequisite checks come from the shared include files below.
-- source include/have_ndb.inc
-- source include/ndb_default_cluster.inc
-- source include/not_embedded.inc
# Remove leftovers from earlier tests; warnings are switched off around the
# drop so that missing tables add nothing to the recorded output.
--disable_warnings
use test;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
--enable_warnings
#NO.1 test output of backup
# The client output is filtered through three sed passes that delete every
# digit, the string "localhost" and any run of two or more dots, so values
# that differ between runs (ids, ports, counters, sizes) do not reach the
# recorded result.
--exec $NDB_TOOLS_DIR/../src/mgmclient/ndb_mgm -e "start backup" |sed -e 's/[0-9]//g' |sed -e 's/localhost//g' |sed -e 's/\.\.\.*//g'
# t1 has an int primary key plus several widths/variants of each of: BIT,
# signed and unsigned integers, CHAR and VARCHAR with BINARY, BINARY and
# VARBINARY.
create table t1
(pk int key
,a1 BIT(1), a2 BIT(5), a3 BIT(33), a4 BIT(63), a5 BIT(64)
,b1 TINYINT, b2 TINYINT UNSIGNED
,c1 SMALLINT, c2 SMALLINT UNSIGNED
,d1 INT, d2 INT UNSIGNED
,e1 BIGINT, e2 BIGINT UNSIGNED
,f1 CHAR(1) BINARY, f2 CHAR(32) BINARY, f3 CHAR(255) BINARY
,g1 VARCHAR(32) BINARY, g2 VARCHAR(255) BINARY, g3 VARCHAR(1000) BINARY
,h1 BINARY(1), h2 BINARY(8), h3 BINARY(255)
,i1 VARBINARY(32), i2 VARBINARY(255), i3 VARBINARY(1000)
) engine ndb;
# Row 1: non-zero data; every integer column holds the largest value of its
# type.
insert into t1 values
(1
,0x1, 0x17, 0x789a, 0x789abcde, 0xfedc0001
,127, 255
,32767, 65535
,2147483647, 4294967295
,9223372036854775807, 18446744073709551615
,'1','12345678901234567890123456789012','123456789'
,'1','12345678901234567890123456789012','123456789'
,0x12,0x123456789abcdef0, 0x012345
,0x12,0x123456789abcdef0, 0x00123450
);
# Row 2: zero and empty-string data; every signed integer column holds the
# most negative value of its type.
insert into t1 values
(2
,0, 0, 0, 0, 0
,-128, 0
,-32768, 0
,-2147483648, 0
,-9223372036854775808, 0
,'','',''
,'','',''
,0x0,0x0,0x0
,0x0,0x0,0x0
);
# Row 3: NULL in every column except the primary key.
insert into t1 values
(3
,NULL,NULL,NULL,NULL,NULL
,NULL,NULL
,NULL,NULL
,NULL,NULL
,NULL,NULL
,NULL,NULL,NULL
,NULL,NULL,NULL
,NULL,NULL,NULL
,NULL,NULL,NULL
);
#NO.2 test output of backup after some simple SQL operations
# Same command and the same sed filtering as in NO.1, now with t1 populated.
--exec $NDB_TOOLS_DIR/../src/mgmclient/ndb_mgm -e "start backup" |sed -e 's/[0-9]//g' |sed -e 's/localhost//g' |sed -e 's/\.\.\.*//g'
......@@ -175,5 +175,5 @@ private:
char m_text[MAX_TEXT_LENGTH];
};
extern void getRestartAction(Uint32 action, BaseString &str);
#endif
......@@ -959,6 +959,7 @@ TransporterRegistry::performReceive()
{
Uint32 * ptr;
Uint32 sz = t->getReceiveData(&ptr);
transporter_recv_from(callbackObj, nodeId);
Uint32 szUsed = unpack(ptr, sz, nodeId, ioStates[nodeId]);
t->updateReceiveDataPtr(szUsed);
}
......
Next QMGR 1
Next NDBCNTR 1000
Next NDBCNTR 1002
Next NDBFS 2000
Next DBACC 3002
Next DBTUP 4014
......@@ -501,3 +501,4 @@ TUP:
NDBCNTR:
1000: Crash insertion on SystemError::CopyFragRef
1001: Delay sending NODE_FAILREP (to own node), until error is cleared
......@@ -404,6 +404,9 @@ void Dbdict::execFSCLOSECONF(Signal* signal)
case FsConnectRecord::OPEN_READ_SCHEMA2:
openSchemaFile(signal, 1, fsPtr.i, false, false);
break;
case FsConnectRecord::OPEN_READ_TAB_FILE2:
openTableFile(signal, 1, fsPtr.i, c_readTableRecord.tableId, false);
break;
default:
jamLine((fsPtr.p->fsState & 0xFFF));
ndbrequire(false);
......@@ -783,8 +786,11 @@ void Dbdict::readTableConf(Signal* signal,
void Dbdict::readTableRef(Signal* signal,
FsConnectRecordPtr fsPtr)
{
/**
* First close corrupt file
*/
fsPtr.p->fsState = FsConnectRecord::OPEN_READ_TAB_FILE2;
openTableFile(signal, 1, fsPtr.i, c_readTableRecord.tableId, false);
closeFile(signal, fsPtr.p->filePtr, fsPtr.i);
return;
}//Dbdict::readTableRef()
......
......@@ -4459,12 +4459,18 @@ void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr)
jam();
const Uint32 nodeId = failedNodePtr.i;
if (c_lcpState.m_participatingLQH.get(failedNodePtr.i)){
if (isMaster() && c_lcpState.m_participatingLQH.get(failedNodePtr.i))
{
/*----------------------------------------------------*/
/* THE NODE WAS INVOLVED IN A LOCAL CHECKPOINT. WE */
/* MUST UPDATE THE ACTIVE STATUS TO INDICATE THAT */
/* THE NODE HAS MISSED A LOCAL CHECKPOINT. */
/*----------------------------------------------------*/
/**
* Bug#28717, Only master should do this, as this status is copied
* to other nodes
*/
switch (failedNodePtr.p->activeStatus) {
case Sysfile::NS_Active:
jam();
......
......@@ -1375,6 +1375,13 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal)
{
jamEntry();
if (ERROR_INSERTED(1001))
{
sendSignalWithDelay(reference(), GSN_NODE_FAILREP, signal, 100,
signal->getLength());
return;
}
const NodeFailRep * nodeFail = (NodeFailRep *)&signal->theData[0];
NdbNodeBitmask allFailed;
allFailed.assign(NdbNodeBitmask::Size, nodeFail->theNodes);
......
......@@ -569,7 +569,7 @@ AsyncFile*
Ndbfs::createAsyncFile(){
// Check limit of open files
if (theFiles.size()+1 == m_maxFiles) {
if (theFiles.size() == m_maxFiles) {
// Print info about all open files
for (unsigned i = 0; i < theFiles.size(); i++){
AsyncFile* file = theFiles[i];
......
......@@ -16,6 +16,7 @@
#include <ndb_global.h>
#include <my_pthread.h>
#include <sys/times.h>
#include "WatchDog.hpp"
#include "GlobalData.hpp"
......@@ -129,6 +130,13 @@ WatchDog::run(){
break;
}//switch
g_eventLogger.warning("Ndb kernel is stuck in: %s", last_stuck_action);
{
struct tms my_tms;
times(&my_tms);
g_eventLogger.info("User time: %llu System time: %llu",
(Uint64)my_tms.tms_utime,
(Uint64)my_tms.tms_stime);
}
if(alerts == 3){
shutdownSystem(last_stuck_action);
}
......
......@@ -25,6 +25,7 @@
#include <mgmapi.h>
#include <util/BaseString.hpp>
#include <ndbd_exit_codes.h>
class MgmtSrvr;
......@@ -125,7 +126,7 @@ public:
int executeStatus(int processId, const char* parameters, bool all);
int executeEventReporting(int processId, const char* parameters, bool all);
int executeDumpState(int processId, const char* parameters, bool all);
int executeStartBackup(char * parameters);
int executeStartBackup(char * parameters, bool interactive);
int executeAbortBackup(char * parameters);
int executeStop(Vector<BaseString> &command_list, unsigned command_pos,
int *node_ids, int no_of_nodes);
......@@ -768,6 +769,113 @@ CommandInterpreter::printError()
}
}
/*
* print log event from mgmsrv to console screen
*/
static void
printLogEvent(struct ndb_logevent* event)
{
switch (event->type) {
/**
* NDB_MGM_EVENT_CATEGORY_BACKUP
*/
case NDB_LE_BackupStarted:
ndbout_c("Backup %d started from node %d",
event->BackupStarted.backup_id, event->BackupStarted.starting_node);
break;
case NDB_LE_BackupFailedToStart:
ndbout_c("Backup request from %d failed to start. Error: %d",
event->BackupFailedToStart.starting_node, event->BackupFailedToStart.error);
break;
case NDB_LE_BackupCompleted:
ndbout_c("Backup %u started from node %u completed\n"
" StartGCP: %u StopGCP: %u\n"
" #Records: %u #LogRecords: %u\n"
" Data: %u bytes Log: %u bytes",
event->BackupCompleted.backup_id, event->BackupCompleted.starting_node,
event->BackupCompleted.start_gci, event->BackupCompleted.stop_gci,
event->BackupCompleted.n_records, event->BackupCompleted.n_log_records,
event->BackupCompleted.n_bytes, event->BackupCompleted.n_log_bytes);
break;
case NDB_LE_BackupAborted:
ndbout_c("Backup %d started from %d has been aborted. Error: %d",
event->BackupAborted.backup_id, event->BackupAborted.starting_node,
event->BackupAborted.error);
break;
/**
* NDB_MGM_EVENT_CATEGORY_STARTUP
*/
case NDB_LE_NDBStartStarted:
ndbout_c("Start initiated (version %d.%d.%d)",
getMajor(event->NDBStartStarted.version),
getMinor(event->NDBStartStarted.version),
getBuild(event->NDBStartStarted.version));
break;
case NDB_LE_NDBStartCompleted:
ndbout_c("Started (version %d.%d.%d)",
getMajor(event->NDBStartCompleted.version),
getMinor(event->NDBStartCompleted.version),
getBuild(event->NDBStartCompleted.version));
break;
case NDB_LE_NDBStopStarted:
ndbout_c("%s shutdown initiated",
(event->NDBStopStarted.stoptype == 1 ? "Cluster" : "Node"));
break;
case NDB_LE_NDBStopCompleted:
{
BaseString action_str("");
BaseString signum_str("");
getRestartAction(event->NDBStopCompleted.action, action_str);
if (event->NDBStopCompleted.signum)
signum_str.appfmt(" Initiated by signal %d.",
event->NDBStopCompleted.signum);
ndbout_c("Node shutdown completed%s.%s",
action_str.c_str(),
signum_str.c_str());
}
break;
case NDB_LE_NDBStopForced:
{
BaseString action_str("");
BaseString reason_str("");
BaseString sphase_str("");
int signum = event->NDBStopForced.signum;
int error = event->NDBStopForced.error;
int sphase = event->NDBStopForced.sphase;
int extra = event->NDBStopForced.extra;
getRestartAction(event->NDBStopForced.action, action_str);
if (signum)
reason_str.appfmt(" Initiated by signal %d.", signum);
if (error)
{
ndbd_exit_classification cl;
ndbd_exit_status st;
const char *msg = ndbd_exit_message(error, &cl);
const char *cl_msg = ndbd_exit_classification_message(cl, &st);
const char *st_msg = ndbd_exit_status_message(st);
reason_str.appfmt(" Caused by error %d: \'%s(%s). %s\'.",
error, msg, cl_msg, st_msg);
if (extra != 0)
reason_str.appfmt(" (extra info %d)", extra);
}
if (sphase < 255)
sphase_str.appfmt(" Occured during startphase %u.", sphase);
ndbout_c("Forced node shutdown completed%s.%s%s",
action_str.c_str(), sphase_str.c_str(),
reason_str.c_str());
}
break;
case NDB_LE_NDBStopAborted:
ndbout_c("Node shutdown aborted");
break;
/**
* default nothing to print
*/
default:
break;
}
}
//*****************************************************************************
//*****************************************************************************
......@@ -784,27 +892,21 @@ event_thread_run(void* p)
int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP,
1, NDB_MGM_EVENT_CATEGORY_STARTUP,
0 };
int fd = ndb_mgm_listen_event(handle, filter);
if (fd != NDB_INVALID_SOCKET)
NdbLogEventHandle log_handle= NULL;
struct ndb_logevent log_event;
log_handle= ndb_mgm_create_logevent_handle(handle, filter);
if (log_handle)
{
do_event_thread= 1;
char *tmp= 0;
char buf[1024];
SocketInputStream in(fd,10);
do {
if (tmp == 0) NdbSleep_MilliSleep(10);
if((tmp = in.gets(buf, 1024)))
{
const char ping_token[]= "<PING>";
if (memcmp(ping_token,tmp,sizeof(ping_token)-1))
if(tmp && strlen(tmp))
{
Guard g(printmutex);
ndbout << tmp;
}
}
if (ndb_logevent_get_next(log_handle, &log_event, 2000) <= 0)
continue;
Guard g(printmutex);
printLogEvent(&log_event);
} while(do_event_thread);
NDB_CLOSE_SOCKET(fd);
ndb_mgm_destroy_logevent_handle(&log_handle);
}
else
{
......@@ -1054,7 +1156,7 @@ CommandInterpreter::execute_impl(const char *_line, bool interactive)
else if(strcasecmp(firstToken, "START") == 0 &&
allAfterFirstToken != NULL &&
strncasecmp(allAfterFirstToken, "BACKUP", sizeof("BACKUP") - 1) == 0){
m_error= executeStartBackup(allAfterFirstToken);
m_error= executeStartBackup(allAfterFirstToken, interactive);
DBUG_RETURN(true);
}
else if(strcasecmp(firstToken, "ABORT") == 0 &&
......@@ -2531,20 +2633,11 @@ CommandInterpreter::executeEventReporting(int processId,
* Backup
*****************************************************************************/
int
CommandInterpreter::executeStartBackup(char* parameters)
CommandInterpreter::executeStartBackup(char* parameters, bool interactive)
{
struct ndb_mgm_reply reply;
unsigned int backupId;
#if 0
int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP, 0 };
int fd = ndb_mgm_listen_event(m_mgmsrv, filter);
if (fd < 0)
{
ndbout << "Initializing start of backup failed" << endl;
printError();
return fd;
}
#endif
Vector<BaseString> args;
{
BaseString(parameters).split(args);
......@@ -2557,25 +2650,20 @@ CommandInterpreter::executeStartBackup(char* parameters)
int sz= args.size();
int result;
if (sz == 2 &&
args[1] == "NOWAIT")
int flags = 2;
if (sz == 2 && args[1] == "NOWAIT")
{
result = ndb_mgm_start_backup(m_mgmsrv, 0, &backupId, &reply);
flags = 0;
}
else if (sz == 1 ||
(sz == 3 &&
args[1] == "WAIT" &&
args[2] == "COMPLETED"))
else if (sz == 1 || (sz == 3 && args[1] == "WAIT" && args[2] == "COMPLETED"))
{
flags = 2;
ndbout_c("Waiting for completed, this may take several minutes");
result = ndb_mgm_start_backup(m_mgmsrv, 2, &backupId, &reply);
}
else if (sz == 3 &&
args[1] == "WAIT" &&
args[2] == "STARTED")
else if (sz == 3 && args[1] == "WAIT" && args[2] == "STARTED")
{
ndbout_c("Waiting for started, this may take several minutes");
result = ndb_mgm_start_backup(m_mgmsrv, 1, &backupId, &reply);
flags = 1;
}
else
{
......@@ -2583,48 +2671,80 @@ CommandInterpreter::executeStartBackup(char* parameters)
return -1;
}
NdbLogEventHandle log_handle= NULL;
struct ndb_logevent log_event;
if (flags == 2 && !interactive)
{
int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP, 0, 0 };
log_handle = ndb_mgm_create_logevent_handle(m_mgmsrv, filter);
if (!log_handle)
{
ndbout << "Initializing start of backup failed" << endl;
printError();
return -1;
}
}
result = ndb_mgm_start_backup(m_mgmsrv, flags, &backupId, &reply);
if (result != 0) {
ndbout << "Backup failed" << endl;
printError();
#if 0
close(fd);
#endif
if (log_handle)
ndb_mgm_destroy_logevent_handle(&log_handle);
return result;
}
#if 0
ndbout_c("Waiting for completed, this may take several minutes");
char *tmp;
char buf[1024];
/**
* If interactive, event listener thread is already running
*/
if (log_handle && !interactive)
{
SocketInputStream in(fd);
int count = 0;
int retry = 0;
do {
tmp = in.gets(buf, 1024);
if(tmp)
if (ndb_logevent_get_next(log_handle, &log_event, 60000) > 0)
{
ndbout << tmp;
unsigned int id;
if(sscanf(tmp, "%*[^:]: Backup %d ", &id) == 1 && id == backupId){
count++;
}
int print = 0;
switch (log_event.type) {
case NDB_LE_BackupStarted:
if (log_event.BackupStarted.backup_id == backupId)
print = 1;
break;
case NDB_LE_BackupCompleted:
if (log_event.BackupCompleted.backup_id == backupId)
print = 1;
break;
case NDB_LE_BackupAborted:
if (log_event.BackupAborted.backup_id == backupId)
print = 1;
break;
default:
break;
}
if (print)
{
Guard g(m_print_mutex);
printLogEvent(&log_event);
count++;
}
}
} while(count < 2);
}
else
{
retry++;
}
} while(count < 2 && retry < 3);
SocketInputStream in(fd, 10);
do {
tmp = in.gets(buf, 1024);
if(tmp && tmp[0] != 0)
{
ndbout << tmp;
}
} while(tmp && tmp[0] != 0);
if (retry >= 3)
ndbout << "get backup event failed for " << retry << " times" << endl;
ndb_mgm_destroy_logevent_handle(&log_handle);
}
close(fd);
#endif
return 0;
}
int
CommandInterpreter::executeAbortBackup(char* parameters)
{
......
......@@ -21,7 +21,8 @@ libndbmgmclient_la_LIBADD = ../mgmapi/libmgmapi.la \
../common/logger/liblogger.la \
../common/portlib/libportlib.la \
../common/util/libgeneral.la \
../common/portlib/libportlib.la
../common/portlib/libportlib.la \
../common/debugger/libtrace.la
ndb_mgm_SOURCES = main.cpp
......
......@@ -67,6 +67,8 @@ public:
int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber);
int getRandomNotMasterNodeId(int randomNumber);
NdbMgmHandle handle;
protected:
int waitClusterState(ndb_mgm_node_status _status,
......@@ -89,7 +91,6 @@ protected:
bool connected;
BaseString addr;
NdbMgmHandle handle;
ndb_mgm_configuration * m_config;
protected:
ndb_mgm_configuration * getConfig();
......
......@@ -1422,6 +1422,10 @@ TESTCASE("Scan_4006",
INITIALIZER(runScan_4006);
FINALIZER(runClearTable);
}
// Test case "Bug28443": no description text is given; the only registered
// step is the initializer runBug28443.
TESTCASE("Bug28443",
""){
INITIALIZER(runBug28443);
}
TESTCASE("ExecuteAsynch",
"Check that executeAsync() works (BUG#27495)\n"){
INITIALIZER(runTestExecuteAsynch);
......
......@@ -1215,6 +1215,84 @@ runBug27283(NDBT_Context* ctx, NDBT_Step* step)
}
pos = 0;
}
return NDBT_OK;
}
/**
 * Regression test for Bug#28717.
 *
 * Needs at least four data nodes; with fewer the test returns NDBT_OK
 * without doing anything.
 *
 * Sequence:
 *  1. Pick node0 from a node group other than the master's and node1 from
 *     the same node group as node0.
 *  2. Restart node0 through restartOneDbNode().
 *  3. Ask the master three times to start a local checkpoint at once and,
 *     each time, wait for the LocalCheckpointStarted and
 *     LocalCheckpointCompleted log events.
 *  4. Wait until node0 is reported by waitNodesNoStart(), then set
 *     CmvmiSetRestartOnErrorInsert and error insert 5010 on node0 and
 *     error insert 1001 on node1.
 *  5. Start node0, sleep three seconds, clear the error insert on node1,
 *     wait for node0 to be reported by waitNodesNoStart() again, start it
 *     once more and wait for the whole cluster to be started.
 *
 * @return NDBT_OK on success (or when skipped), NDBT_FAILED as soon as a
 *         checked restarter call fails or the log event handle cannot be
 *         created.
 */
int
runBug28717(NDBT_Context* ctx, NDBT_Step* step)
{
  int result = NDBT_OK;
  int loops = ctx->getNumLoops();
  int records = ctx->getNumRecords();
  Ndb* pNdb = GETNDB(step);
  NdbRestarter res;

  if (res.getNumDbNodes() < 4)
  {
    return NDBT_OK;
  }

  int master = res.getMasterNodeId();
  int node0 = res.getRandomNodeOtherNodeGroup(master, rand());
  int node1 = res.getRandomNodeSameNodeGroup(node0, rand());

  ndbout_c("master: %d node0: %d node1: %d", master, node0, node1);

  if (res.restartOneDbNode(node0, false, true, true))
  {
    return NDBT_FAILED;
  }

  {
    int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
    NdbLogEventHandle handle =
      ndb_mgm_create_logevent_handle(res.handle, filter);
    if (handle == 0)
    {
      /* Without a handle the waits below cannot work at all */
      ndbout_c("Failed to create log event handle");
      return NDBT_FAILED;
    }

    int dump[] = { DumpStateOrd::DihStartLcpImmediately };
    struct ndb_logevent event;

    for (Uint32 i = 0; i<3; i++)
    {
      res.dumpStateOneNode(master, dump, 1);

      /*
       * ndb_logevent_get_next() > 0 means an event was delivered, 0 means
       * none arrived, < 0 is an error.  Only look at event.type when an
       * event really was delivered, so that an uninitialised or stale
       * event can never end a wait; an error still ends the wait.
       */
      int got;
      while ((got = ndb_logevent_get_next(handle, &event, 0)) >= 0 &&
             !(got > 0 && event.type == NDB_LE_LocalCheckpointStarted))
        ;
      while ((got = ndb_logevent_get_next(handle, &event, 0)) >= 0 &&
             !(got > 0 && event.type == NDB_LE_LocalCheckpointCompleted))
        ;
    }

    /* Release the event subscription; it was leaked before */
    ndb_mgm_destroy_logevent_handle(&handle);
  }

  if (res.waitNodesNoStart(&node0, 1))
    return NDBT_FAILED;

  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
  if (res.dumpStateOneNode(node0, val2, 2))
    return NDBT_FAILED;

  if (res.insertErrorInNode(node0, 5010))
    return NDBT_FAILED;

  if (res.insertErrorInNode(node1, 1001))
    return NDBT_FAILED;

  if (res.startNodes(&node0, 1))
    return NDBT_FAILED;

  NdbSleep_SecSleep(3);

  /* Clear the error insert on node1 again */
  if (res.insertErrorInNode(node1, 0))
    return NDBT_FAILED;

  if (res.waitNodesNoStart(&node0, 1))
    return NDBT_FAILED;

  if (res.startNodes(&node0, 1))
    return NDBT_FAILED;

  if (res.waitClusterStarted())
    return NDBT_FAILED;

  return NDBT_OK;
}
......@@ -1552,6 +1630,9 @@ TESTCASE("Bug27003", ""){
TESTCASE("Bug27283", ""){
INITIALIZER(runBug27283);
}
// Test case "Bug28717": no description text is given; the only registered
// step is the initializer runBug28717.
TESTCASE("Bug28717", ""){
INITIALIZER(runBug28717);
}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
......
......@@ -508,6 +508,10 @@ max-time: 1500
cmd: testDict
args: -n CreateAndDrop
max-time: 1000
cmd: testNodeRestart
args: -n Bug28717 T1
max-time: 1500
cmd: testDict
args: -n CreateAndDropAtRandom -l 200 T1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment