Commit a1a803b4 authored by unknown

BUG#15425 Small window for NF during backup failing without error


ndb/src/kernel/blocks/backup/Backup.cpp:
  If crash insert 10018 is used on a 2-node cluster with a fast CPU and a slower disk,
  all nodes can respond with BACKUP_FRAGMENT_CONF for all fragments before the
  error in FSAPPENDCONF is hit.
  
  As a result, no error code was set for the backup, and the backup was
  incomplete because not all I/O had been written to disk before the node crashed.
  This failure was not reported to the user.
  
  So the backup would appear to have succeeded even though it actually had not.
  
  The window for this is rather small though.
ndb/src/kernel/blocks/dbdict/Dbdict.cpp:
  The assert is incorrect for the testBackup NFSlave test (it causes crash insert 10015 to fail)
parent a050707b
...@@ -786,13 +786,17 @@ Backup::checkNodeFail(Signal* signal, ...@@ -786,13 +786,17 @@ Backup::checkNodeFail(Signal* signal,
pos= &ref->nodeId - signal->getDataPtr(); pos= &ref->nodeId - signal->getDataPtr();
break; break;
} }
case GSN_WAIT_GCP_REQ:
case GSN_DROP_TRIG_REQ:
case GSN_CREATE_TRIG_REQ: case GSN_CREATE_TRIG_REQ:
case GSN_ALTER_TRIG_REQ: case GSN_ALTER_TRIG_REQ:
case GSN_WAIT_GCP_REQ: ptr.p->setErrorCode(AbortBackupOrd::BackupFailureDueToNodeFail);
return;
case GSN_UTIL_SEQUENCE_REQ: case GSN_UTIL_SEQUENCE_REQ:
case GSN_UTIL_LOCK_REQ: case GSN_UTIL_LOCK_REQ:
case GSN_DROP_TRIG_REQ:
return; return;
default:
ndbrequire(false);
} }
for(Uint32 i = 0; (i = mask.find(i+1)) != NdbNodeBitmask::NotFound; ) for(Uint32 i = 0; (i = mask.find(i+1)) != NdbNodeBitmask::NotFound; )
...@@ -1880,7 +1884,7 @@ Backup::execBACKUP_FRAGMENT_REF(Signal* signal) ...@@ -1880,7 +1884,7 @@ Backup::execBACKUP_FRAGMENT_REF(Signal* signal)
} }
} }
} }
ndbrequire(false); goto err;
done: done:
ptr.p->masterData.sendCounter--; ptr.p->masterData.sendCounter--;
...@@ -1893,6 +1897,7 @@ done: ...@@ -1893,6 +1897,7 @@ done:
return; return;
}//if }//if
err:
AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend(); AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
ord->backupId = ptr.p->backupId; ord->backupId = ptr.p->backupId;
ord->backupPtr = ptr.i; ord->backupPtr = ptr.i;
......
...@@ -11694,7 +11694,6 @@ Dbdict::alterTrigger_toDropLocal(Signal* signal, OpAlterTriggerPtr opPtr) ...@@ -11694,7 +11694,6 @@ Dbdict::alterTrigger_toDropLocal(Signal* signal, OpAlterTriggerPtr opPtr)
// broken trigger allowed if force // broken trigger allowed if force
if (! (triggerPtr.p->triggerLocal & TriggerRecord::TL_CREATED_LQH)) { if (! (triggerPtr.p->triggerLocal & TriggerRecord::TL_CREATED_LQH)) {
jam(); jam();
ndbrequire(opPtr.p->m_requestFlag & RequestFlag::RF_FORCE);
alterTrigger_sendReply(signal, opPtr, false); alterTrigger_sendReply(signal, opPtr, false);
return; return;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment