Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mariadb
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
mariadb
Commits
72765abf
Commit
72765abf
authored
Sep 26, 2006
by
jonas@perch.ndb.mysql.com
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ndb - bug#20895
Fix occasional LCP hang!!! Make sure only to consider alive nodes in startNextChkpt
parent
0d360045
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
71 additions
and
60 deletions
+71
-60
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+71
-60
No files found.
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
View file @
72765abf
...
@@ -9561,73 +9561,84 @@ void Dbdih::startNextChkpt(Signal* signal)
...
@@ -9561,73 +9561,84 @@ void Dbdih::startNextChkpt(Signal* signal)
nodePtr
.
i
=
replicaPtr
.
p
->
procNode
;
nodePtr
.
i
=
replicaPtr
.
p
->
procNode
;
ptrCheckGuard
(
nodePtr
,
MAX_NDB_NODES
,
nodeRecord
);
ptrCheckGuard
(
nodePtr
,
MAX_NDB_NODES
,
nodeRecord
);
if
(
replicaPtr
.
p
->
lcpOngoingFlag
&&
if
(
c_lcpState
.
m_participatingLQH
.
get
(
nodePtr
.
i
))
replicaPtr
.
p
->
lcpIdStarted
<
lcpId
)
{
{
jam
();
if
(
replicaPtr
.
p
->
lcpOngoingFlag
&&
//-------------------------------------------------------------------
replicaPtr
.
p
->
lcpIdStarted
<
lcpId
)
// We have found a replica on a node that performs local checkpoint
{
// that is alive and that have not yet been started.
jam
();
//-------------------------------------------------------------------
//-------------------------------------------------------------------
// We have found a replica on a node that performs local checkpoint
if
(
nodePtr
.
p
->
noOfStartedChkpt
<
2
)
{
// that is alive and that have not yet been started.
jam
();
//-------------------------------------------------------------------
/**
* Send LCP_FRAG_ORD to LQH
*/
/**
* Mark the replica so with lcpIdStarted == true
*/
replicaPtr
.
p
->
lcpIdStarted
=
lcpId
;
Uint32
i
=
nodePtr
.
p
->
noOfStartedChkpt
;
nodePtr
.
p
->
startedChkpt
[
i
].
tableId
=
tabPtr
.
i
;
nodePtr
.
p
->
startedChkpt
[
i
].
fragId
=
curr
.
fragmentId
;
nodePtr
.
p
->
startedChkpt
[
i
].
replicaPtr
=
replicaPtr
.
i
;
nodePtr
.
p
->
noOfStartedChkpt
=
i
+
1
;
sendLCP_FRAG_ORD
(
signal
,
nodePtr
.
p
->
startedChkpt
[
i
]);
}
else
if
(
nodePtr
.
p
->
noOfQueuedChkpt
<
2
)
{
jam
();
/**
* Put LCP_FRAG_ORD "in queue"
*/
/**
* Mark the replica so with lcpIdStarted == true
*/
replicaPtr
.
p
->
lcpIdStarted
=
lcpId
;
Uint32
i
=
nodePtr
.
p
->
noOfQueuedChkpt
;
if
(
nodePtr
.
p
->
noOfStartedChkpt
<
2
)
nodePtr
.
p
->
queuedChkpt
[
i
].
tableId
=
tabPtr
.
i
;
{
nodePtr
.
p
->
queuedChkpt
[
i
].
fragId
=
curr
.
fragmentId
;
jam
();
nodePtr
.
p
->
queuedChkpt
[
i
].
replicaPtr
=
replicaPtr
.
i
;
/**
nodePtr
.
p
->
noOfQueuedChkpt
=
i
+
1
;
* Send LCP_FRAG_ORD to LQH
}
else
{
*/
jam
();
/**
* Mark the replica so with lcpIdStarted == true
*/
replicaPtr
.
p
->
lcpIdStarted
=
lcpId
;
if
(
save
){
Uint32
i
=
nodePtr
.
p
->
noOfStartedChkpt
;
nodePtr
.
p
->
startedChkpt
[
i
].
tableId
=
tabPtr
.
i
;
nodePtr
.
p
->
startedChkpt
[
i
].
fragId
=
curr
.
fragmentId
;
nodePtr
.
p
->
startedChkpt
[
i
].
replicaPtr
=
replicaPtr
.
i
;
nodePtr
.
p
->
noOfStartedChkpt
=
i
+
1
;
sendLCP_FRAG_ORD
(
signal
,
nodePtr
.
p
->
startedChkpt
[
i
]);
}
else
if
(
nodePtr
.
p
->
noOfQueuedChkpt
<
2
)
{
jam
();
/**
/**
*
Stop increasing value on first that was "full
"
*
Put LCP_FRAG_ORD "in queue
"
*/
*/
c_lcpState
.
currentFragment
=
curr
;
save
=
false
;
}
busyNodes
.
set
(
nodePtr
.
i
);
if
(
busyNodes
.
count
()
==
lcpNodes
){
/**
/**
* There were no possibility to start the local checkpoint
* Mark the replica so with lcpIdStarted == true
* and it was not possible to queue it up. In this case we
* stop the start of local checkpoints until the nodes with a
* backlog have performed more checkpoints. We will return and
* will not continue the process of starting any more checkpoints.
*/
*/
return
;
replicaPtr
.
p
->
lcpIdStarted
=
lcpId
;
Uint32
i
=
nodePtr
.
p
->
noOfQueuedChkpt
;
nodePtr
.
p
->
queuedChkpt
[
i
].
tableId
=
tabPtr
.
i
;
nodePtr
.
p
->
queuedChkpt
[
i
].
fragId
=
curr
.
fragmentId
;
nodePtr
.
p
->
queuedChkpt
[
i
].
replicaPtr
=
replicaPtr
.
i
;
nodePtr
.
p
->
noOfQueuedChkpt
=
i
+
1
;
}
else
{
jam
();
if
(
save
)
{
/**
* Stop increasing value on first that was "full"
*/
c_lcpState
.
currentFragment
=
curr
;
save
=
false
;
}
busyNodes
.
set
(
nodePtr
.
i
);
if
(
busyNodes
.
count
()
==
lcpNodes
)
{
/**
* There were no possibility to start the local checkpoint
* and it was not possible to queue it up. In this case we
* stop the start of local checkpoints until the nodes with a
* backlog have performed more checkpoints. We will return and
* will not continue the process of starting any more checkpoints.
*/
return
;
}
//if
}
//if
}
//if
}
//if
}
}
}
//while
}
//while
}
curr
.
fragmentId
++
;
curr
.
fragmentId
++
;
if
(
curr
.
fragmentId
>=
tabPtr
.
p
->
totalfragments
)
{
if
(
curr
.
fragmentId
>=
tabPtr
.
p
->
totalfragments
)
{
jam
();
jam
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment