Commit 68e84899 authored by Rusty Russell

Implement timeout for the deadlock of traverse & transactions.

This has proven to be intractable: various attempts to eliminate the deadlock have failed, so detect it at runtime and cease the traversal (and do the remaining ops outside a traverse).
parent 828331b8
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
/* Avoid mod by zero */ /* Avoid mod by zero */
static unsigned int total_keys = 1; static unsigned int total_keys = 1;
#define DEBUG_DEPS 1 /* #define DEBUG_DEPS 1 */
/* Traversals block transactions in the current implementation. */ /* Traversals block transactions in the current implementation. */
#define TRAVERSALS_TAKE_TRANSACTION_LOCK 1 #define TRAVERSALS_TAKE_TRANSACTION_LOCK 1
...@@ -425,9 +425,10 @@ static void dump_pre(char *filename[], struct op *op[], ...@@ -425,9 +425,10 @@ static void dump_pre(char *filename[], struct op *op[],
} }
/* We simply read/write pointers, since we all are children. */ /* We simply read/write pointers, since we all are children. */
static void do_pre(struct tdb_context *tdb, static bool do_pre(struct tdb_context *tdb,
char *filename[], struct op *op[], char *filename[], struct op *op[],
unsigned int file, int pre_fd, unsigned int i) unsigned int file, int pre_fd, unsigned int i,
bool backoff)
{ {
while (!list_empty(&op[file][i].pre)) { while (!list_empty(&op[file][i].pre)) {
struct depend *dep; struct depend *dep;
...@@ -436,9 +437,17 @@ static void do_pre(struct tdb_context *tdb, ...@@ -436,9 +437,17 @@ static void do_pre(struct tdb_context *tdb,
printf("%s:%u:waiting for pre\n", filename[file], i+1); printf("%s:%u:waiting for pre\n", filename[file], i+1);
fflush(stdout); fflush(stdout);
#endif #endif
alarm(10); if (backoff)
alarm(2);
else
alarm(10);
while (read(pre_fd, &dep, sizeof(dep)) != sizeof(dep)) { while (read(pre_fd, &dep, sizeof(dep)) != sizeof(dep)) {
if (errno == EINTR) { if (errno == EINTR) {
if (backoff) {
warnx("%s:%u:avoiding deadlock",
filename[file], i+1);
return false;
}
dump_pre(filename, op, file, i); dump_pre(filename, op, file, i);
exit(1); exit(1);
} else } else
...@@ -455,6 +464,7 @@ static void do_pre(struct tdb_context *tdb, ...@@ -455,6 +464,7 @@ static void do_pre(struct tdb_context *tdb,
/* This could be any op, not just this one. */ /* This could be any op, not just this one. */
talloc_free(dep); talloc_free(dep);
} }
return true;
} }
static void do_post(char *filename[], struct op *op[], static void do_post(char *filename[], struct op *op[],
...@@ -484,7 +494,8 @@ static unsigned run_ops(struct tdb_context *tdb, ...@@ -484,7 +494,8 @@ static unsigned run_ops(struct tdb_context *tdb,
char *filename[], char *filename[],
struct op *op[], struct op *op[],
unsigned int file, unsigned int file,
unsigned int start, unsigned int stop); unsigned int start, unsigned int stop,
bool backoff);
struct traverse_info { struct traverse_info {
struct op **op; struct op **op;
...@@ -502,6 +513,7 @@ static int nontrivial_traverse(struct tdb_context *tdb, ...@@ -502,6 +513,7 @@ static int nontrivial_traverse(struct tdb_context *tdb,
{ {
struct traverse_info *tinfo = _tinfo; struct traverse_info *tinfo = _tinfo;
unsigned int trav_len = tinfo->op[tinfo->file][tinfo->start].group_len; unsigned int trav_len = tinfo->op[tinfo->file][tinfo->start].group_len;
bool avoid_deadlock = false;
if (tinfo->i == tinfo->start + trav_len) { if (tinfo->i == tinfo->start + trav_len) {
/* This can happen if traverse expects to be empty. */ /* This can happen if traverse expects to be empty. */
...@@ -515,11 +527,17 @@ static int nontrivial_traverse(struct tdb_context *tdb, ...@@ -515,11 +527,17 @@ static int nontrivial_traverse(struct tdb_context *tdb,
fail(tinfo->filename[tinfo->file], tinfo->start + 1, fail(tinfo->filename[tinfo->file], tinfo->start + 1,
"%s:%u:traverse terminated early"); "%s:%u:traverse terminated early");
#if TRAVERSALS_TAKE_TRANSACTION_LOCK
avoid_deadlock = true;
#endif
/* Run any normal ops. */ /* Run any normal ops. */
tinfo->i = run_ops(tdb, tinfo->pre_fd, tinfo->filename, tinfo->op, tinfo->i = run_ops(tdb, tinfo->pre_fd, tinfo->filename, tinfo->op,
tinfo->file, tinfo->i+1, tinfo->start + trav_len); tinfo->file, tinfo->i+1, tinfo->start + trav_len,
avoid_deadlock);
if (tinfo->i == tinfo->start + trav_len) /* We backed off, or we hit OP_TDB_TRAVERSE_END. */
if (tinfo->op[tinfo->file][tinfo->i].op != OP_TDB_TRAVERSE)
return 1; return 1;
return 0; return 0;
...@@ -548,7 +566,8 @@ static unsigned op_traverse(struct tdb_context *tdb, ...@@ -548,7 +566,8 @@ static unsigned op_traverse(struct tdb_context *tdb,
else else
tinfo.i = run_ops(tdb, pre_fd, filename, op, file, tinfo.i = run_ops(tdb, pre_fd, filename, op, file,
tinfo.i, tinfo.i,
start + op[file][start].group_len); start + op[file][start].group_len,
false);
} }
return tinfo.i; return tinfo.i;
...@@ -564,7 +583,8 @@ unsigned run_ops(struct tdb_context *tdb, ...@@ -564,7 +583,8 @@ unsigned run_ops(struct tdb_context *tdb,
char *filename[], char *filename[],
struct op *op[], struct op *op[],
unsigned int file, unsigned int file,
unsigned int start, unsigned int stop) unsigned int start, unsigned int stop,
bool backoff)
{ {
unsigned int i; unsigned int i;
struct sigaction sa; struct sigaction sa;
...@@ -574,7 +594,8 @@ unsigned run_ops(struct tdb_context *tdb, ...@@ -574,7 +594,8 @@ unsigned run_ops(struct tdb_context *tdb,
sigaction(SIGALRM, &sa, NULL); sigaction(SIGALRM, &sa, NULL);
for (i = start; i < stop; i++) { for (i = start; i < stop; i++) {
do_pre(tdb, filename, op, file, pre_fd, i); if (!do_pre(tdb, filename, op, file, pre_fd, i, backoff))
return i;
switch (op[file][i].op) { switch (op[file][i].op) {
case OP_TDB_LOCKALL: case OP_TDB_LOCKALL:
...@@ -1453,7 +1474,8 @@ int main(int argc, char *argv[]) ...@@ -1453,7 +1474,8 @@ int main(int argc, char *argv[])
printf("Single threaded run..."); printf("Single threaded run...");
fflush(stdout); fflush(stdout);
run_ops(tdb, pipes[0].fd[0], argv+2, op, 0, 1, num_ops[0]); run_ops(tdb, pipes[0].fd[0], argv+2, op, 0, 1, num_ops[0],
false);
check_deps(argv[2], op[0], num_ops[0]); check_deps(argv[2], op[0], num_ops[0]);
printf("done\n"); printf("done\n");
...@@ -1481,7 +1503,7 @@ int main(int argc, char *argv[]) ...@@ -1481,7 +1503,7 @@ int main(int argc, char *argv[])
if (read(fds[0], &c, 1) != 1) if (read(fds[0], &c, 1) != 1)
exit(1); exit(1);
run_ops(tdb, pipes[i].fd[0], argv+2, op, i, 1, run_ops(tdb, pipes[i].fd[0], argv+2, op, i, 1,
num_ops[i]); num_ops[i], false);
check_deps(argv[2+i], op[i], num_ops[i]); check_deps(argv[2+i], op[i], num_ops[i]);
exit(0); exit(0);
default: default:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment