btree_locking.c 21.1 KB
Newer Older
1 2 3 4 5 6
// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_locking.h"
#include "btree_types.h"

7 8
static struct lock_class_key bch2_btree_node_lock_key;

9 10
void bch2_btree_lock_init(struct btree_bkey_cached_common *b,
			  enum six_lock_init_flags flags)
11
{
12
	__six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags);
13 14 15 16 17 18 19 20 21 22 23 24
	lockdep_set_novalidate_class(&b->lock);
}

#ifdef CONFIG_LOCKDEP
void bch2_assert_btree_nodes_not_locked(void)
{
#if 0
	//Re-enable when lock_class_is_held() is merged:
	BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key));
#endif
}
#endif
25 26 27 28 29

/* Btree node locking: */

struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans,
						  struct btree_path *skip,
30
						  struct btree_bkey_cached_common *b,
31 32 33
						  unsigned level)
{
	struct btree_path *path;
34
	struct six_lock_count ret;
35
	unsigned i;
36 37

	memset(&ret, 0, sizeof(ret));
38 39 40 41

	if (IS_ERR_OR_NULL(b))
		return ret;

42
	trans_for_each_path(trans, path, i)
43
		if (path != skip && &path->l[level].b->c == b) {
44 45 46 47
			int t = btree_node_locked_type(path, level);

			if (t != BTREE_NODE_UNLOCKED)
				ret.n[t]++;
48 49 50 51 52
		}

	return ret;
}

53 54 55 56
/* unlock */

void bch2_btree_node_unlock_write(struct btree_trans *trans,
			struct btree_path *path, struct btree *b)
57
{
58
	bch2_btree_node_unlock_write_inlined(trans, path, b);
59 60
}

61 62
/* lock */

63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
/*
 * @trans wants to lock @b with type @type
 */
struct trans_waiting_for_lock {
	struct btree_trans		*trans;
	struct btree_bkey_cached_common	*node_want;
	enum six_lock_type		lock_want;

	/* for iterating over held locks :*/
	u8				path_idx;
	u8				level;
	u64				lock_start_time;
};

struct lock_graph {
	struct trans_waiting_for_lock	g[8];
	unsigned			nr;
};

82 83 84 85
static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
{
	struct trans_waiting_for_lock *i;

86
	prt_printf(out, "Found lock cycle (%u entries):\n", g->nr);
87

88 89 90 91 92
	for (i = g->g; i < g->g + g->nr; i++) {
		struct task_struct *task = READ_ONCE(i->trans->locking_wait.task);
		if (!task)
			continue;

93
		bch2_btree_trans_to_text(out, i->trans);
94
		bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1, GFP_NOWAIT);
95
	}
96 97
}

98 99 100 101 102
static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
{
	struct trans_waiting_for_lock *i;

	for (i = g->g; i != g->g + g->nr; i++) {
103
		struct task_struct *task = i->trans->locking_wait.task;
104 105
		if (i != g->g)
			prt_str(out, "<- ");
106
		prt_printf(out, "%u ", task ?task->pid : 0);
107 108 109 110
	}
	prt_newline(out);
}

111 112 113 114 115
static void lock_graph_up(struct lock_graph *g)
{
	closure_put(&g->g[--g->nr].trans->ref);
}

116 117 118 119 120 121
static noinline void lock_graph_pop_all(struct lock_graph *g)
{
	while (g->nr)
		lock_graph_up(g);
}

122
static void __lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
123 124 125 126 127 128 129 130
{
	g->g[g->nr++] = (struct trans_waiting_for_lock) {
		.trans		= trans,
		.node_want	= trans->locking,
		.lock_want	= trans->locking_wait.lock_want,
	};
}

131 132 133 134 135 136
static void lock_graph_down(struct lock_graph *g, struct btree_trans *trans)
{
	closure_get(&trans->ref);
	__lock_graph_down(g, trans);
}

137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
static bool lock_graph_remove_non_waiters(struct lock_graph *g)
{
	struct trans_waiting_for_lock *i;

	for (i = g->g + 1; i < g->g + g->nr; i++)
		if (i->trans->locking != i->node_want ||
		    i->trans->locking_wait.start_time != i[-1].lock_start_time) {
			while (g->g + g->nr > i)
				lock_graph_up(g);
			return true;
		}

	return false;
}

152
static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans)
153 154 155 156 157 158 159 160 161 162 163
{
	struct bch_fs *c = trans->c;

	count_event(c, trans_restart_would_deadlock);

	if (trace_trans_restart_would_deadlock_enabled()) {
		struct printbuf buf = PRINTBUF;

		buf.atomic++;
		print_cycle(&buf, g);

164
		trace_trans_restart_would_deadlock(trans, buf.buf);
165 166 167 168
		printbuf_exit(&buf);
	}
}

169 170 171
static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
{
	if (i == g->g) {
172
		trace_would_deadlock(g, i->trans);
173
		return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
174 175 176
	} else {
		i->trans->lock_must_abort = true;
		wake_up_process(i->trans->locking_wait.task);
177 178
		return 0;
	}
179 180
}

181
static int btree_trans_abort_preference(struct btree_trans *trans)
182
{
183 184 185 186 187 188 189 190
	if (trans->lock_may_not_fail)
		return 0;
	if (trans->locking_wait.lock_want == SIX_LOCK_write)
		return 1;
	if (!trans->in_traverse_all)
		return 2;
	return 3;
}
191

192 193 194 195 196
static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
{
	struct trans_waiting_for_lock *i, *abort = NULL;
	unsigned best = 0, pref;
	int ret;
197

198 199
	if (lock_graph_remove_non_waiters(g))
		return 0;
200

201 202 203 204 205
	/* Only checking, for debugfs: */
	if (cycle) {
		print_cycle(cycle, g);
		ret = -1;
		goto out;
206 207
	}

208 209 210 211 212 213
	for (i = g->g; i < g->g + g->nr; i++) {
		pref = btree_trans_abort_preference(i->trans);
		if (pref > best) {
			abort = i;
			best = pref;
		}
214 215
	}

216
	if (unlikely(!best)) {
217
		struct printbuf buf = PRINTBUF;
218
		buf.atomic++;
219

220
		prt_printf(&buf, bch2_fmt(g->g->trans->c, "cycle of nofail locks"));
221 222 223 224 225 226

		for (i = g->g; i < g->g + g->nr; i++) {
			struct btree_trans *trans = i->trans;

			bch2_btree_trans_to_text(&buf, trans);

227
			prt_printf(&buf, "backtrace:\n");
228
			printbuf_indent_add(&buf, 2);
229
			bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2, GFP_NOWAIT);
230 231 232 233 234 235 236 237
			printbuf_indent_sub(&buf, 2);
			prt_newline(&buf);
		}

		bch2_print_string_as_lines(KERN_ERR, buf.buf);
		printbuf_exit(&buf);
		BUG();
	}
238

239 240 241 242 243 244
	ret = abort_lock(g, abort);
out:
	if (ret)
		while (g->nr)
			lock_graph_up(g);
	return ret;
245 246
}

247 248
static int lock_graph_descend(struct lock_graph *g, struct btree_trans *trans,
			      struct printbuf *cycle)
249 250 251 252
{
	struct btree_trans *orig_trans = g->g->trans;
	struct trans_waiting_for_lock *i;

253
	for (i = g->g; i < g->g + g->nr; i++)
254 255
		if (i->trans == trans) {
			closure_put(&trans->ref);
256
			return break_cycle(g, cycle);
257
		}
258 259

	if (g->nr == ARRAY_SIZE(g->g)) {
260 261
		closure_put(&trans->ref);

262 263 264
		if (orig_trans->lock_may_not_fail)
			return 0;

265 266
		while (g->nr)
			lock_graph_up(g);
267 268 269 270

		if (cycle)
			return 0;

271
		trace_and_count(trans->c, trans_restart_would_deadlock_recursion_limit, trans, _RET_IP_);
272
		return btree_trans_restart(orig_trans, BCH_ERR_transaction_restart_deadlock_recursion_limit);
273 274
	}

275
	__lock_graph_down(g, trans);
276 277 278 279 280 281 282 283
	return 0;
}

static bool lock_type_conflicts(enum six_lock_type t1, enum six_lock_type t2)
{
	return t1 + t2 > 1;
}

284
int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
285 286 287 288
{
	struct lock_graph g;
	struct trans_waiting_for_lock *top;
	struct btree_bkey_cached_common *b;
289 290
	btree_path_idx_t path_idx;
	int ret = 0;
291

292 293
	g.nr = 0;

294
	if (trans->lock_must_abort) {
295 296 297
		if (cycle)
			return -1;

298
		trace_would_deadlock(&g, trans);
299
		return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
300
	}
301

302
	lock_graph_down(&g, trans);
303 304 305 306 307

	/* trans->paths is rcu protected vs. freeing */
	rcu_read_lock();
	if (cycle)
		cycle->atomic++;
308 309
next:
	if (!g.nr)
310
		goto out;
311 312 313

	top = &g.g[g.nr - 1];

314 315 316 317 318 319 320 321 322
	struct btree_path *paths = rcu_dereference(top->trans->paths);
	if (!paths)
		goto up;

	unsigned long *paths_allocated = trans_paths_allocated(paths);

	trans_for_each_path_idx_from(paths_allocated, *trans_paths_nr(paths),
				     path_idx, top->path_idx) {
		struct btree_path *path = paths + path_idx;
323 324 325
		if (!path->nodes_locked)
			continue;

326 327
		if (path_idx != top->path_idx) {
			top->path_idx		= path_idx;
328 329 330 331 332 333 334 335 336 337 338 339 340 341
			top->level		= 0;
			top->lock_start_time	= 0;
		}

		for (;
		     top->level < BTREE_MAX_DEPTH;
		     top->level++, top->lock_start_time = 0) {
			int lock_held = btree_node_locked_type(path, top->level);

			if (lock_held == BTREE_NODE_UNLOCKED)
				continue;

			b = &READ_ONCE(path->l[top->level].b)->c;

342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
			if (IS_ERR_OR_NULL(b)) {
				/*
				 * If we get here, it means we raced with the
				 * other thread updating its btree_path
				 * structures - which means it can't be blocked
				 * waiting on a lock:
				 */
				if (!lock_graph_remove_non_waiters(&g)) {
					/*
					 * If lock_graph_remove_non_waiters()
					 * didn't do anything, it must be
					 * because we're being called by debugfs
					 * checking for lock cycles, which
					 * invokes us on btree_transactions that
					 * aren't actually waiting on anything.
					 * Just bail out:
					 */
					lock_graph_pop_all(&g);
				}

362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
				goto next;
			}

			if (list_empty_careful(&b->lock.wait_list))
				continue;

			raw_spin_lock(&b->lock.wait_lock);
			list_for_each_entry(trans, &b->lock.wait_list, locking_wait.list) {
				BUG_ON(b != trans->locking);

				if (top->lock_start_time &&
				    time_after_eq64(top->lock_start_time, trans->locking_wait.start_time))
					continue;

				top->lock_start_time = trans->locking_wait.start_time;

				/* Don't check for self deadlock: */
				if (trans == top->trans ||
				    !lock_type_conflicts(lock_held, trans->locking_wait.lock_want))
					continue;

383
				closure_get(&trans->ref);
384 385
				raw_spin_unlock(&b->lock.wait_lock);

386
				ret = lock_graph_descend(&g, trans, cycle);
387
				if (ret)
388
					goto out;
389 390 391 392 393 394
				goto next;

			}
			raw_spin_unlock(&b->lock.wait_lock);
		}
	}
395
up:
396 397
	if (g.nr > 1 && cycle)
		print_chain(cycle, &g);
398
	lock_graph_up(&g);
399
	goto next;
400 401 402 403 404
out:
	if (cycle)
		--cycle->atomic;
	rcu_read_unlock();
	return ret;
405 406 407 408 409 410
}

int bch2_six_check_for_deadlock(struct six_lock *lock, void *p)
{
	struct btree_trans *trans = p;

411
	return bch2_check_for_deadlock(trans, NULL);
412 413
}

414
int __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree_path *path,
415 416
				 struct btree_bkey_cached_common *b,
				 bool lock_may_not_fail)
417
{
418
	int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->level).n[SIX_LOCK_read];
419
	int ret;
420 421 422 423 424 425 426

	/*
	 * Must drop our read locks before calling six_lock_write() -
	 * six_unlock() won't do wakeups until the reader count
	 * goes to 0, and it's safe because we have the node intent
	 * locked:
	 */
427
	six_lock_readers_add(&b->lock, -readers);
428 429
	ret = __btree_node_lock_nopath(trans, b, SIX_LOCK_write,
				       lock_may_not_fail, _RET_IP_);
430
	six_lock_readers_add(&b->lock, readers);
431

432
	if (ret)
433
		mark_btree_node_locked_noreset(path, b->level, BTREE_NODE_INTENT_LOCKED);
434

435
	return ret;
436 437
}

438 439 440 441
void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
				       struct btree_path *path,
				       struct btree_bkey_cached_common *b)
{
442
	int ret = __btree_node_lock_write(trans, path, b, true);
443 444 445
	BUG_ON(ret);
}

446
/* relock */
447

448 449
static inline bool btree_path_get_locks(struct btree_trans *trans,
					struct btree_path *path,
450 451
					bool upgrade,
					struct get_locks_fail *f)
452
{
453 454
	unsigned l = path->level;
	int fail_idx = -1;
455

456 457 458 459 460 461
	do {
		if (!btree_path_node(path, l))
			break;

		if (!(upgrade
		      ? bch2_btree_node_upgrade(trans, path, l)
462 463 464 465 466 467 468 469
		      : bch2_btree_node_relock(trans, path, l))) {
			fail_idx	= l;

			if (f) {
				f->l	= l;
				f->b	= path->l[l].b;
			}
		}
470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488

		l++;
	} while (l < path->locks_want);

	/*
	 * When we fail to get a lock, we have to ensure that any child nodes
	 * can't be relocked so bch2_btree_path_traverse has to walk back up to
	 * the node that we failed to relock:
	 */
	if (fail_idx >= 0) {
		__bch2_btree_path_unlock(trans, path);
		btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);

		do {
			path->l[fail_idx].b = upgrade
				? ERR_PTR(-BCH_ERR_no_btree_node_upgrade)
				: ERR_PTR(-BCH_ERR_no_btree_node_relock);
			--fail_idx;
		} while (fail_idx >= 0);
489 490
	}

491 492 493 494
	if (path->uptodate == BTREE_ITER_NEED_RELOCK)
		path->uptodate = BTREE_ITER_UPTODATE;

	return path->uptodate < BTREE_ITER_NEED_RELOCK;
495 496
}

497
bool __bch2_btree_node_relock(struct btree_trans *trans,
498 499
			      struct btree_path *path, unsigned level,
			      bool trace)
500
{
501 502
	struct btree *b = btree_path_node(path, level);
	int want = __btree_lock_want(path, level);
503

504 505 506 507 508
	if (race_fault())
		goto fail;

	if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
	    (btree_node_lock_seq_matches(path, b, level) &&
509
	     btree_node_lock_increment(trans, &b->c, level, want))) {
510 511 512 513
		mark_btree_node_locked(trans, path, level, want);
		return true;
	}
fail:
514
	if (trace && !trans->notrace_relock_fail)
515
		trace_and_count(trans->c, btree_path_relock_fail, trans, _RET_IP_, path, level);
516
	return false;
517 518
}

519 520 521 522 523 524
/* upgrade */

bool bch2_btree_node_upgrade(struct btree_trans *trans,
			     struct btree_path *path, unsigned level)
{
	struct btree *b = path->l[level].b;
525
	struct six_lock_count count = bch2_btree_node_lock_counts(trans, path, &b->c, level);
526 527 528 529 530 531 532 533 534 535 536 537 538

	if (!is_btree_node(path, level))
		return false;

	switch (btree_lock_want(path, level)) {
	case BTREE_NODE_UNLOCKED:
		BUG_ON(btree_node_locked(path, level));
		return true;
	case BTREE_NODE_READ_LOCKED:
		BUG_ON(btree_node_intent_locked(path, level));
		return bch2_btree_node_relock(trans, path, level);
	case BTREE_NODE_INTENT_LOCKED:
		break;
539 540
	case BTREE_NODE_WRITE_LOCKED:
		BUG();
541 542 543 544 545 546 547 548
	}

	if (btree_node_intent_locked(path, level))
		return true;

	if (race_fault())
		return false;

549 550 551 552 553 554 555 556 557 558 559 560 561
	if (btree_node_locked(path, level)) {
		bool ret;

		six_lock_readers_add(&b->c.lock, -count.n[SIX_LOCK_read]);
		ret = six_lock_tryupgrade(&b->c.lock);
		six_lock_readers_add(&b->c.lock, count.n[SIX_LOCK_read]);

		if (ret)
			goto success;
	} else {
		if (six_relock_type(&b->c.lock, SIX_LOCK_intent, path->l[level].lock_seq))
			goto success;
	}
562

563 564 565 566
	/*
	 * Do we already have an intent lock via another path? If so, just bump
	 * lock count:
	 */
567
	if (btree_node_lock_seq_matches(path, b, level) &&
568
	    btree_node_lock_increment(trans, &b->c, level, BTREE_NODE_INTENT_LOCKED)) {
569 570 571 572
		btree_node_unlock(trans, path, level);
		goto success;
	}

573
	trace_and_count(trans->c, btree_path_upgrade_fail, trans, _RET_IP_, path, level);
574 575
	return false;
success:
576
	mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
577 578
	return true;
}
579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595

/* Btree path locking: */

/*
 * Only for btree_cache.c - only relocks intent locks
 */
int bch2_btree_path_relock_intent(struct btree_trans *trans,
				  struct btree_path *path)
{
	unsigned l;

	for (l = path->level;
	     l < path->locks_want && btree_path_node(path, l);
	     l++) {
		if (!bch2_btree_node_relock(trans, path, l)) {
			__bch2_btree_path_unlock(trans, path);
			btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
596
			trace_and_count(trans->c, trans_restart_relock_path_intent, trans, _RET_IP_, path);
597 598 599 600 601 602 603 604
			return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent);
		}
	}

	return 0;
}

__flatten
605
bool bch2_btree_path_relock_norestart(struct btree_trans *trans, struct btree_path *path)
606
{
607 608
	struct get_locks_fail f;

Kent Overstreet's avatar
Kent Overstreet committed
609 610 611
	bool ret = btree_path_get_locks(trans, path, false, &f);
	bch2_trans_verify_locks(trans);
	return ret;
612 613 614 615 616
}

int __bch2_btree_path_relock(struct btree_trans *trans,
			struct btree_path *path, unsigned long trace_ip)
{
617
	if (!bch2_btree_path_relock_norestart(trans, path)) {
618
		trace_and_count(trans->c, trans_restart_relock_path, trans, trace_ip, path);
619 620 621 622 623 624 625 626
		return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
	}

	return 0;
}

bool bch2_btree_path_upgrade_noupgrade_sibs(struct btree_trans *trans,
			       struct btree_path *path,
627 628
			       unsigned new_locks_want,
			       struct get_locks_fail *f)
629 630 631 632 633
{
	EBUG_ON(path->locks_want >= new_locks_want);

	path->locks_want = new_locks_want;

Kent Overstreet's avatar
Kent Overstreet committed
634 635 636
	bool ret = btree_path_get_locks(trans, path, true, f);
	bch2_trans_verify_locks(trans);
	return ret;
637 638 639 640
}

bool __bch2_btree_path_upgrade(struct btree_trans *trans,
			       struct btree_path *path,
641 642
			       unsigned new_locks_want,
			       struct get_locks_fail *f)
643
{
Kent Overstreet's avatar
Kent Overstreet committed
644 645 646
	bool ret = bch2_btree_path_upgrade_noupgrade_sibs(trans, path, new_locks_want, f);
	if (ret)
		goto out;
647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666

	/*
	 * XXX: this is ugly - we'd prefer to not be mucking with other
	 * iterators in the btree_trans here.
	 *
	 * On failure to upgrade the iterator, setting iter->locks_want and
	 * calling get_locks() is sufficient to make bch2_btree_path_traverse()
	 * get the locks we want on transaction restart.
	 *
	 * But if this iterator was a clone, on transaction restart what we did
	 * to this iterator isn't going to be preserved.
	 *
	 * Possibly we could add an iterator field for the parent iterator when
	 * an iterator is a copy - for now, we'll just upgrade any other
	 * iterators with the same btree id.
	 *
	 * The code below used to be needed to ensure ancestor nodes get locked
	 * before interior nodes - now that's handled by
	 * bch2_btree_path_traverse_all().
	 */
667 668 669 670 671
	if (!path->cached && !trans->in_traverse_all) {
		struct btree_path *linked;
		unsigned i;

		trans_for_each_path(trans, linked, i)
672 673 674 675 676
			if (linked != path &&
			    linked->cached == path->cached &&
			    linked->btree_id == path->btree_id &&
			    linked->locks_want < new_locks_want) {
				linked->locks_want = new_locks_want;
677
				btree_path_get_locks(trans, linked, true, NULL);
678
			}
679
	}
Kent Overstreet's avatar
Kent Overstreet committed
680 681 682
out:
	bch2_trans_verify_locks(trans);
	return ret;
683 684 685 686 687 688
}

void __bch2_btree_path_downgrade(struct btree_trans *trans,
				 struct btree_path *path,
				 unsigned new_locks_want)
{
689
	unsigned l, old_locks_want = path->locks_want;
690

691 692 693
	if (trans->restarted)
		return;

694 695 696 697 698
	EBUG_ON(path->locks_want < new_locks_want);

	path->locks_want = new_locks_want;

	while (path->nodes_locked &&
699
	       (l = btree_path_highest_level_locked(path)) >= path->locks_want) {
700 701 702 703 704
		if (l > path->level) {
			btree_node_unlock(trans, path, l);
		} else {
			if (btree_node_intent_locked(path, l)) {
				six_lock_downgrade(&path->l[l].b->c.lock);
705
				mark_btree_node_locked_noreset(path, l, BTREE_NODE_READ_LOCKED);
706 707 708 709 710 711
			}
			break;
		}
	}

	bch2_btree_path_verify_locks(path);
712

713
	trace_path_downgrade(trans, _RET_IP_, path, old_locks_want);
714 715
}

716 717
/* Btree transaction locking: */

718 719 720
void bch2_trans_downgrade(struct btree_trans *trans)
{
	struct btree_path *path;
721
	unsigned i;
722

723 724 725
	if (trans->restarted)
		return;

726
	trans_for_each_path(trans, path, i)
727 728
		if (path->ref)
			bch2_btree_path_downgrade(trans, path);
729 730
}

731
static inline void __bch2_trans_unlock(struct btree_trans *trans)
732 733
{
	struct btree_path *path;
734
	unsigned i;
735

736 737 738
	trans_for_each_path(trans, path, i)
		__bch2_btree_path_unlock(trans, path);
}
739

740 741 742 743 744
static noinline __cold int bch2_trans_relock_fail(struct btree_trans *trans, struct btree_path *path,
						  struct get_locks_fail *f, bool trace)
{
	if (!trace)
		goto out;
745

746 747
	if (trace_trans_restart_relock_enabled()) {
		struct printbuf buf = PRINTBUF;
748

749 750 751 752 753 754
		bch2_bpos_to_text(&buf, path->pos);
		prt_printf(&buf, " l=%u seq=%u node seq=", f->l, path->l[f->l].lock_seq);
		if (IS_ERR_OR_NULL(f->b)) {
			prt_str(&buf, bch2_err_str(PTR_ERR(f->b)));
		} else {
			prt_printf(&buf, "%u", f->b->c.lock.seq);
755

756 757 758 759 760 761
			struct six_lock_count c =
				bch2_btree_node_lock_counts(trans, NULL, &f->b->c, f->l);
			prt_printf(&buf, " self locked %u.%u.%u", c.n[0], c.n[1], c.n[2]);

			c = six_lock_counts(&f->b->c.lock);
			prt_printf(&buf, " total locked %u.%u.%u", c.n[0], c.n[1], c.n[2]);
762
		}
763 764 765

		trace_trans_restart_relock(trans, _RET_IP_, buf.buf);
		printbuf_exit(&buf);
766 767
	}

768 769 770 771 772
	count_event(trans->c, trans_restart_relock);
out:
	__bch2_trans_unlock(trans);
	bch2_trans_verify_locks(trans);
	return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
773 774
}

775
static inline int __bch2_trans_relock(struct btree_trans *trans, bool trace)
776
{
777
	bch2_trans_verify_locks(trans);
778 779 780

	if (unlikely(trans->restarted))
		return -((int) trans->restarted);
Kent Overstreet's avatar
Kent Overstreet committed
781 782
	if (unlikely(trans->locked))
		goto out;
783

784 785 786
	struct btree_path *path;
	unsigned i;

787 788 789
	trans_for_each_path(trans, path, i) {
		struct get_locks_fail f;

790
		if (path->should_be_locked &&
791 792 793 794
		    !btree_path_get_locks(trans, path, false, &f))
			return bch2_trans_relock_fail(trans, path, &f, trace);
	}

Kent Overstreet's avatar
Kent Overstreet committed
795 796
	trans->locked = true;
out:
797
	bch2_trans_verify_locks(trans);
798 799 800
	return 0;
}

801
int bch2_trans_relock(struct btree_trans *trans)
802
{
803 804
	return __bch2_trans_relock(trans, true);
}
805

806 807 808
int bch2_trans_relock_notrace(struct btree_trans *trans)
{
	return __bch2_trans_relock(trans, false);
809 810
}

811
void bch2_trans_unlock_noassert(struct btree_trans *trans)
812
{
813
	__bch2_trans_unlock(trans);
Kent Overstreet's avatar
Kent Overstreet committed
814 815 816

	trans->locked = false;
	trans->last_unlock_ip = _RET_IP_;
817
}
818

819 820 821
void bch2_trans_unlock(struct btree_trans *trans)
{
	__bch2_trans_unlock(trans);
Kent Overstreet's avatar
Kent Overstreet committed
822 823 824

	trans->locked = false;
	trans->last_unlock_ip = _RET_IP_;
825
}
826

827 828 829 830 831 832
void bch2_trans_unlock_long(struct btree_trans *trans)
{
	bch2_trans_unlock(trans);
	bch2_trans_srcu_unlock(trans);
}

833 834 835
int __bch2_trans_mutex_lock(struct btree_trans *trans,
			    struct mutex *lock)
{
836
	int ret = drop_locks_do(trans, (mutex_lock(lock), 0));
837 838 839 840 841 842

	if (ret)
		mutex_unlock(lock);
	return ret;
}

843 844 845 846 847 848
/* Debug */

#ifdef CONFIG_BCACHEFS_DEBUG

void bch2_btree_path_verify_locks(struct btree_path *path)
{
849 850 851 852 853
	/*
	 * A path may be uptodate and yet have nothing locked if and only if
	 * there is no node at path->level, which generally means we were
	 * iterating over all nodes and got to the end of the btree
	 */
854 855 856 857 858
	BUG_ON(path->uptodate == BTREE_ITER_UPTODATE &&
	       btree_path_node(path, path->level) &&
	       !path->nodes_locked);

	if (!path->nodes_locked)
859 860
		return;

861
	for (unsigned l = 0; l < BTREE_MAX_DEPTH; l++) {
862 863 864 865 866 867 868 869 870 871
		int want = btree_lock_want(path, l);
		int have = btree_node_locked_type(path, l);

		BUG_ON(!is_btree_node(path, l) && have != BTREE_NODE_UNLOCKED);

		BUG_ON(is_btree_node(path, l) &&
		       (want == BTREE_NODE_UNLOCKED ||
			have != BTREE_NODE_WRITE_LOCKED) &&
		       want != have);
	}
872 873
}

Kent Overstreet's avatar
Kent Overstreet committed
874 875 876 877 878 879 880 881 882 883 884
static bool bch2_trans_locked(struct btree_trans *trans)
{
	struct btree_path *path;
	unsigned i;

	trans_for_each_path(trans, path, i)
		if (path->nodes_locked)
			return true;
	return false;
}

885 886
void bch2_trans_verify_locks(struct btree_trans *trans)
{
887 888 889 890 891
	if (!trans->locked) {
		BUG_ON(bch2_trans_locked(trans));
		return;
	}

892
	struct btree_path *path;
893
	unsigned i;
894

895
	trans_for_each_path(trans, path, i)
896 897 898 899
		bch2_btree_path_verify_locks(path);
}

#endif