// SPDX-License-Identifier: GPL-2.0
/*
 * bcache setup/teardown code, and some metadata io - read a superblock and
 * figure out what to do with it.
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "extents.h"
#include "request.h"
#include "writeback.h"

#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/debugfs.h>
#include <linux/genhd.h>
#include <linux/idr.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/sysfs.h>

unsigned int bch_cutoff_writeback;
unsigned int bch_cutoff_writeback_sync;

static const char bcache_magic[] = {
	0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca,
	0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81
};

static const char invalid_uuid[] = {
	0xa0, 0x3e, 0xf8, 0xed, 0x3e, 0xe1, 0xb8, 0x78,
	0xc8, 0x50, 0xfc, 0x5e, 0xcb, 0x16, 0xcd, 0x99
};

static struct kobject *bcache_kobj;
struct mutex bch_register_lock;
LIST_HEAD(bch_cache_sets);
static LIST_HEAD(uncached_devices);

static int bcache_major;
static DEFINE_IDA(bcache_device_idx);
static wait_queue_head_t unregister_wait;
struct workqueue_struct *bcache_wq;
struct workqueue_struct *bch_journal_wq;

#define BTREE_MAX_PAGES		(256 * 1024 / PAGE_SIZE)
/* limitation of partitions number on single bcache device */
#define BCACHE_MINORS		128
/* limitation of bcache devices number on single system */
#define BCACHE_DEVICE_IDX_MAX	((1U << MINORBITS)/BCACHE_MINORS)
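/*
 * Illustrative example (assuming MINORBITS == 20, as in the mainline kernel):
 * with BCACHE_MINORS == 128 this gives BCACHE_DEVICE_IDX_MAX =
 * (1 << 20) / 128 = 8192, i.e. at most 8192 bcache devices per system, each
 * reserving 128 consecutive minor numbers (device idx 2 starts at minor 256).
 */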

/* Superblock */

static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
			      struct page **res)
{
	const char *err;
	struct cache_sb *s;
	struct buffer_head *bh = __bread(bdev, 1, SB_SIZE);
	unsigned int i;

	if (!bh)
		return "IO error";

	s = (struct cache_sb *) bh->b_data;

	sb->offset		= le64_to_cpu(s->offset);
	sb->version		= le64_to_cpu(s->version);

	memcpy(sb->magic,	s->magic, 16);
	memcpy(sb->uuid,	s->uuid, 16);
	memcpy(sb->set_uuid,	s->set_uuid, 16);
	memcpy(sb->label,	s->label, SB_LABEL_SIZE);

	sb->flags		= le64_to_cpu(s->flags);
	sb->seq			= le64_to_cpu(s->seq);
	sb->last_mount		= le32_to_cpu(s->last_mount);
	sb->first_bucket	= le16_to_cpu(s->first_bucket);
	sb->keys		= le16_to_cpu(s->keys);

	for (i = 0; i < SB_JOURNAL_BUCKETS; i++)
		sb->d[i] = le64_to_cpu(s->d[i]);

	pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
		 sb->version, sb->flags, sb->seq, sb->keys);

	err = "Not a bcache superblock";
	if (sb->offset != SB_SECTOR)
		goto err;

	if (memcmp(sb->magic, bcache_magic, 16))
		goto err;

	err = "Too many journal buckets";
	if (sb->keys > SB_JOURNAL_BUCKETS)
		goto err;

	err = "Bad checksum";
	if (s->csum != csum_set(s))
		goto err;

	err = "Bad UUID";
	if (bch_is_zero(sb->uuid, 16))
		goto err;

	sb->block_size	= le16_to_cpu(s->block_size);

	err = "Superblock block size smaller than device block size";
	if (sb->block_size << 9 < bdev_logical_block_size(bdev))
		goto err;

	switch (sb->version) {
	case BCACHE_SB_VERSION_BDEV:
		sb->data_offset	= BDEV_DATA_START_DEFAULT;
		break;
	case BCACHE_SB_VERSION_BDEV_WITH_OFFSET:
		sb->data_offset	= le64_to_cpu(s->data_offset);

		err = "Bad data offset";
		if (sb->data_offset < BDEV_DATA_START_DEFAULT)
			goto err;

		break;
	case BCACHE_SB_VERSION_CDEV:
	case BCACHE_SB_VERSION_CDEV_WITH_UUID:
		sb->nbuckets	= le64_to_cpu(s->nbuckets);
		sb->bucket_size	= le16_to_cpu(s->bucket_size);

		sb->nr_in_set	= le16_to_cpu(s->nr_in_set);
		sb->nr_this_dev	= le16_to_cpu(s->nr_this_dev);

		err = "Too many buckets";
		if (sb->nbuckets > LONG_MAX)
			goto err;

		err = "Not enough buckets";
		if (sb->nbuckets < 1 << 7)
			goto err;

		err = "Bad block/bucket size";
		if (!is_power_of_2(sb->block_size) ||
		    sb->block_size > PAGE_SECTORS ||
		    !is_power_of_2(sb->bucket_size) ||
		    sb->bucket_size < PAGE_SECTORS)
			goto err;

		err = "Invalid superblock: device too small";
		if (get_capacity(bdev->bd_disk) <
		    sb->bucket_size * sb->nbuckets)
			goto err;

		err = "Bad UUID";
		if (bch_is_zero(sb->set_uuid, 16))
			goto err;

		err = "Bad cache device number in set";
		if (!sb->nr_in_set ||
		    sb->nr_in_set <= sb->nr_this_dev ||
		    sb->nr_in_set > MAX_CACHES_PER_SET)
			goto err;

		err = "Journal buckets not sequential";
		for (i = 0; i < sb->keys; i++)
			if (sb->d[i] != sb->first_bucket + i)
				goto err;

		err = "Too many journal buckets";
		if (sb->first_bucket + sb->keys > sb->nbuckets)
			goto err;

		err = "Invalid superblock: first bucket comes before end of super";
		if (sb->first_bucket * sb->bucket_size < 16)
			goto err;

		break;
	default:
		err = "Unsupported superblock version";
		goto err;
	}

	sb->last_mount = (u32)ktime_get_real_seconds();
	err = NULL;

	get_page(bh->b_page);
	*res = bh->b_page;
err:
	put_bh(bh);
	return err;
}

static void write_bdev_super_endio(struct bio *bio)
{
	struct cached_dev *dc = bio->bi_private;
	/* XXX: error checking */

	closure_put(&dc->sb_write);
}

static void __write_super(struct cache_sb *sb, struct bio *bio)
{
	struct cache_sb *out = page_address(bio_first_page_all(bio));
	unsigned int i;

	bio->bi_iter.bi_sector	= SB_SECTOR;
	bio->bi_iter.bi_size	= SB_SIZE;
	bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META);
	bch_bio_map(bio, NULL);

	out->offset		= cpu_to_le64(sb->offset);
	out->version		= cpu_to_le64(sb->version);

	memcpy(out->uuid,	sb->uuid, 16);
	memcpy(out->set_uuid,	sb->set_uuid, 16);
	memcpy(out->label,	sb->label, SB_LABEL_SIZE);

	out->flags		= cpu_to_le64(sb->flags);
	out->seq		= cpu_to_le64(sb->seq);

	out->last_mount		= cpu_to_le32(sb->last_mount);
	out->first_bucket	= cpu_to_le16(sb->first_bucket);
	out->keys		= cpu_to_le16(sb->keys);

	for (i = 0; i < sb->keys; i++)
		out->d[i] = cpu_to_le64(sb->d[i]);

	out->csum = csum_set(out);

	pr_debug("ver %llu, flags %llu, seq %llu",
		 sb->version, sb->flags, sb->seq);

	submit_bio(bio);
}

static void bch_write_bdev_super_unlock(struct closure *cl)
{
	struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write);

	up(&dc->sb_write_mutex);
}

void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
{
	struct closure *cl = &dc->sb_write;
	struct bio *bio = &dc->sb_bio;

	down(&dc->sb_write_mutex);
	closure_init(cl, parent);

	bio_reset(bio);
	bio_set_dev(bio, dc->bdev);
	bio->bi_end_io	= write_bdev_super_endio;
	bio->bi_private = dc;

	closure_get(cl);
	/* I/O request sent to backing device */
	__write_super(&dc->sb, bio);

	closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
}

static void write_super_endio(struct bio *bio)
{
	struct cache *ca = bio->bi_private;

	/* is_read = 0 */
	bch_count_io_errors(ca, bio->bi_status, 0,
			    "writing superblock");
	closure_put(&ca->set->sb_write);
}

static void bcache_write_super_unlock(struct closure *cl)
{
	struct cache_set *c = container_of(cl, struct cache_set, sb_write);

	up(&c->sb_write_mutex);
}

void bcache_write_super(struct cache_set *c)
{
	struct closure *cl = &c->sb_write;
	struct cache *ca;
	unsigned int i;

	down(&c->sb_write_mutex);
	closure_init(cl, &c->cl);

	c->sb.seq++;

	for_each_cache(ca, c, i) {
		struct bio *bio = &ca->sb_bio;

		ca->sb.version		= BCACHE_SB_VERSION_CDEV_WITH_UUID;
		ca->sb.seq		= c->sb.seq;
		ca->sb.last_mount	= c->sb.last_mount;

		SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb));

		bio_reset(bio);
		bio_set_dev(bio, ca->bdev);
		bio->bi_end_io	= write_super_endio;
		bio->bi_private = ca;

		closure_get(cl);
		__write_super(&ca->sb, bio);
	}

	closure_return_with_destructor(cl, bcache_write_super_unlock);
}

/* UUID io */

static void uuid_endio(struct bio *bio)
{
	struct closure *cl = bio->bi_private;
	struct cache_set *c = container_of(cl, struct cache_set, uuid_write);

	cache_set_err_on(bio->bi_status, c, "accessing uuids");
	bch_bbio_free(bio, c);
	closure_put(cl);
}

static void uuid_io_unlock(struct closure *cl)
{
	struct cache_set *c = container_of(cl, struct cache_set, uuid_write);

	up(&c->uuid_write_mutex);
}

static void uuid_io(struct cache_set *c, int op, unsigned long op_flags,
		    struct bkey *k, struct closure *parent)
{
	struct closure *cl = &c->uuid_write;
	struct uuid_entry *u;
	unsigned int i;
	char buf[80];

	BUG_ON(!parent);
	down(&c->uuid_write_mutex);
	closure_init(cl, parent);

	for (i = 0; i < KEY_PTRS(k); i++) {
		struct bio *bio = bch_bbio_alloc(c);

		bio->bi_opf = REQ_SYNC | REQ_META | op_flags;
		bio->bi_iter.bi_size = KEY_SIZE(k) << 9;

		bio->bi_end_io	= uuid_endio;
		bio->bi_private = cl;
		bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
		bch_bio_map(bio, c->uuids);

		bch_submit_bbio(bio, c, k, i);

		if (op != REQ_OP_WRITE)
			break;
	}

	bch_extent_to_text(buf, sizeof(buf), k);
	pr_debug("%s UUIDs at %s", op == REQ_OP_WRITE ? "wrote" : "read", buf);

	for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
		if (!bch_is_zero(u->uuid, 16))
			pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u",
				 u - c->uuids, u->uuid, u->label,
				 u->first_reg, u->last_reg, u->invalidated);

	closure_return_with_destructor(cl, uuid_io_unlock);
}

static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl)
{
	struct bkey *k = &j->uuid_bucket;

	if (__bch_btree_ptr_invalid(c, k))
		return "bad uuid pointer";

	bkey_copy(&c->uuid_bucket, k);
	uuid_io(c, REQ_OP_READ, 0, k, cl);

	if (j->version < BCACHE_JSET_VERSION_UUIDv1) {
		struct uuid_entry_v0	*u0 = (void *) c->uuids;
		struct uuid_entry	*u1 = (void *) c->uuids;
		int i;

		closure_sync(cl);

		/*
		 * Since the new uuid entry is bigger than the old, we have to
		 * convert starting at the highest memory address and work down
		 * in order to do it in place
		 */

		for (i = c->nr_uuids - 1;
		     i >= 0;
		     --i) {
			memcpy(u1[i].uuid,	u0[i].uuid, 16);
			memcpy(u1[i].label,	u0[i].label, 32);

			u1[i].first_reg		= u0[i].first_reg;
			u1[i].last_reg		= u0[i].last_reg;
			u1[i].invalidated	= u0[i].invalidated;

			u1[i].flags	= 0;
			u1[i].sectors	= 0;
		}
	}

	return NULL;
}

static int __uuid_write(struct cache_set *c)
{
	BKEY_PADDED(key) k;
	struct closure cl;
	struct cache *ca;

	closure_init_stack(&cl);
	lockdep_assert_held(&bch_register_lock);

	if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, true))
		return 1;

	SET_KEY_SIZE(&k.key, c->sb.bucket_size);
	uuid_io(c, REQ_OP_WRITE, 0, &k.key, &cl);
	closure_sync(&cl);

	/* Only one bucket used for uuid write */
	ca = PTR_CACHE(c, &k.key, 0);
	atomic_long_add(ca->sb.bucket_size, &ca->meta_sectors_written);

	bkey_copy(&c->uuid_bucket, &k.key);
	bkey_put(c, &k.key);
	return 0;
}

int bch_uuid_write(struct cache_set *c)
{
	int ret = __uuid_write(c);

	if (!ret)
		bch_journal_meta(c, NULL);

	return ret;
}

static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid)
{
	struct uuid_entry *u;

	for (u = c->uuids;
	     u < c->uuids + c->nr_uuids; u++)
		if (!memcmp(u->uuid, uuid, 16))
			return u;

	return NULL;
}

static struct uuid_entry *uuid_find_empty(struct cache_set *c)
{
	static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";

	return uuid_find(c, zero_uuid);
}

/*
 * Bucket priorities/gens:
 *
 * For each bucket, we store on disk its
 *   8 bit gen
 *  16 bit priority
 *
 * See alloc.c for an explanation of the gen. The priority is used to implement
 * lru (and in the future other) cache replacement policies; for most purposes
 * it's just an opaque integer.
 *
 * The gens and the priorities don't have a whole lot to do with each other, and
 * it's actually the gens that must be written out at specific times - it's no
 * big deal if the priorities don't get written, if we lose them we just reuse
 * buckets in suboptimal order.
 *
 * On disk they're stored in a packed array, and in as many buckets are required
 * to fit them all. The buckets we use to store them form a list; the journal
 * header points to the first bucket, the first bucket points to the second
 * bucket, et cetera.
 *
 * This code is used by the allocation code; periodically (whenever it runs out
 * of buckets to allocate from) the allocation code will invalidate some
 * buckets, but it can't use those buckets until their new gens are safely on
 * disk.
 */
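
/*
 * Illustrative sketch only (the on-disk structures themselves are defined in
 * bcache.h): the chain described above looks roughly like
 *
 *   journal entry -> prio bucket 0 -> prio bucket 1 -> ...
 *
 * where each link is the next_bucket field of the prio_set stored in that
 * bucket, and each prio_set packs one (prio, gen) pair per data bucket.
 * bch_prio_write() below builds and writes this chain; prio_read() walks it.
 */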

static void prio_endio(struct bio *bio)
{
	struct cache *ca = bio->bi_private;

	cache_set_err_on(bio->bi_status, ca->set, "accessing priorities");
	bch_bbio_free(bio, ca->set);
	closure_put(&ca->prio);
}

static void prio_io(struct cache *ca, uint64_t bucket, int op,
		    unsigned long op_flags)
{
	struct closure *cl = &ca->prio;
	struct bio *bio = bch_bbio_alloc(ca->set);

	closure_init_stack(cl);

	bio->bi_iter.bi_sector	= bucket * ca->sb.bucket_size;
	bio_set_dev(bio, ca->bdev);
	bio->bi_iter.bi_size	= bucket_bytes(ca);

	bio->bi_end_io	= prio_endio;
	bio->bi_private = ca;
	bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
	bch_bio_map(bio, ca->disk_buckets);

	closure_bio_submit(ca->set, bio, &ca->prio);
	closure_sync(cl);
}

void bch_prio_write(struct cache *ca)
{
	int i;
	struct bucket *b;
	struct closure cl;

	closure_init_stack(&cl);

	lockdep_assert_held(&ca->set->bucket_lock);

	ca->disk_buckets->seq++;

	atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
			&ca->meta_sectors_written);

	//pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
	//	 fifo_used(&ca->free_inc), fifo_used(&ca->unused));

	for (i = prio_buckets(ca) - 1; i >= 0; --i) {
		long bucket;
		struct prio_set *p = ca->disk_buckets;
		struct bucket_disk *d = p->data;
		struct bucket_disk *end = d + prios_per_bucket(ca);

		for (b = ca->buckets + i * prios_per_bucket(ca);
		     b < ca->buckets + ca->sb.nbuckets && d < end;
		     b++, d++) {
			d->prio = cpu_to_le16(b->prio);
			d->gen = b->gen;
		}

		p->next_bucket	= ca->prio_buckets[i + 1];
		p->magic	= pset_magic(&ca->sb);
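		/* csum covers everything in the bucket after the 8 byte csum field */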
		p->csum		= bch_crc64(&p->magic, bucket_bytes(ca) - 8);

		bucket = bch_bucket_alloc(ca, RESERVE_PRIO, true);
		BUG_ON(bucket == -1);

		mutex_unlock(&ca->set->bucket_lock);
		prio_io(ca, bucket, REQ_OP_WRITE, 0);
		mutex_lock(&ca->set->bucket_lock);

		ca->prio_buckets[i] = bucket;
		atomic_dec_bug(&ca->buckets[bucket].pin);
	}

	mutex_unlock(&ca->set->bucket_lock);

	bch_journal_meta(ca->set, &cl);
	closure_sync(&cl);

	mutex_lock(&ca->set->bucket_lock);

	/*
	 * Don't want the old priorities to get garbage collected until after we
	 * finish writing the new ones, and they're journalled
	 */
	for (i = 0; i < prio_buckets(ca); i++) {
		if (ca->prio_last_buckets[i])
			__bch_bucket_free(ca,
				&ca->buckets[ca->prio_last_buckets[i]]);

		ca->prio_last_buckets[i] = ca->prio_buckets[i];
	}
}

static void prio_read(struct cache *ca, uint64_t bucket)
{
	struct prio_set *p = ca->disk_buckets;
	struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d;
	struct bucket *b;
	unsigned int bucket_nr = 0;

	for (b = ca->buckets;
	     b < ca->buckets + ca->sb.nbuckets;
	     b++, d++) {
		if (d == end) {
			ca->prio_buckets[bucket_nr] = bucket;
			ca->prio_last_buckets[bucket_nr] = bucket;
			bucket_nr++;

			prio_io(ca, bucket, REQ_OP_READ, 0);

			if (p->csum !=
			    bch_crc64(&p->magic, bucket_bytes(ca) - 8))
				pr_warn("bad csum reading priorities");

			if (p->magic != pset_magic(&ca->sb))
				pr_warn("bad magic reading priorities");

			bucket = p->next_bucket;
			d = p->data;
		}

		b->prio = le16_to_cpu(d->prio);
		b->gen = b->last_gc = d->gen;
	}
}

/* Bcache device */

static int open_dev(struct block_device *b, fmode_t mode)
{
	struct bcache_device *d = b->bd_disk->private_data;

	if (test_bit(BCACHE_DEV_CLOSING, &d->flags))
		return -ENXIO;

	closure_get(&d->cl);
	return 0;
}

static void release_dev(struct gendisk *b, fmode_t mode)
{
	struct bcache_device *d = b->private_data;

	closure_put(&d->cl);
}

static int ioctl_dev(struct block_device *b, fmode_t mode,
		     unsigned int cmd, unsigned long arg)
{
	struct bcache_device *d = b->bd_disk->private_data;

	return d->ioctl(d, mode, cmd, arg);
}

static const struct block_device_operations bcache_ops = {
	.open		= open_dev,
	.release	= release_dev,
	.ioctl		= ioctl_dev,
	.owner		= THIS_MODULE,
};

void bcache_device_stop(struct bcache_device *d)
{
	if (!test_and_set_bit(BCACHE_DEV_CLOSING, &d->flags))
		/*
		 * closure_fn set to
		 * - cached device: cached_dev_flush()
		 * - flash dev: flash_dev_flush()
		 */
		closure_queue(&d->cl);
}

static void bcache_device_unlink(struct bcache_device *d)
{
	lockdep_assert_held(&bch_register_lock);

	if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) {
		unsigned int i;
		struct cache *ca;

		sysfs_remove_link(&d->c->kobj, d->name);
		sysfs_remove_link(&d->kobj, "cache");

		for_each_cache(ca, d->c, i)
			bd_unlink_disk_holder(ca->bdev, d->disk);
	}
}

static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
			       const char *name)
{
	unsigned int i;
	struct cache *ca;

	for_each_cache(ca, d->c, i)
		bd_link_disk_holder(ca->bdev, d->disk);

	snprintf(d->name, BCACHEDEVNAME_SIZE,
		 "%s%u", name, d->id);

	WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") ||
	     sysfs_create_link(&c->kobj, &d->kobj, d->name),
	     "Couldn't create device <-> cache set symlinks");

	clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
}

static void bcache_device_detach(struct bcache_device *d)
{
	lockdep_assert_held(&bch_register_lock);

	atomic_dec(&d->c->attached_dev_nr);

	if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) {
		struct uuid_entry *u = d->c->uuids + d->id;

		SET_UUID_FLASH_ONLY(u, 0);
		memcpy(u->uuid, invalid_uuid, 16);
		u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds());
		bch_uuid_write(d->c);
	}

	bcache_device_unlink(d);

	d->c->devices[d->id] = NULL;
	closure_put(&d->c->caching);
	d->c = NULL;
}

static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
				 unsigned int id)
{
	d->id = id;
	d->c = c;
	c->devices[id] = d;

	if (id >= c->devices_max_used)
		c->devices_max_used = id + 1;

	closure_get(&c->caching);
}

static inline int first_minor_to_idx(int first_minor)
{
	return (first_minor/BCACHE_MINORS);
}

static inline int idx_to_first_minor(int idx)
{
	return (idx * BCACHE_MINORS);
}

static void bcache_device_free(struct bcache_device *d)
{
	lockdep_assert_held(&bch_register_lock);

	pr_info("%s stopped", d->disk->disk_name);

	if (d->c)
		bcache_device_detach(d);
	if (d->disk && d->disk->flags & GENHD_FL_UP)
		del_gendisk(d->disk);
	if (d->disk && d->disk->queue)
		blk_cleanup_queue(d->disk->queue);
	if (d->disk) {
		ida_simple_remove(&bcache_device_idx,
				  first_minor_to_idx(d->disk->first_minor));
		put_disk(d->disk);
	}

	bioset_exit(&d->bio_split);
	kvfree(d->full_dirty_stripes);
	kvfree(d->stripe_sectors_dirty);

	closure_debug_destroy(&d->cl);
}

static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
			      sector_t sectors)
{
	struct request_queue *q;
	const size_t max_stripes = min_t(size_t, INT_MAX,
					 SIZE_MAX / sizeof(atomic_t));
	size_t n;
	int idx;

	if (!d->stripe_size)
		d->stripe_size = 1 << 31;

	d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size);

	if (!d->nr_stripes || d->nr_stripes > max_stripes) {
		pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
			(unsigned int)d->nr_stripes);
		return -ENOMEM;
	}

	n = d->nr_stripes * sizeof(atomic_t);
	d->stripe_sectors_dirty = kvzalloc(n, GFP_KERNEL);
	if (!d->stripe_sectors_dirty)
		return -ENOMEM;

	n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long);
	d->full_dirty_stripes = kvzalloc(n, GFP_KERNEL);
	if (!d->full_dirty_stripes)
		return -ENOMEM;

	idx = ida_simple_get(&bcache_device_idx, 0,
				BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
	if (idx < 0)
		return idx;

	if (bioset_init(&d->bio_split, 4, offsetof(struct bbio, bio),
			BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER))
		goto err;

	d->disk = alloc_disk(BCACHE_MINORS);
	if (!d->disk)
		goto err;

	set_capacity(d->disk, sectors);
	snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", idx);

	d->disk->major		= bcache_major;
	d->disk->first_minor	= idx_to_first_minor(idx);
	d->disk->fops		= &bcache_ops;
	d->disk->private_data	= d;

	q = blk_alloc_queue(GFP_KERNEL);
	if (!q)
		return -ENOMEM;

	blk_queue_make_request(q, NULL);
	d->disk->queue			= q;
	q->queuedata			= d;
	q->backing_dev_info->congested_data = d;
	q->limits.max_hw_sectors	= UINT_MAX;
	q->limits.max_sectors		= UINT_MAX;
	q->limits.max_segment_size	= UINT_MAX;
	q->limits.max_segments		= BIO_MAX_PAGES;
	blk_queue_max_discard_sectors(q, UINT_MAX);
	q->limits.discard_granularity	= 512;
	q->limits.io_min		= block_size;
	q->limits.logical_block_size	= block_size;
	q->limits.physical_block_size	= block_size;
	blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue);
	blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue);

	blk_queue_write_cache(q, true, true);

	return 0;

err:
	ida_simple_remove(&bcache_device_idx, idx);
	return -ENOMEM;

}

/* Cached device */

static void calc_cached_dev_sectors(struct cache_set *c)
{
	uint64_t sectors = 0;
	struct cached_dev *dc;

	list_for_each_entry(dc, &c->cached_devs, list)
		sectors += bdev_sectors(dc->bdev);

	c->cached_dev_sectors = sectors;
}

#define BACKING_DEV_OFFLINE_TIMEOUT 5	/* seconds */
static int cached_dev_status_update(void *arg)
{
	struct cached_dev *dc = arg;
	struct request_queue *q;

	/*
	 * If this kthread is being stopped from elsewhere, quit here directly.
	 * dc->io_disable might be set via the sysfs interface, so check it
	 * here too.
	 */
	while (!kthread_should_stop() && !dc->io_disable) {
		q = bdev_get_queue(dc->bdev);
		if (blk_queue_dying(q))
			dc->offline_seconds++;
		else
			dc->offline_seconds = 0;

		if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
			pr_err("%s: device offline for %d seconds",
			       dc->backing_dev_name,
			       BACKING_DEV_OFFLINE_TIMEOUT);
			pr_err("%s: disable I/O request due to backing "
			       "device offline", dc->disk.name);
			dc->io_disable = true;
			/* let others know earlier that io_disable is true */
			smp_mb();
			bcache_device_stop(&dc->disk);
			break;
		}
		schedule_timeout_interruptible(HZ);
	}

	wait_for_kthread_stop();
	return 0;
}


void bch_cached_dev_run(struct cached_dev *dc)
{
	struct bcache_device *d = &dc->disk;
	char *buf = kmemdup_nul(dc->sb.label, SB_LABEL_SIZE, GFP_KERNEL);
	char *env[] = {
		"DRIVER=bcache",
		kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid),
		kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf ? : ""),
		NULL,
	};

	if (atomic_xchg(&dc->running, 1)) {
		kfree(env[1]);
		kfree(env[2]);
		kfree(buf);
		return;
	}

	if (!d->c &&
	    BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) {
		struct closure cl;

		closure_init_stack(&cl);

		SET_BDEV_STATE(&dc->sb, BDEV_STATE_STALE);
		bch_write_bdev_super(dc, &cl);
		closure_sync(&cl);
	}

	add_disk(d->disk);
	bd_link_disk_holder(dc->bdev, dc->disk.disk);
	/*
	 * These env variables won't show up in the uevent file; use
	 * udevadm monitor -e instead. Only class/kset properties are persistent.
	 */
	kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
	kfree(env[1]);
	kfree(env[2]);
	kfree(buf);

	if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
	    sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
		pr_debug("error creating sysfs link");

	dc->status_update_thread = kthread_run(cached_dev_status_update,
					       dc, "bcache_status_update");
	if (IS_ERR(dc->status_update_thread)) {
		pr_warn("failed to create bcache_status_update kthread, "
			"continue to run without monitoring backing "
			"device status");
	}
}

/*
 * If BCACHE_DEV_RATE_DW_RUNNING is set, it means routine of the delayed
 * work dc->writeback_rate_update is running. Wait until the routine
 * quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to
 * cancel it. If BCACHE_DEV_RATE_DW_RUNNING is not clear after time_out
 * seconds, give up waiting here and continue to cancel it too.
 */
static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
{
	int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ;

	do {
		if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING,
			      &dc->disk.flags))
			break;
		time_out--;
		schedule_timeout_interruptible(1);
	} while (time_out > 0);

	if (time_out == 0)
		pr_warn("give up waiting for dc->writeback_write_update to quit");

	cancel_delayed_work_sync(&dc->writeback_rate_update);
}

static void cached_dev_detach_finish(struct work_struct *w)
{
	struct cached_dev *dc = container_of(w, struct cached_dev, detach);
	struct closure cl;

	closure_init_stack(&cl);

	BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags));
	BUG_ON(refcount_read(&dc->count));

	mutex_lock(&bch_register_lock);

	if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
		cancel_writeback_rate_update_dwork(dc);

	if (!IS_ERR_OR_NULL(dc->writeback_thread)) {
		kthread_stop(dc->writeback_thread);
		dc->writeback_thread = NULL;
	}

	memset(&dc->sb.set_uuid, 0, 16);
	SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE);

	bch_write_bdev_super(dc, &cl);
	closure_sync(&cl);

	calc_cached_dev_sectors(dc->disk.c);
	bcache_device_detach(&dc->disk);
	list_move(&dc->list, &uncached_devices);

	clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags);
	clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags);

	mutex_unlock(&bch_register_lock);

	pr_info("Caching disabled for %s", dc->backing_dev_name);

	/* Drop ref we took in cached_dev_detach() */
	closure_put(&dc->disk.cl);
}

void bch_cached_dev_detach(struct cached_dev *dc)
{
	lockdep_assert_held(&bch_register_lock);

	if (test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
		return;

	if (test_and_set_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
		return;

	/*
	 * Block the device from being closed and freed until we're finished
	 * detaching
	 */
	closure_get(&dc->disk.cl);

	bch_writeback_queue(dc);

	cached_dev_put(dc);
}

int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
			  uint8_t *set_uuid)
{
	uint32_t rtime = cpu_to_le32((u32)ktime_get_real_seconds());
	struct uuid_entry *u;
	struct cached_dev *exist_dc, *t;

	if ((set_uuid && memcmp(set_uuid, c->sb.set_uuid, 16)) ||
	    (!set_uuid && memcmp(dc->sb.set_uuid, c->sb.set_uuid, 16)))
		return -ENOENT;

	if (dc->disk.c) {
		pr_err("Can't attach %s: already attached",
		       dc->backing_dev_name);
		return -EINVAL;
	}

	if (test_bit(CACHE_SET_STOPPING, &c->flags)) {
		pr_err("Can't attach %s: shutting down",
		       dc->backing_dev_name);
		return -EINVAL;
	}

	if (dc->sb.block_size < c->sb.block_size) {
		/* Will die */
		pr_err("Couldn't attach %s: block size less than set's block size",
		       dc->backing_dev_name);
		return -EINVAL;
	}

	/* Check whether already attached */
	list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
		if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
			pr_err("Tried to attach %s but duplicate UUID already attached",
				dc->backing_dev_name);

			return -EINVAL;
		}
	}

	u = uuid_find(c, dc->sb.uuid);

	if (u &&
	    (BDEV_STATE(&dc->sb) == BDEV_STATE_STALE ||
	     BDEV_STATE(&dc->sb) == BDEV_STATE_NONE)) {
		memcpy(u->uuid, invalid_uuid, 16);
		u->invalidated = cpu_to_le32((u32)ktime_get_real_seconds());
		u = NULL;
	}

	if (!u) {
		if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
			pr_err("Couldn't find uuid for %s in set",
			       dc->backing_dev_name);
			return -ENOENT;
		}

		u = uuid_find_empty(c);
		if (!u) {
			pr_err("Not caching %s, no room for UUID",
			       dc->backing_dev_name);
			return -EINVAL;
		}
	}

	/*
	 * Deadlocks since we're called via sysfs...
	 * sysfs_remove_file(&dc->kobj, &sysfs_attach);
	 */

	if (bch_is_zero(u->uuid, 16)) {
		struct closure cl;

		closure_init_stack(&cl);

		memcpy(u->uuid, dc->sb.uuid, 16);
		memcpy(u->label, dc->sb.label, SB_LABEL_SIZE);
		u->first_reg = u->last_reg = rtime;
		bch_uuid_write(c);

		memcpy(dc->sb.set_uuid, c->sb.set_uuid, 16);
		SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN);

		bch_write_bdev_super(dc, &cl);
		closure_sync(&cl);
	} else {
		u->last_reg = rtime;
		bch_uuid_write(c);
	}

	bcache_device_attach(&dc->disk, c, u - c->uuids);
	list_move(&dc->list, &c->cached_devs);
	calc_cached_dev_sectors(c);

	/*
	 * dc->c must be set before dc->count != 0 - paired with the mb in
	 * cached_dev_get()
	 */
	smp_wmb();
	refcount_set(&dc->count, 1);

	/* Block writeback thread, but spawn it */
	down_write(&dc->writeback_lock);
	if (bch_cached_dev_writeback_start(dc)) {
		up_write(&dc->writeback_lock);
		return -ENOMEM;
	}

	if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
		atomic_set(&dc->has_dirty, 1);
		bch_writeback_queue(dc);
	}

	bch_sectors_dirty_init(&dc->disk);

	bch_cached_dev_run(dc);
	bcache_device_link(&dc->disk, c, "bdev");
	atomic_inc(&c->attached_dev_nr);

	/* Allow the writeback thread to proceed */
	up_write(&dc->writeback_lock);

	pr_info("Caching %s as %s on set %pU",
		dc->backing_dev_name,
		dc->disk.disk->disk_name,
		dc->disk.c->sb.set_uuid);
	return 0;
}

/* when dc->disk.kobj released */
void bch_cached_dev_release(struct kobject *kobj)
{
	struct cached_dev *dc = container_of(kobj, struct cached_dev,
					     disk.kobj);
	kfree(dc);
	module_put(THIS_MODULE);
}

static void cached_dev_free(struct closure *cl)
{
	struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);

	mutex_lock(&bch_register_lock);

	if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags))
		cancel_writeback_rate_update_dwork(dc);

	if (!IS_ERR_OR_NULL(dc->writeback_thread))
		kthread_stop(dc->writeback_thread);
	if (dc->writeback_write_wq)
		destroy_workqueue(dc->writeback_write_wq);
	if (!IS_ERR_OR_NULL(dc->status_update_thread))
		kthread_stop(dc->status_update_thread);

	if (atomic_read(&dc->running))
		bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
	bcache_device_free(&dc->disk);
	list_del(&dc->list);

	mutex_unlock(&bch_register_lock);

	if (!IS_ERR_OR_NULL(dc->bdev))
		blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);

	wake_up(&unregister_wait);

	kobject_put(&dc->disk.kobj);
}

static void cached_dev_flush(struct closure *cl)
{
	struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
	struct bcache_device *d = &dc->disk;

	mutex_lock(&bch_register_lock);
	bcache_device_unlink(d);
	mutex_unlock(&bch_register_lock);

	bch_cache_accounting_destroy(&dc->accounting);
	kobject_del(&d->kobj);

	continue_at(cl, cached_dev_free, system_wq);
}

static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
{
	int ret;
	struct io *io;
	struct request_queue *q = bdev_get_queue(dc->bdev);

	__module_get(THIS_MODULE);
	INIT_LIST_HEAD(&dc->list);
	closure_init(&dc->disk.cl, NULL);
	set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
	kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype);
	INIT_WORK(&dc->detach, cached_dev_detach_finish);
	sema_init(&dc->sb_write_mutex, 1);
	INIT_LIST_HEAD(&dc->io_lru);
	spin_lock_init(&dc->io_lock);
	bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);

	dc->sequential_cutoff		= 4 << 20;

	for (io = dc->io; io < dc->io + RECENT_IO; io++) {
		list_add(&io->lru, &dc->io_lru);
		hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
	}

	dc->disk.stripe_size = q->limits.io_opt >> 9;

	if (dc->disk.stripe_size)
		dc->partial_stripes_expensive =
			q->limits.raid_partial_stripes_expensive;

	ret = bcache_device_init(&dc->disk, block_size,
			 dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
	if (ret)
		return ret;

	dc->disk.disk->queue->backing_dev_info->ra_pages =
		max(dc->disk.disk->queue->backing_dev_info->ra_pages,
		    q->backing_dev_info->ra_pages);

	atomic_set(&dc->io_errors, 0);
	dc->io_disable = false;
	dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
	/* default to auto */
	dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;

	bch_cached_dev_request_init(dc);
	bch_cached_dev_writeback_init(dc);
	return 0;
}

/* Cached device - bcache superblock */

static int register_bdev(struct cache_sb *sb, struct page *sb_page,
				 struct block_device *bdev,
				 struct cached_dev *dc)
{
	const char *err = "cannot allocate memory";
	struct cache_set *c;

	bdevname(bdev, dc->backing_dev_name);
	memcpy(&dc->sb, sb, sizeof(struct cache_sb));
	dc->bdev = bdev;
	dc->bdev->bd_holder = dc;

	bio_init(&dc->sb_bio, dc->sb_bio.bi_inline_vecs, 1);
	bio_first_bvec_all(&dc->sb_bio)->bv_page = sb_page;
	get_page(sb_page);


	if (cached_dev_init(dc, sb->block_size << 9))
		goto err;

	err = "error creating kobject";
	if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj,
			"bcache"))
		goto err;
	if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
		goto err;

	pr_info("registered backing device %s", dc->backing_dev_name);

	list_add(&dc->list, &uncached_devices);
	/* attach to a matched cache set if it exists */
	list_for_each_entry(c, &bch_cache_sets, list)
		bch_cached_dev_attach(dc, c, NULL);

	if (BDEV_STATE(&dc->sb) == BDEV_STATE_NONE ||
	    BDEV_STATE(&dc->sb) == BDEV_STATE_STALE)
		bch_cached_dev_run(dc);

	return 0;
err:
	pr_notice("error %s: %s", dc->backing_dev_name, err);
	bcache_device_stop(&dc->disk);
	return -EIO;
}

/* Flash only volumes */

/* When d->kobj released */
void bch_flash_dev_release(struct kobject *kobj)
{
	struct bcache_device *d = container_of(kobj, struct bcache_device,
					       kobj);
	kfree(d);
}

static void flash_dev_free(struct closure *cl)
{
	struct bcache_device *d = container_of(cl, struct bcache_device, cl);

	mutex_lock(&bch_register_lock);
	atomic_long_sub(bcache_dev_sectors_dirty(d),
			&d->c->flash_dev_dirty_sectors);
	bcache_device_free(d);
	mutex_unlock(&bch_register_lock);
	kobject_put(&d->kobj);
}

static void flash_dev_flush(struct closure *cl)
{
	struct bcache_device *d = container_of(cl, struct bcache_device, cl);

	mutex_lock(&bch_register_lock);
	bcache_device_unlink(d);
	mutex_unlock(&bch_register_lock);
	kobject_del(&d->kobj);
	continue_at(cl, flash_dev_free, system_wq);
}

static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
{
	struct bcache_device *d = kzalloc(sizeof(struct bcache_device),
					  GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	closure_init(&d->cl, NULL);
	set_closure_fn(&d->cl, flash_dev_flush, system_wq);

	kobject_init(&d->kobj, &bch_flash_dev_ktype);

	if (bcache_device_init(d, block_bytes(c), u->sectors))
		goto err;

	bcache_device_attach(d, c, u - c->uuids);
	bch_sectors_dirty_init(d);
	bch_flash_dev_request_init(d);
	add_disk(d->disk);

	if (kobject_add(&d->kobj, &disk_to_dev(d->disk)->kobj, "bcache"))
		goto err;

	bcache_device_link(d, c, "volume");

	return 0;
err:
	kobject_put(&d->kobj);
	return -ENOMEM;
}

static int flash_devs_run(struct cache_set *c)
{
	int ret = 0;
	struct uuid_entry *u;

	for (u = c->uuids;
	     u < c->uuids + c->nr_uuids && !ret;
	     u++)
		if (UUID_FLASH_ONLY(u))
			ret = flash_dev_run(c, u);

	return ret;
}

int bch_flash_dev_create(struct cache_set *c, uint64_t size)
{
	struct uuid_entry *u;

	if (test_bit(CACHE_SET_STOPPING, &c->flags))
		return -EINTR;

	if (!test_bit(CACHE_SET_RUNNING, &c->flags))
		return -EPERM;

	u = uuid_find_empty(c);
	if (!u) {
		pr_err("Can't create volume, no room for UUID");
		return -EINVAL;
	}

	get_random_bytes(u->uuid, 16);
	memset(u->label, 0, 32);
	u->first_reg = u->last_reg = cpu_to_le32((u32)ktime_get_real_seconds());

	SET_UUID_FLASH_ONLY(u, 1);
	u->sectors = size >> 9;

	bch_uuid_write(c);

	return flash_dev_run(c, u);
}

bool bch_cached_dev_error(struct cached_dev *dc)
{
	struct cache_set *c;

	if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
		return false;

	dc->io_disable = true;
	/* make others know io_disable is true earlier */
	smp_mb();

	pr_err("stop %s: too many IO errors on backing device %s\n",
		dc->disk.disk->disk_name, dc->backing_dev_name);

	/*
	 * If the cached device is still attached to a cache set, then even
	 * though dc->io_disable is true and no more I/O requests are
	 * accepted, internal I/O to the cache device (writeback scan or
	 * garbage collection) may still prevent the bcache device from
	 * being stopped. So CACHE_SET_IO_DISABLE should be set in
	 * c->flags too, so that internal I/O to the cache device is
	 * rejected and stopped immediately.
	 * If c is NULL, the bcache device is not attached to any cache
	 * set, so there is no CACHE_SET_IO_DISABLE bit to set.
	 */
	c = dc->disk.c;
	if (c && test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
		pr_info("CACHE_SET_IO_DISABLE already set");

	bcache_device_stop(&dc->disk);
	return true;
}

/* Cache set */

__printf(2, 3)
bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
{
	va_list args;

	if (c->on_error != ON_ERROR_PANIC &&
	    test_bit(CACHE_SET_STOPPING, &c->flags))
		return false;

	if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
		pr_info("CACHE_SET_IO_DISABLE already set");

	/*
	 * XXX: we can be called from atomic context
	 * acquire_console_sem();
	 */

	pr_err("bcache: error on %pU: ", c->sb.set_uuid);

	va_start(args, fmt);
	vprintk(fmt, args);
	va_end(args);

	pr_err(", disabling caching\n");

	if (c->on_error == ON_ERROR_PANIC)
		panic("panic forced after error\n");

	bch_cache_set_unregister(c);
	return true;
}

/* When c->kobj released */
void bch_cache_set_release(struct kobject *kobj)
{
	struct cache_set *c = container_of(kobj, struct cache_set, kobj);

	kfree(c);
	module_put(THIS_MODULE);
}

static void cache_set_free(struct closure *cl)
{
	struct cache_set *c = container_of(cl, struct cache_set, cl);
	struct cache *ca;
	unsigned int i;

	debugfs_remove(c->debug);

	bch_open_buckets_free(c);
	bch_btree_cache_free(c);
	bch_journal_free(c);

	mutex_lock(&bch_register_lock);
	for_each_cache(ca, c, i)
		if (ca) {
			ca->set = NULL;
			c->cache[ca->sb.nr_this_dev] = NULL;
			kobject_put(&ca->kobj);
		}

	bch_bset_sort_state_free(&c->sort);
	free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));

	if (c->moving_gc_wq)
		destroy_workqueue(c->moving_gc_wq);
	bioset_exit(&c->bio_split);
	mempool_exit(&c->fill_iter);
	mempool_exit(&c->bio_meta);
	mempool_exit(&c->search);
	kfree(c->devices);

	list_del(&c->list);
	mutex_unlock(&bch_register_lock);

	pr_info("Cache set %pU unregistered", c->sb.set_uuid);
	wake_up(&unregister_wait);

	closure_debug_destroy(&c->cl);
	kobject_put(&c->kobj);
}

static void cache_set_flush(struct closure *cl)
{
	struct cache_set *c = container_of(cl, struct cache_set, caching);
	struct cache *ca;
	struct btree *b;
	unsigned int i;

	bch_cache_accounting_destroy(&c->accounting);

	kobject_put(&c->internal);
	kobject_del(&c->kobj);

	if (c->gc_thread)
		kthread_stop(c->gc_thread);

	if (!IS_ERR_OR_NULL(c->root))
		list_add(&c->root->list, &c->btree_cache);

	/* Should skip this if we're unregistering because of an error */
	list_for_each_entry(b, &c->btree_cache, list) {
		mutex_lock(&b->write_lock);
		if (btree_node_dirty(b))
			__bch_btree_node_write(b, NULL);
		mutex_unlock(&b->write_lock);
	}

	for_each_cache(ca, c, i)
		if (ca->alloc_thread)
			kthread_stop(ca->alloc_thread);

	if (c->journal.cur) {
		cancel_delayed_work_sync(&c->journal.work);
		/* flush last journal entry if needed */
		c->journal.work.work.func(&c->journal.work.work);
	}

	closure_return(cl);
}

/*
 * This function is only called when CACHE_SET_IO_DISABLE is set, which means
 * the cache set is unregistering due to too many I/O errors. In this condition,
 * the bcache device might be stopped; whether it is depends on the
 * stop_when_cache_set_failed value and on whether the broken cache has dirty data:
 *
 * dc->stop_when_cache_set_failed    dc->has_dirty   stop bcache device
 *  BCH_CACHED_DEV_STOP_AUTO           0               NO
 *  BCH_CACHED_DEV_STOP_AUTO           1               YES
 *  BCH_CACHED_DEV_STOP_ALWAYS         0               YES
 *  BCH_CACHED_DEV_STOP_ALWAYS         1               YES
 *
 * The expected behavior is: if stop_when_cache_set_failed is configured to
 * "auto" via the sysfs interface, the bcache device will not be stopped when
 * the backing device's data on the broken cache device is clean.
 */
static void conditional_stop_bcache_device(struct cache_set *c,
					   struct bcache_device *d,
					   struct cached_dev *dc)
{
	if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
		pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.",
			d->disk->disk_name, c->sb.set_uuid);
		bcache_device_stop(d);
	} else if (atomic_read(&dc->has_dirty)) {
		/*
		 * dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_AUTO
		 * and dc->has_dirty == 1
		 */
		pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
			d->disk->disk_name);
		/*
		 * There might be a small time gap in which the cache set is
		 * released but the bcache device is not. Inside this time
		 * gap, regular I/O requests will go directly to the
		 * backing device, since no cache set is attached. In
		 * writeback mode, while the cache is dirty, this may also
		 * leave potentially inconsistent data behind.
		 * Therefore before calling bcache_device_stop() due
		 * to a broken cache device, dc->io_disable should be
		 * explicitly set to true.
		 */
		dc->io_disable = true;
		/* make others know io_disable is true earlier */
		smp_mb();
		bcache_device_stop(d);
	} else {
		/*
		 * dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_AUTO
		 * and dc->has_dirty == 0
		 */
		pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.",
			d->disk->disk_name);
	}
}

static void __cache_set_unregister(struct closure *cl)
{
	struct cache_set *c = container_of(cl, struct cache_set, caching);
	struct cached_dev *dc;
	struct bcache_device *d;
	size_t i;

	mutex_lock(&bch_register_lock);

	for (i = 0; i < c->devices_max_used; i++) {
		d = c->devices[i];
		if (!d)
			continue;

		if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
		    test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
			dc = container_of(d, struct cached_dev, disk);
			bch_cached_dev_detach(dc);
			if (test_bit(CACHE_SET_IO_DISABLE, &c->flags))
				conditional_stop_bcache_device(c, d, dc);
		} else {
			bcache_device_stop(d);
		}
	}

	mutex_unlock(&bch_register_lock);

	continue_at(cl, cache_set_flush, system_wq);
}

void bch_cache_set_stop(struct cache_set *c)
{
	if (!test_and_set_bit(CACHE_SET_STOPPING, &c->flags))
		/* closure_fn set to __cache_set_unregister() */
		closure_queue(&c->caching);
}

void bch_cache_set_unregister(struct cache_set *c)
{
	set_bit(CACHE_SET_UNREGISTERING, &c->flags);
	bch_cache_set_stop(c);
}

#define alloc_bucket_pages(gfp, c)			\
	((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(c))))
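/* i.e. a zeroed, bucket-sized allocation (bucket_pages(c) pages) */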

struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
{
	int iter_size;
	struct cache_set *c = kzalloc(sizeof(struct cache_set), GFP_KERNEL);

	if (!c)
		return NULL;

	__module_get(THIS_MODULE);
	closure_init(&c->cl, NULL);
	set_closure_fn(&c->cl, cache_set_free, system_wq);

	closure_init(&c->caching, &c->cl);
	set_closure_fn(&c->caching, __cache_set_unregister, system_wq);

	/* Maybe create continue_at_noreturn() and use it here? */
	closure_set_stopped(&c->cl);
	closure_put(&c->cl);

	kobject_init(&c->kobj, &bch_cache_set_ktype);
	kobject_init(&c->internal, &bch_cache_set_internal_ktype);

	bch_cache_accounting_init(&c->accounting, &c->cl);

	memcpy(c->sb.set_uuid, sb->set_uuid, 16);
	c->sb.block_size	= sb->block_size;
	c->sb.bucket_size	= sb->bucket_size;
	c->sb.nr_in_set		= sb->nr_in_set;
	c->sb.last_mount	= sb->last_mount;
	c->bucket_bits		= ilog2(sb->bucket_size);
	c->block_bits		= ilog2(sb->block_size);
	c->nr_uuids		= bucket_bytes(c) / sizeof(struct uuid_entry);
1728
	c->devices_max_used	= 0;
1729
	atomic_set(&c->attached_dev_nr, 0);
1730
	c->btree_pages		= bucket_pages(c);
1731 1732 1733 1734
	if (c->btree_pages > BTREE_MAX_PAGES)
		c->btree_pages = max_t(int, c->btree_pages / 4,
				       BTREE_MAX_PAGES);

	sema_init(&c->sb_write_mutex, 1);
	mutex_init(&c->bucket_lock);
	init_waitqueue_head(&c->btree_cache_wait);
	init_waitqueue_head(&c->bucket_wait);
	init_waitqueue_head(&c->gc_wait);
	sema_init(&c->uuid_write_mutex, 1);

	spin_lock_init(&c->btree_gc_time.lock);
	spin_lock_init(&c->btree_split_time.lock);
	spin_lock_init(&c->btree_read_time.lock);

	bch_moving_init_cache_set(c);

	INIT_LIST_HEAD(&c->list);
	INIT_LIST_HEAD(&c->cached_devs);
	INIT_LIST_HEAD(&c->btree_cache);
	INIT_LIST_HEAD(&c->btree_cache_freeable);
	INIT_LIST_HEAD(&c->btree_cache_freed);
	INIT_LIST_HEAD(&c->data_buckets);

	iter_size = (sb->bucket_size / sb->block_size + 1) *
		sizeof(struct btree_iter_set);

	if (!(c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL)) ||
	    mempool_init_slab_pool(&c->search, 32, bch_search_cache) ||
	    mempool_init_kmalloc_pool(&c->bio_meta, 2,
				sizeof(struct bbio) + sizeof(struct bio_vec) *
				bucket_pages(c)) ||
	    mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
	    bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
			BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) ||
	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
	    !(c->moving_gc_wq = alloc_workqueue("bcache_gc",
						WQ_MEM_RECLAIM, 0)) ||
	    bch_journal_alloc(c) ||
	    bch_btree_cache_alloc(c) ||
	    bch_open_buckets_alloc(c) ||
	    bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
		goto err;

	c->congested_read_threshold_us	= 2000;
	c->congested_write_threshold_us	= 20000;
	c->error_limit	= DEFAULT_IO_ERROR_LIMIT;
	WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));

	return c;
err:
	bch_cache_set_unregister(c);
	return NULL;
}

static int run_cache_set(struct cache_set *c)
{
	const char *err = "cannot allocate memory";
	struct cached_dev *dc, *t;
	struct cache *ca;
	struct closure cl;
	unsigned int i;
	LIST_HEAD(journal);
	struct journal_replay *l;

	closure_init_stack(&cl);

	for_each_cache(ca, c, i)
		c->nbuckets += ca->sb.nbuckets;
	set_gc_sectors(c);

	if (CACHE_SYNC(&c->sb)) {
		LIST_HEAD(journal);
		struct bkey *k;
		struct jset *j;

		err = "cannot allocate memory for journal";
		if (bch_journal_read(c, &journal))
			goto err;

		pr_debug("btree_journal_read() done");

		err = "no journal entries found";
		if (list_empty(&journal))
			goto err;

		j = &list_entry(journal.prev, struct journal_replay, list)->j;

		err = "IO error reading priorities";
		for_each_cache(ca, c, i)
			prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]);

		/*
		 * If prio_read() fails it'll call cache_set_error and we'll
		 * tear everything down right away, but if we perhaps checked
		 * sooner we could avoid journal replay.
		 */

		k = &j->btree_root;

		err = "bad btree root";
		if (__bch_btree_ptr_invalid(c, k))
			goto err;

		err = "error reading btree root";
		c->root = bch_btree_node_get(c, NULL, k,
					     j->btree_level,
					     true, NULL);
		if (IS_ERR_OR_NULL(c->root))
			goto err;

		list_del_init(&c->root->list);
		rw_unlock(true, c->root);

		err = uuid_read(c, j, &cl);
		if (err)
			goto err;

		err = "error in recovery";
		if (bch_btree_check(c))
			goto err;

		bch_journal_mark(c, &journal);
		bch_initial_gc_finish(c);
		pr_debug("btree_check() done");

		/*
		 * bcache_journal_next() can't happen sooner, or
		 * btree_gc_finish() will give spurious errors about last_gc >
		 * gc_gen - this is a hack but oh well.
		 */
		bch_journal_next(&c->journal);

		err = "error starting allocator thread";
		for_each_cache(ca, c, i)
			if (bch_cache_allocator_start(ca))
				goto err;

		/*
		 * First place it's safe to allocate: btree_check() and
		 * btree_gc_finish() have to run before we have buckets to
		 * allocate, and bch_bucket_alloc_set() might cause a journal
		 * entry to be written so bcache_journal_next() has to be called
		 * first.
		 *
		 * If the uuids were in the old format we have to rewrite them
		 * before the next journal entry is written:
		 */
		if (j->version < BCACHE_JSET_VERSION_UUID)
			__uuid_write(c);

		err = "bcache: replay journal failed";
		if (bch_journal_replay(c, &journal))
			goto err;
	} else {
		pr_notice("invalidating existing data");

		for_each_cache(ca, c, i) {
			unsigned int j;

			ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7,
					      2, SB_JOURNAL_BUCKETS);

			for (j = 0; j < ca->sb.keys; j++)
				ca->sb.d[j] = ca->sb.first_bucket + j;
		}

		bch_initial_gc_finish(c);

		err = "error starting allocator thread";
		for_each_cache(ca, c, i)
			if (bch_cache_allocator_start(ca))
				goto err;

		mutex_lock(&c->bucket_lock);
		for_each_cache(ca, c, i)
			bch_prio_write(ca);
		mutex_unlock(&c->bucket_lock);

		err = "cannot allocate new UUID bucket";
		if (__uuid_write(c))
			goto err;

		err = "cannot allocate new btree root";
		c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL);
		if (IS_ERR_OR_NULL(c->root))
			goto err;

		mutex_lock(&c->root->write_lock);
		bkey_copy_key(&c->root->key, &MAX_KEY);
		bch_btree_node_write(c->root, &cl);
		mutex_unlock(&c->root->write_lock);

		bch_btree_set_root(c->root);
		rw_unlock(true, c->root);

		/*
		 * We don't want to write the first journal entry until
		 * everything is set up - fortunately journal entries won't be
		 * written until the SET_CACHE_SYNC() here:
		 */
		SET_CACHE_SYNC(&c->sb, true);

		bch_journal_next(&c->journal);
		bch_journal_meta(c, &cl);
	}

	err = "error starting gc thread";
	if (bch_gc_thread_start(c))
		goto err;

	closure_sync(&cl);
	c->sb.last_mount = (u32)ktime_get_real_seconds();
	bcache_write_super(c);

	list_for_each_entry_safe(dc, t, &uncached_devices, list)
		bch_cached_dev_attach(dc, c, NULL);

	flash_devs_run(c);

	set_bit(CACHE_SET_RUNNING, &c->flags);
	return 0;
err:
	while (!list_empty(&journal)) {
		l = list_first_entry(&journal, struct journal_replay, list);
		list_del(&l->list);
		kfree(l);
	}

	closure_sync(&cl);
	/* XXX: test this, it's broken */
	bch_cache_set_error(c, "%s", err);

	return -EIO;
}

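/*
 * A cache device may only join a cache set whose superblock agrees on
 * block size, bucket size and the number of caches in the set.
 */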
static bool can_attach_cache(struct cache *ca, struct cache_set *c)
{
	return ca->sb.block_size	== c->sb.block_size &&
		ca->sb.bucket_size	== c->sb.bucket_size &&
		ca->sb.nr_in_set	== c->sb.nr_in_set;
}

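/*
 * Attach a newly registered cache device to the cache set matching its
 * set UUID, allocating the set if it doesn't exist yet. Returns NULL on
 * success or an error string; the set is run once all members are present.
 */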
static const char *register_cache_set(struct cache *ca)
{
	char buf[12];
	const char *err = "cannot allocate memory";
	struct cache_set *c;

	list_for_each_entry(c, &bch_cache_sets, list)
		if (!memcmp(c->sb.set_uuid, ca->sb.set_uuid, 16)) {
			if (c->cache[ca->sb.nr_this_dev])
				return "duplicate cache set member";

			if (!can_attach_cache(ca, c))
				return "cache sb does not match set";

			if (!CACHE_SYNC(&ca->sb))
				SET_CACHE_SYNC(&c->sb, false);

			goto found;
		}

	c = bch_cache_set_alloc(&ca->sb);
	if (!c)
		return err;

	err = "error creating kobject";
	if (kobject_add(&c->kobj, bcache_kobj, "%pU", c->sb.set_uuid) ||
	    kobject_add(&c->internal, &c->kobj, "internal"))
		goto err;

	if (bch_cache_accounting_add_kobjs(&c->accounting, &c->kobj))
		goto err;

	bch_debug_init_cache_set(c);

	list_add(&c->list, &bch_cache_sets);
found:
	sprintf(buf, "cache%i", ca->sb.nr_this_dev);
	if (sysfs_create_link(&ca->kobj, &c->kobj, "set") ||
	    sysfs_create_link(&c->kobj, &ca->kobj, buf))
		goto err;

	if (ca->sb.seq > c->sb.seq) {
		c->sb.version		= ca->sb.version;
		memcpy(c->sb.set_uuid, ca->sb.set_uuid, 16);
		c->sb.flags             = ca->sb.flags;
		c->sb.seq		= ca->sb.seq;
		pr_debug("set version = %llu", c->sb.version);
	}

	kobject_get(&ca->kobj);
	ca->set = c;
	ca->set->cache[ca->sb.nr_this_dev] = ca;
	c->cache_by_alloc[c->caches_loaded++] = ca;

	if (c->caches_loaded == c->sb.nr_in_set) {
		err = "failed to run cache set";
		if (run_cache_set(c) < 0)
			goto err;
	}

	return NULL;
err:
	bch_cache_set_unregister(c);
	return err;
}

/* Cache device */

/* When ca->kobj released */
void bch_cache_release(struct kobject *kobj)
{
	struct cache *ca = container_of(kobj, struct cache, kobj);
	unsigned int i;

	if (ca->set) {
		BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca);
		ca->set->cache[ca->sb.nr_this_dev] = NULL;
	}

	free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca)));
	kfree(ca->prio_buckets);
	vfree(ca->buckets);

	free_heap(&ca->heap);
	free_fifo(&ca->free_inc);

	for (i = 0; i < RESERVE_NR; i++)
		free_fifo(&ca->free[i]);

	if (ca->sb_bio.bi_inline_vecs[0].bv_page)
		put_page(bio_first_page_all(&ca->sb_bio));

	if (!IS_ERR_OR_NULL(ca->bdev))
		blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);

	kfree(ca);
	module_put(THIS_MODULE);
}

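/*
 * Allocate the in-memory structures for a cache device: the reserve
 * FIFOs, the bucket heap, the bucket array and the prio/disk bucket
 * buffers, all sized from the superblock's bucket counts.
 */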
static int cache_alloc(struct cache *ca)
{
	size_t free;
	size_t btree_buckets;
	struct bucket *b;
	int ret = -ENOMEM;
	const char *err = NULL;

	__module_get(THIS_MODULE);
	kobject_init(&ca->kobj, &bch_cache_ktype);

	bio_init(&ca->journal.bio, ca->journal.bio.bi_inline_vecs, 8);

	/*
	 * When ca->sb.njournal_buckets is non-zero, a journal exists and
	 * bch_journal_replay() may split btree nodes, so buckets from the
	 * RESERVE_BTREE pool are needed. In the worst case every journal
	 * bucket holds valid journal entries and all of their keys must be
	 * replayed, so reserve as many RESERVE_BTREE buckets as there are
	 * journal buckets.
	 */
	btree_buckets = ca->sb.njournal_buckets ?: 8;
	free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
	if (!free) {
		ret = -EPERM;
		err = "ca->sb.nbuckets is too small";
		goto err_free;
	}

	if (!init_fifo(&ca->free[RESERVE_BTREE], btree_buckets,
						GFP_KERNEL)) {
		err = "ca->free[RESERVE_BTREE] alloc failed";
		goto err_btree_alloc;
	}

	if (!init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca),
							GFP_KERNEL)) {
		err = "ca->free[RESERVE_PRIO] alloc failed";
		goto err_prio_alloc;
	}

	if (!init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL)) {
		err = "ca->free[RESERVE_MOVINGGC] alloc failed";
		goto err_movinggc_alloc;
	}

	if (!init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL)) {
		err = "ca->free[RESERVE_NONE] alloc failed";
		goto err_none_alloc;
	}

	if (!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL)) {
		err = "ca->free_inc alloc failed";
		goto err_free_inc_alloc;
	}

	if (!init_heap(&ca->heap, free << 3, GFP_KERNEL)) {
		err = "ca->heap alloc failed";
		goto err_heap_alloc;
	}

	ca->buckets = vzalloc(array_size(sizeof(struct bucket),
			      ca->sb.nbuckets));
	if (!ca->buckets) {
		err = "ca->buckets alloc failed";
		goto err_buckets_alloc;
	}

	ca->prio_buckets = kzalloc(array3_size(sizeof(uint64_t),
				   prio_buckets(ca), 2),
				   GFP_KERNEL);
	if (!ca->prio_buckets) {
		err = "ca->prio_buckets alloc failed";
		goto err_prio_buckets_alloc;
	}

	ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca);
	if (!ca->disk_buckets) {
		err = "ca->disk_buckets alloc failed";
		goto err_disk_buckets_alloc;
	}

	ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca);

	for_each_bucket(b, ca)
		atomic_set(&b->pin, 0);
	return 0;

err_disk_buckets_alloc:
	kfree(ca->prio_buckets);
err_prio_buckets_alloc:
	vfree(ca->buckets);
err_buckets_alloc:
	free_heap(&ca->heap);
err_heap_alloc:
	free_fifo(&ca->free_inc);
err_free_inc_alloc:
	free_fifo(&ca->free[RESERVE_NONE]);
err_none_alloc:
	free_fifo(&ca->free[RESERVE_MOVINGGC]);
err_movinggc_alloc:
	free_fifo(&ca->free[RESERVE_PRIO]);
err_prio_alloc:
	free_fifo(&ca->free[RESERVE_BTREE]);
err_btree_alloc:
err_free:
	module_put(THIS_MODULE);
	if (err)
		pr_notice("error %s: %s", ca->cache_dev_name, err);
	return ret;
}

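/*
 * Set up a cache device from its on-disk superblock and register it
 * with (or create) the cache set identified by the superblock's set UUID.
 */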
static int register_cache(struct cache_sb *sb, struct page *sb_page,
				struct block_device *bdev, struct cache *ca)
{
	const char *err = NULL; /* must be set for any error case */
	int ret = 0;

	bdevname(bdev, ca->cache_dev_name);
	memcpy(&ca->sb, sb, sizeof(struct cache_sb));
	ca->bdev = bdev;
	ca->bdev->bd_holder = ca;

	bio_init(&ca->sb_bio, ca->sb_bio.bi_inline_vecs, 1);
	bio_first_bvec_all(&ca->sb_bio)->bv_page = sb_page;
	get_page(sb_page);

	if (blk_queue_discard(bdev_get_queue(bdev)))
		ca->discard = CACHE_DISCARD(&ca->sb);

	ret = cache_alloc(ca);
	if (ret != 0) {
		/*
		 * If we failed here, it means ca->kobj is not initialized yet,
		 * kobject_put() won't be called and there is no chance to
		 * call blkdev_put() to bdev in bch_cache_release(). So we
		 * explicitly call blkdev_put() here.
		 */
		blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
		if (ret == -ENOMEM)
			err = "cache_alloc(): -ENOMEM";
		else if (ret == -EPERM)
			err = "cache_alloc(): cache device is too small";
		else
			err = "cache_alloc(): unknown error";
		goto err;
	}

	if (kobject_add(&ca->kobj,
			&part_to_dev(bdev->bd_part)->kobj,
			"bcache")) {
		err = "error calling kobject_add";
		ret = -ENOMEM;
		goto out;
	}

	mutex_lock(&bch_register_lock);
	err = register_cache_set(ca);
	mutex_unlock(&bch_register_lock);

	if (err) {
		ret = -ENODEV;
		goto out;
	}

	pr_info("registered cache device %s", ca->cache_dev_name);

out:
	kobject_put(&ca->kobj);

err:
	if (err)
		pr_notice("error %s: %s", ca->cache_dev_name, err);

	return ret;
}

/* Global interfaces/init */

static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
			       const char *buffer, size_t size);

kobj_attribute_write(register,		register_bcache);
kobj_attribute_write(register_quiet,	register_bcache);

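/*
 * bch_is_open_backing(), bch_is_open_cache() and bch_is_open() check
 * whether a block device is already registered as a backing or cache
 * device, so re-registration can be reported instead of "device busy".
 */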
static bool bch_is_open_backing(struct block_device *bdev)
{
	struct cache_set *c, *tc;
	struct cached_dev *dc, *t;

	list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
		list_for_each_entry_safe(dc, t, &c->cached_devs, list)
			if (dc->bdev == bdev)
				return true;
	list_for_each_entry_safe(dc, t, &uncached_devices, list)
		if (dc->bdev == bdev)
			return true;
	return false;
}

static bool bch_is_open_cache(struct block_device *bdev)
{
	struct cache_set *c, *tc;
	struct cache *ca;
	unsigned int i;

	list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
		for_each_cache(ca, c, i)
			if (ca->bdev == bdev)
				return true;
	return false;
}

static bool bch_is_open(struct block_device *bdev)
{
	return bch_is_open_cache(bdev) || bch_is_open_backing(bdev);
}

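/*
 * sysfs write handler for /sys/fs/bcache/register and register_quiet:
 * open the device at the written path, read its superblock and register
 * it as either a backing device or a cache device.
 */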
static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
			       const char *buffer, size_t size)
{
	ssize_t ret = -EINVAL;
	const char *err = "cannot allocate memory";
	char *path = NULL;
	struct cache_sb *sb = NULL;
	struct block_device *bdev = NULL;
	struct page *sb_page = NULL;

	if (!try_module_get(THIS_MODULE))
		return -EBUSY;

	path = kstrndup(buffer, size, GFP_KERNEL);
	if (!path)
		goto err;

	sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL);
	if (!sb)
		goto err;

	err = "failed to open device";
	bdev = blkdev_get_by_path(strim(path),
				  FMODE_READ|FMODE_WRITE|FMODE_EXCL,
				  sb);
	if (IS_ERR(bdev)) {
		if (bdev == ERR_PTR(-EBUSY)) {
			bdev = lookup_bdev(strim(path));
			mutex_lock(&bch_register_lock);
			if (!IS_ERR(bdev) && bch_is_open(bdev))
				err = "device already registered";
			else
				err = "device busy";
			mutex_unlock(&bch_register_lock);
			if (!IS_ERR(bdev))
				bdput(bdev);
			if (attr == &ksysfs_register_quiet)
				goto quiet_out;
		}
		goto err;
	}

	err = "failed to set blocksize";
	if (set_blocksize(bdev, 4096))
		goto err_close;

	err = read_super(sb, bdev, &sb_page);
	if (err)
		goto err_close;

	err = "failed to register device";
	if (SB_IS_BDEV(sb)) {
		struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);

		if (!dc)
			goto err_close;

		mutex_lock(&bch_register_lock);
		ret = register_bdev(sb, sb_page, bdev, dc);
		mutex_unlock(&bch_register_lock);
		/* blkdev_put() will be called in cached_dev_free() */
		if (ret < 0)
			goto err;
	} else {
		struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);

		if (!ca)
			goto err_close;

		/* blkdev_put() will be called in bch_cache_release() */
		if (register_cache(sb, sb_page, bdev, ca) != 0)
			goto err;
	}
quiet_out:
	ret = size;
out:
	if (sb_page)
		put_page(sb_page);
	kfree(sb);
	kfree(path);
	module_put(THIS_MODULE);
	return ret;

err_close:
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
err:
	pr_info("error %s: %s", path, err);
	goto out;
}

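/*
 * Reboot notifier: on restart, halt or power off, stop every cache set
 * and backing device and wait up to ten seconds for them to finish
 * closing.
 */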
static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
{
	if (code == SYS_DOWN ||
	    code == SYS_HALT ||
	    code == SYS_POWER_OFF) {
		DEFINE_WAIT(wait);
		unsigned long start = jiffies;
		bool stopped = false;

		struct cache_set *c, *tc;
		struct cached_dev *dc, *tdc;

		mutex_lock(&bch_register_lock);

		if (list_empty(&bch_cache_sets) &&
		    list_empty(&uncached_devices))
			goto out;

		pr_info("Stopping all devices:");

		list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
			bch_cache_set_stop(c);

		list_for_each_entry_safe(dc, tdc, &uncached_devices, list)
			bcache_device_stop(&dc->disk);

		mutex_unlock(&bch_register_lock);

		/*
		 * Give an early chance for other kthreads and
		 * kworkers to stop themselves
		 */
		schedule();

		/* What's a condition variable? */
		while (1) {
			long timeout = start + 10 * HZ - jiffies;

			mutex_lock(&bch_register_lock);
			stopped = list_empty(&bch_cache_sets) &&
				list_empty(&uncached_devices);

			if (timeout < 0 || stopped)
				break;

			prepare_to_wait(&unregister_wait, &wait,
					TASK_UNINTERRUPTIBLE);

			mutex_unlock(&bch_register_lock);
			schedule_timeout(timeout);
		}

		finish_wait(&unregister_wait, &wait);

		if (stopped)
			pr_info("All devices stopped");
		else
			pr_notice("Timeout waiting for devices to be closed");
out:
		mutex_unlock(&bch_register_lock);
	}

	return NOTIFY_DONE;
}

static struct notifier_block reboot = {
	.notifier_call	= bcache_reboot,
	.priority	= INT_MAX, /* before any real devices */
};

static void bcache_exit(void)
{
	bch_debug_exit();
	bch_request_exit();
	if (bcache_kobj)
		kobject_put(bcache_kobj);
	if (bcache_wq)
		destroy_workqueue(bcache_wq);
	if (bch_journal_wq)
		destroy_workqueue(bch_journal_wq);

	if (bcache_major)
		unregister_blkdev(bcache_major, "bcache");
	unregister_reboot_notifier(&reboot);
	mutex_destroy(&bch_register_lock);
}

/* Check and fixup module parameters */
static void check_module_parameters(void)
{
	if (bch_cutoff_writeback_sync == 0)
		bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC;
	else if (bch_cutoff_writeback_sync > CUTOFF_WRITEBACK_SYNC_MAX) {
		pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u",
			bch_cutoff_writeback_sync, CUTOFF_WRITEBACK_SYNC_MAX);
		bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC_MAX;
	}

	if (bch_cutoff_writeback == 0)
		bch_cutoff_writeback = CUTOFF_WRITEBACK;
	else if (bch_cutoff_writeback > CUTOFF_WRITEBACK_MAX) {
		pr_warn("set bch_cutoff_writeback (%u) to max value %u",
			bch_cutoff_writeback, CUTOFF_WRITEBACK_MAX);
		bch_cutoff_writeback = CUTOFF_WRITEBACK_MAX;
	}

	if (bch_cutoff_writeback > bch_cutoff_writeback_sync) {
		pr_warn("set bch_cutoff_writeback (%u) to %u",
			bch_cutoff_writeback, bch_cutoff_writeback_sync);
		bch_cutoff_writeback = bch_cutoff_writeback_sync;
	}
}

static int __init bcache_init(void)
{
	static const struct attribute *files[] = {
		&ksysfs_register.attr,
		&ksysfs_register_quiet.attr,
		NULL
	};

	check_module_parameters();

	mutex_init(&bch_register_lock);
	init_waitqueue_head(&unregister_wait);
	register_reboot_notifier(&reboot);

	bcache_major = register_blkdev(0, "bcache");
	if (bcache_major < 0) {
		unregister_reboot_notifier(&reboot);
		mutex_destroy(&bch_register_lock);
		return bcache_major;
	}

	bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0);
	if (!bcache_wq)
		goto err;

	bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0);
	if (!bch_journal_wq)
		goto err;

	bcache_kobj = kobject_create_and_add("bcache", fs_kobj);
	if (!bcache_kobj)
		goto err;

	if (bch_request_init() ||
	    sysfs_create_files(bcache_kobj, files))
		goto err;

	bch_debug_init();
	closure_debug_init();

	return 0;
err:
	bcache_exit();
	return -ENOMEM;
}

/*
 * Module hooks
 */
module_exit(bcache_exit);
module_init(bcache_init);

module_param(bch_cutoff_writeback, uint, 0);
MODULE_PARM_DESC(bch_cutoff_writeback, "threshold to cutoff writeback");

module_param(bch_cutoff_writeback_sync, uint, 0);
MODULE_PARM_DESC(bch_cutoff_writeback_sync, "hard threshold to cutoff writeback");

MODULE_DESCRIPTION("Bcache: a Linux block layer cache");
MODULE_AUTHOR("Kent Overstreet <kent.overstreet@gmail.com>");
MODULE_LICENSE("GPL");