Commit 03891f9c authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'dm-4.5-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

 - The most significant set of changes this cycle is the Forward Error
   Correction (FEC) support that has been added to the DM verity target.

   Google uses DM verity on all Android devices and it is believed that
   this FEC support will enable DM verity to recover from storage
   failures seen since DM verity was first deployed as part of Android.

 - A stable fix for a race in the destruction of DM thin pool's
   workqueue

 - A stable fix for hung IO if a DM snapshot copy hit an error

 - A few small cleanups in DM core and DM persistent data.

 - A couple DM thinp range discard improvements (address atomicity of
   finding a range and the efficiency of discarding a partially mapped
   thin device)

 - Add ability to debug DM bufio leaks by recording stack trace when a
   buffer is allocated.  Upon detected leak the recorded stack is
   dumped.

* tag 'dm-4.5-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm snapshot: fix hung bios when copy error occurs
  dm thin: bump thin and thin-pool target versions
  dm thin: fix race condition when destroying thin pool workqueue
  dm space map metadata: remove unused variable in brb_pop()
  dm verity: add ignore_zero_blocks feature
  dm verity: add support for forward error correction
  dm verity: factor out verity_for_bv_block()
  dm verity: factor out structures and functions useful to separate object
  dm verity: move dm-verity.c to dm-verity-target.c
  dm verity: separate function for parsing opt args
  dm verity: clean up duplicate hashing code
  dm btree: factor out need_insert() helper
  dm bufio: use BUG_ON instead of conditional call to BUG
  dm bufio: store stacktrace in buffers to help find buffer leaks
  dm bufio: return NULL to improve code clarity
  dm block manager: cleanup code that prints stacktrace
  dm: don't save and restore bi_private
  dm thin metadata: make dm_thin_find_mapped_range() atomic
  dm thin metadata: speed up discard of partially mapped volumes
parents 47c62e4b 385277bf
......@@ -18,11 +18,11 @@ Construction Parameters
0 is the original format used in the Chromium OS.
The salt is appended when hashing, digests are stored continuously and
the rest of the block is padded with zeros.
the rest of the block is padded with zeroes.
1 is the current format that should be used for new devices.
The salt is prepended when hashing and each digest is
padded with zeros to the power of two.
padded with zeroes to the power of two.
<dev>
This is the device containing data, the integrity of which needs to be
......@@ -79,6 +79,37 @@ restart_on_corruption
not compatible with ignore_corruption and requires user space support to
avoid restart loops.
ignore_zero_blocks
Do not verify blocks that are expected to contain zeroes and always return
zeroes instead. This may be useful if the partition contains unused blocks
that are not guaranteed to contain zeroes.
use_fec_from_device <fec_dev>
Use forward error correction (FEC) to recover from corruption if hash
verification fails. Use encoding data from the specified device. This
may be the same device where data and hash blocks reside, in which case
fec_start must be outside data and hash areas.
If the encoding data covers additional metadata, it must be accessible
on the hash device after the hash blocks.
Note: block sizes for data and hash devices must match. Also, if the
verity <dev> is encrypted the <fec_dev> should be too.
fec_roots <num>
Number of generator roots. This equals to the number of parity bytes in
the encoding data. For example, in RS(M, N) encoding, the number of roots
is M-N.
fec_blocks <num>
The number of encoding data blocks on the FEC device. The block size for
the FEC device is <data_block_size>.
fec_start <offset>
This is the offset, in <data_block_size> blocks, from the start of the
FEC device to the beginning of the encoding data.
Theory of operation
===================
......@@ -98,6 +129,11 @@ per-block basis. This allows for a lightweight hash computation on first read
into the page cache. Block hashes are stored linearly, aligned to the nearest
block size.
If forward error correction (FEC) support is enabled any recovery of
corrupted data will be verified using the cryptographic hash of the
corresponding data. This is why combining error correction with
integrity checking is essential.
Hash Tree
---------
......
......@@ -240,6 +240,15 @@ config DM_BUFIO
as a cache, holding recently-read blocks in memory and performing
delayed writes.
config DM_DEBUG_BLOCK_STACK_TRACING
bool "Keep stack trace of persistent data block lock holders"
depends on STACKTRACE_SUPPORT && DM_BUFIO
select STACKTRACE
---help---
Enable this for messages that may help debug problems with the
block manager locking used by thin provisioning and caching.
If unsure, say N.
config DM_BIO_PRISON
tristate
depends on BLK_DEV_DM
......@@ -458,6 +467,18 @@ config DM_VERITY
If unsure, say N.
config DM_VERITY_FEC
bool "Verity forward error correction support"
depends on DM_VERITY
select REED_SOLOMON
select REED_SOLOMON_DEC8
---help---
Add forward error correction support to dm-verity. This option
makes it possible to use pre-generated error correction data to
recover from corrupted blocks.
If unsure, say N.
config DM_SWITCH
tristate "Switch target support (EXPERIMENTAL)"
depends on BLK_DEV_DM
......
......@@ -16,6 +16,7 @@ dm-cache-mq-y += dm-cache-policy-mq.o
dm-cache-smq-y += dm-cache-policy-smq.o
dm-cache-cleaner-y += dm-cache-policy-cleaner.o
dm-era-y += dm-era-target.o
dm-verity-y += dm-verity-target.o
md-mod-y += md.o bitmap.o
raid456-y += raid5.o raid5-cache.o
......@@ -63,3 +64,7 @@ obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o
ifeq ($(CONFIG_DM_UEVENT),y)
dm-mod-objs += dm-uevent.o
endif
ifeq ($(CONFIG_DM_VERITY_FEC),y)
dm-verity-objs += dm-verity-fec.o
endif
......@@ -16,6 +16,7 @@
#include <linux/shrinker.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/stacktrace.h>
#define DM_MSG_PREFIX "bufio"
......@@ -149,6 +150,11 @@ struct dm_buffer {
struct list_head write_list;
struct bio bio;
struct bio_vec bio_vec[DM_BUFIO_INLINE_VECS];
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
#define MAX_STACK 10
struct stack_trace stack_trace;
unsigned long stack_entries[MAX_STACK];
#endif
};
/*----------------------------------------------------------------*/
......@@ -253,6 +259,17 @@ static LIST_HEAD(dm_bufio_all_clients);
*/
static DEFINE_MUTEX(dm_bufio_clients_lock);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
static void buffer_record_stack(struct dm_buffer *b)
{
b->stack_trace.nr_entries = 0;
b->stack_trace.max_entries = MAX_STACK;
b->stack_trace.entries = b->stack_entries;
b->stack_trace.skip = 2;
save_stack_trace(&b->stack_trace);
}
#endif
/*----------------------------------------------------------------
* A red/black tree acts as an index for all the buffers.
*--------------------------------------------------------------*/
......@@ -454,6 +471,9 @@ static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask)
adjust_total_allocated(b->data_mode, (long)c->block_size);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
memset(&b->stack_trace, 0, sizeof(b->stack_trace));
#endif
return b;
}
......@@ -1063,12 +1083,16 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
dm_bufio_lock(c);
b = __bufio_new(c, block, nf, &need_submit, &write_list);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
if (b && b->hold_count == 1)
buffer_record_stack(b);
#endif
dm_bufio_unlock(c);
__flush_write_list(&write_list);
if (!b)
return b;
return NULL;
if (need_submit)
submit_io(b, READ, b->block, read_endio);
......@@ -1462,6 +1486,7 @@ static void drop_buffers(struct dm_bufio_client *c)
{
struct dm_buffer *b;
int i;
bool warned = false;
BUG_ON(dm_bufio_in_request());
......@@ -1476,9 +1501,21 @@ static void drop_buffers(struct dm_bufio_client *c)
__free_buffer_wake(b);
for (i = 0; i < LIST_SIZE; i++)
list_for_each_entry(b, &c->lru[i], lru_list)
list_for_each_entry(b, &c->lru[i], lru_list) {
WARN_ON(!warned);
warned = true;
DMERR("leaked buffer %llx, hold count %u, list %d",
(unsigned long long)b->block, b->hold_count, i);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
print_stack_trace(&b->stack_trace, 1);
b->hold_count = 0; /* mark unclaimed to avoid BUG_ON below */
#endif
}
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
while ((b = __get_unclaimed_buffer(c)))
__free_buffer_wake(b);
#endif
for (i = 0; i < LIST_SIZE; i++)
BUG_ON(!list_empty(&c->lru[i]));
......@@ -1891,8 +1928,7 @@ static void __exit dm_bufio_exit(void)
bug = 1;
}
if (bug)
BUG();
BUG_ON(bug);
}
module_init(dm_bufio_init)
......
......@@ -118,14 +118,12 @@ static void iot_io_end(struct io_tracker *iot, sector_t len)
*/
struct dm_hook_info {
bio_end_io_t *bi_end_io;
void *bi_private;
};
static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
bio_end_io_t *bi_end_io, void *bi_private)
{
h->bi_end_io = bio->bi_end_io;
h->bi_private = bio->bi_private;
bio->bi_end_io = bi_end_io;
bio->bi_private = bi_private;
......@@ -134,7 +132,6 @@ static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
bio->bi_end_io = h->bi_end_io;
bio->bi_private = h->bi_private;
}
/*----------------------------------------------------------------*/
......
......@@ -69,7 +69,7 @@ struct dm_exception_store_type {
* Update the metadata with this exception.
*/
void (*commit_exception) (struct dm_exception_store *store,
struct dm_exception *e,
struct dm_exception *e, int valid,
void (*callback) (void *, int success),
void *callback_context);
......
......@@ -695,7 +695,7 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
}
static void persistent_commit_exception(struct dm_exception_store *store,
struct dm_exception *e,
struct dm_exception *e, int valid,
void (*callback) (void *, int success),
void *callback_context)
{
......@@ -704,6 +704,9 @@ static void persistent_commit_exception(struct dm_exception_store *store,
struct core_exception ce;
struct commit_callback *cb;
if (!valid)
ps->valid = 0;
ce.old_chunk = e->old_chunk;
ce.new_chunk = e->new_chunk;
write_exception(ps, ps->current_committed++, &ce);
......
......@@ -52,12 +52,12 @@ static int transient_prepare_exception(struct dm_exception_store *store,
}
static void transient_commit_exception(struct dm_exception_store *store,
struct dm_exception *e,
struct dm_exception *e, int valid,
void (*callback) (void *, int success),
void *callback_context)
{
/* Just succeed */
callback(callback_context, 1);
callback(callback_context, valid);
}
static void transient_usage(struct dm_exception_store *store,
......
......@@ -207,7 +207,6 @@ struct dm_snap_pending_exception {
*/
struct bio *full_bio;
bio_end_io_t *full_bio_end_io;
void *full_bio_private;
};
/*
......@@ -1438,8 +1437,9 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err)
dm_table_event(s->ti->table);
}
static void pending_complete(struct dm_snap_pending_exception *pe, int success)
static void pending_complete(void *context, int success)
{
struct dm_snap_pending_exception *pe = context;
struct dm_exception *e;
struct dm_snapshot *s = pe->snap;
struct bio *origin_bios = NULL;
......@@ -1485,10 +1485,8 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
snapshot_bios = bio_list_get(&pe->snapshot_bios);
origin_bios = bio_list_get(&pe->origin_bios);
full_bio = pe->full_bio;
if (full_bio) {
if (full_bio)
full_bio->bi_end_io = pe->full_bio_end_io;
full_bio->bi_private = pe->full_bio_private;
}
increment_pending_exceptions_done_count();
up_write(&s->lock);
......@@ -1509,24 +1507,13 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
free_pending_exception(pe);
}
static void commit_callback(void *context, int success)
{
struct dm_snap_pending_exception *pe = context;
pending_complete(pe, success);
}
static void complete_exception(struct dm_snap_pending_exception *pe)
{
struct dm_snapshot *s = pe->snap;
if (unlikely(pe->copy_error))
pending_complete(pe, 0);
else
/* Update the metadata if we are persistent */
s->store->type->commit_exception(s->store, &pe->e,
commit_callback, pe);
s->store->type->commit_exception(s->store, &pe->e, !pe->copy_error,
pending_complete, pe);
}
/*
......@@ -1605,7 +1592,6 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
pe->full_bio = bio;
pe->full_bio_end_io = bio->bi_end_io;
pe->full_bio_private = bio->bi_private;
callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
copy_callback, pe);
......
......@@ -1395,7 +1395,20 @@ static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time)
return td->snapshotted_time > time;
}
int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
static void unpack_lookup_result(struct dm_thin_device *td, __le64 value,
struct dm_thin_lookup_result *result)
{
uint64_t block_time = 0;
dm_block_t exception_block;
uint32_t exception_time;
block_time = le64_to_cpu(value);
unpack_block_time(block_time, &exception_block, &exception_time);
result->block = exception_block;
result->shared = __snapshotted_since(td, exception_time);
}
static int __find_block(struct dm_thin_device *td, dm_block_t block,
int can_issue_io, struct dm_thin_lookup_result *result)
{
int r;
......@@ -1404,36 +1417,53 @@ int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
dm_block_t keys[2] = { td->id, block };
struct dm_btree_info *info;
down_read(&pmd->root_lock);
if (pmd->fail_io) {
up_read(&pmd->root_lock);
return -EINVAL;
}
if (can_issue_io) {
info = &pmd->info;
} else
info = &pmd->nb_info;
r = dm_btree_lookup(info, pmd->root, keys, &value);
if (!r) {
uint64_t block_time = 0;
dm_block_t exception_block;
uint32_t exception_time;
if (!r)
unpack_lookup_result(td, value, result);
block_time = le64_to_cpu(value);
unpack_block_time(block_time, &exception_block,
&exception_time);
result->block = exception_block;
result->shared = __snapshotted_since(td, exception_time);
return r;
}
int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
int can_issue_io, struct dm_thin_lookup_result *result)
{
int r;
struct dm_pool_metadata *pmd = td->pmd;
down_read(&pmd->root_lock);
if (pmd->fail_io) {
up_read(&pmd->root_lock);
return -EINVAL;
}
r = __find_block(td, block, can_issue_io, result);
up_read(&pmd->root_lock);
return r;
}
/* FIXME: write a more efficient one in btree */
int dm_thin_find_mapped_range(struct dm_thin_device *td,
static int __find_next_mapped_block(struct dm_thin_device *td, dm_block_t block,
dm_block_t *vblock,
struct dm_thin_lookup_result *result)
{
int r;
__le64 value;
struct dm_pool_metadata *pmd = td->pmd;
dm_block_t keys[2] = { td->id, block };
r = dm_btree_lookup_next(&pmd->info, pmd->root, keys, vblock, &value);
if (!r)
unpack_lookup_result(td, value, result);
return r;
}
static int __find_mapped_range(struct dm_thin_device *td,
dm_block_t begin, dm_block_t end,
dm_block_t *thin_begin, dm_block_t *thin_end,
dm_block_t *pool_begin, bool *maybe_shared)
......@@ -1445,21 +1475,11 @@ int dm_thin_find_mapped_range(struct dm_thin_device *td,
if (end < begin)
return -ENODATA;
/*
* Find first mapped block.
*/
while (begin < end) {
r = dm_thin_find_block(td, begin, true, &lookup);
if (r) {
if (r != -ENODATA)
r = __find_next_mapped_block(td, begin, &begin, &lookup);
if (r)
return r;
} else
break;
begin++;
}
if (begin == end)
if (begin >= end)
return -ENODATA;
*thin_begin = begin;
......@@ -1469,7 +1489,7 @@ int dm_thin_find_mapped_range(struct dm_thin_device *td,
begin++;
pool_end = *pool_begin + 1;
while (begin != end) {
r = dm_thin_find_block(td, begin, true, &lookup);
r = __find_block(td, begin, true, &lookup);
if (r) {
if (r == -ENODATA)
break;
......@@ -1489,6 +1509,24 @@ int dm_thin_find_mapped_range(struct dm_thin_device *td,
return 0;
}
int dm_thin_find_mapped_range(struct dm_thin_device *td,
dm_block_t begin, dm_block_t end,
dm_block_t *thin_begin, dm_block_t *thin_end,
dm_block_t *pool_begin, bool *maybe_shared)
{
int r = -EINVAL;
struct dm_pool_metadata *pmd = td->pmd;
down_read(&pmd->root_lock);
if (!pmd->fail_io) {
r = __find_mapped_range(td, begin, end, thin_begin, thin_end,
pool_begin, maybe_shared);
}
up_read(&pmd->root_lock);
return r;
}
static int __insert(struct dm_thin_device *td, dm_block_t block,
dm_block_t data_block)
{
......
......@@ -3453,8 +3453,8 @@ static void pool_postsuspend(struct dm_target *ti)
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
cancel_delayed_work(&pool->waker);
cancel_delayed_work(&pool->no_space_timeout);
cancel_delayed_work_sync(&pool->waker);
cancel_delayed_work_sync(&pool->no_space_timeout);
flush_workqueue(pool->wq);
(void) commit(pool);
}
......@@ -3886,7 +3886,7 @@ static struct target_type pool_target = {
.name = "thin-pool",
.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
DM_TARGET_IMMUTABLE,
.version = {1, 16, 0},
.version = {1, 17, 0},
.module = THIS_MODULE,
.ctr = pool_ctr,
.dtr = pool_dtr,
......@@ -4260,7 +4260,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type thin_target = {
.name = "thin",
.version = {1, 16, 0},
.version = {1, 17, 0},
.module = THIS_MODULE,
.ctr = thin_ctr,
.dtr = thin_dtr,
......
This diff is collapsed.
/*
* Copyright (C) 2015 Google, Inc.
*
* Author: Sami Tolvanen <samitolvanen@google.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#ifndef DM_VERITY_FEC_H
#define DM_VERITY_FEC_H
#include "dm-verity.h"
#include <linux/rslib.h>
/* Reed-Solomon(M, N) parameters */
#define DM_VERITY_FEC_RSM 255
#define DM_VERITY_FEC_MAX_RSN 253
#define DM_VERITY_FEC_MIN_RSN 231 /* ~10% space overhead */
/* buffers for deinterleaving and decoding */
#define DM_VERITY_FEC_BUF_PREALLOC 1 /* buffers to preallocate */
#define DM_VERITY_FEC_BUF_RS_BITS 4 /* 1 << RS blocks per buffer */
/* we need buffers for at most 1 << block size RS blocks */
#define DM_VERITY_FEC_BUF_MAX \
(1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS))
#define DM_VERITY_OPT_FEC_DEV "use_fec_from_device"
#define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks"
#define DM_VERITY_OPT_FEC_START "fec_start"
#define DM_VERITY_OPT_FEC_ROOTS "fec_roots"
/* configuration */
struct dm_verity_fec {
struct dm_dev *dev; /* parity data device */
struct dm_bufio_client *data_bufio; /* for data dev access */
struct dm_bufio_client *bufio; /* for parity data access */
sector_t start; /* parity data start in blocks */
sector_t blocks; /* number of blocks covered */
sector_t rounds; /* number of interleaving rounds */
sector_t hash_blocks; /* blocks covered after v->hash_start */
unsigned char roots; /* number of parity bytes, M-N of RS(M, N) */
unsigned char rsn; /* N of RS(M, N) */
mempool_t *rs_pool; /* mempool for fio->rs */
mempool_t *prealloc_pool; /* mempool for preallocated buffers */
mempool_t *extra_pool; /* mempool for extra buffers */
mempool_t *output_pool; /* mempool for output */
struct kmem_cache *cache; /* cache for buffers */
};
/* per-bio data */
struct dm_verity_fec_io {
struct rs_control *rs; /* Reed-Solomon state */
int erasures[DM_VERITY_FEC_MAX_RSN]; /* erasures for decode_rs8 */
u8 *bufs[DM_VERITY_FEC_BUF_MAX]; /* bufs for deinterleaving */
unsigned nbufs; /* number of buffers allocated */
u8 *output; /* buffer for corrected output */
size_t output_pos;
};
#ifdef CONFIG_DM_VERITY_FEC
/* each feature parameter requires a value */
#define DM_VERITY_OPTS_FEC 8
extern bool verity_fec_is_enabled(struct dm_verity *v);
extern int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
enum verity_block_type type, sector_t block,
u8 *dest, struct bvec_iter *iter);
extern unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz,
char *result, unsigned maxlen);
extern void verity_fec_finish_io(struct dm_verity_io *io);
extern void verity_fec_init_io(struct dm_verity_io *io);
extern bool verity_is_fec_opt_arg(const char *arg_name);
extern int verity_fec_parse_opt_args(struct dm_arg_set *as,
struct dm_verity *v, unsigned *argc,
const char *arg_name);
extern void verity_fec_dtr(struct dm_verity *v);
extern int verity_fec_ctr_alloc(struct dm_verity *v);
extern int verity_fec_ctr(struct dm_verity *v);
#else /* !CONFIG_DM_VERITY_FEC */
#define DM_VERITY_OPTS_FEC 0
static inline bool verity_fec_is_enabled(struct dm_verity *v)
{
return false;
}
static inline int verity_fec_decode(struct dm_verity *v,
struct dm_verity_io *io,
enum verity_block_type type,
sector_t block, u8 *dest,
struct bvec_iter *iter)
{
return -EOPNOTSUPP;
}
static inline unsigned verity_fec_status_table(struct dm_verity *v,
unsigned sz, char *result,
unsigned maxlen)
{
return sz;
}
static inline void verity_fec_finish_io(struct dm_verity_io *io)
{
}
static inline void verity_fec_init_io(struct dm_verity_io *io)
{
}
static inline bool verity_is_fec_opt_arg(const char *arg_name)
{
return false;
}
static inline int verity_fec_parse_opt_args(struct dm_arg_set *as,
struct dm_verity *v,
unsigned *argc,
const char *arg_name)
{
return -EINVAL;
}
static inline void verity_fec_dtr(struct dm_verity *v)
{
}
static inline int verity_fec_ctr_alloc(struct dm_verity *v)
{
return 0;
}
static inline int verity_fec_ctr(struct dm_verity *v)
{
return 0;
}
#endif /* CONFIG_DM_VERITY_FEC */
#endif /* DM_VERITY_FEC_H */
/*
* Copyright (C) 2012 Red Hat, Inc.
* Copyright (C) 2015 Google, Inc.
*
* Author: Mikulas Patocka <mpatocka@redhat.com>
*
* Based on Chromium dm-verity driver (C) 2011 The Chromium OS Authors
*
* This file is released under the GPLv2.
*/
#ifndef DM_VERITY_H
#define DM_VERITY_H
#include "dm-bufio.h"
#include <linux/device-mapper.h>
#include <crypto/hash.h>
#define DM_VERITY_MAX_LEVELS 63
enum verity_mode {
DM_VERITY_MODE_EIO,
DM_VERITY_MODE_LOGGING,
DM_VERITY_MODE_RESTART
};
enum verity_block_type {
DM_VERITY_BLOCK_TYPE_DATA,
DM_VERITY_BLOCK_TYPE_METADATA
};
struct dm_verity_fec;
struct dm_verity {
struct dm_dev *data_dev;
struct dm_dev *hash_dev;
struct dm_target *ti;
struct dm_bufio_client *bufio;
char *alg_name;
struct crypto_shash *tfm;
u8 *root_digest; /* digest of the root block */
u8 *salt; /* salt: its size is salt_size */
u8 *zero_digest; /* digest for a zero block */
unsigned salt_size;
sector_t data_start; /* data offset in 512-byte sectors */
sector_t hash_start; /* hash start in blocks */
sector_t data_blocks; /* the number of data blocks */
sector_t hash_blocks; /* the number of hash blocks */
unsigned char data_dev_block_bits; /* log2(data blocksize) */
unsigned char hash_dev_block_bits; /* log2(hash blocksize) */
unsigned char hash_per_block_bits; /* log2(hashes in hash block) */
unsigned char levels; /* the number of tree levels */
unsigned char version;
unsigned digest_size; /* digest size for the current hash algorithm */
unsigned shash_descsize;/* the size of temporary space for crypto */
int hash_failed; /* set to 1 if hash of any block failed */
enum verity_mode mode; /* mode for handling verification errors */
unsigned corrupted_errs;/* Number of errors for corrupted blocks */
struct workqueue_struct *verify_wq;
/* starting blocks for each tree level. 0 is the lowest level. */
sector_t hash_level_block[DM_VERITY_MAX_LEVELS];
struct dm_verity_fec *fec; /* forward error correction */
};
struct dm_verity_io {
struct dm_verity *v;
/* original value of bio->bi_end_io */
bio_end_io_t *orig_bi_end_io;
sector_t block;
unsigned n_blocks;
struct bvec_iter iter;
struct work_struct work;
/*
* Three variably-size fields follow this struct:
*
* u8 hash_desc[v->shash_descsize];
* u8 real_digest[v->digest_size];
* u8 want_digest[v->digest_size];
*
* To access them use: verity_io_hash_desc(), verity_io_real_digest()
* and verity_io_want_digest().
*/
};
static inline struct shash_desc *verity_io_hash_desc(struct dm_verity *v,
struct dm_verity_io *io)
{
return (struct shash_desc *)(io + 1);
}
static inline u8 *verity_io_real_digest(struct dm_verity *v,
struct dm_verity_io *io)
{
return (u8 *)(io + 1) + v->shash_descsize;
}
static inline u8 *verity_io_want_digest(struct dm_verity *v,
struct dm_verity_io *io)
{
return (u8 *)(io + 1) + v->shash_descsize + v->digest_size;
}
static inline u8 *verity_io_digest_end(struct dm_verity *v,
struct dm_verity_io *io)
{
return verity_io_want_digest(v, io) + v->digest_size;
}
extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
struct bvec_iter *iter,
int (*process)(struct dm_verity *v,
struct dm_verity_io *io,
u8 *data, size_t len));
extern int verity_hash(struct dm_verity *v, struct shash_desc *desc,
const u8 *data, size_t len, u8 *digest);
extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
sector_t block, u8 *digest, bool *is_zero);
#endif /* DM_VERITY_H */
......@@ -7,12 +7,3 @@ config DM_PERSISTENT_DATA
Library providing immutable on-disk data structure support for
device-mapper targets such as the thin provisioning target.
config DM_DEBUG_BLOCK_STACK_TRACING
bool "Keep stack trace of persistent data block lock holders"
depends on STACKTRACE_SUPPORT && DM_PERSISTENT_DATA
select STACKTRACE
---help---
Enable this for messages that may help debug problems with the
block manager locking used by thin provisioning and caching.
If unsure, say N.
......@@ -97,10 +97,6 @@ static void __del_holder(struct block_lock *lock, struct task_struct *task)
static int __check_holder(struct block_lock *lock)
{
unsigned i;
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
static struct stack_trace t;
static stack_entries entries;
#endif
for (i = 0; i < MAX_HOLDERS; i++) {
if (lock->holders[i] == current) {
......@@ -110,12 +106,7 @@ static int __check_holder(struct block_lock *lock)
print_stack_trace(lock->traces + i, 4);
DMERR("subsequent acquisition attempted here:");
t.nr_entries = 0;
t.max_entries = MAX_STACK;
t.entries = entries;
t.skip = 3;
save_stack_trace(&t);
print_stack_trace(&t, 4);
dump_stack();
#endif
return -EINVAL;
}
......
......@@ -754,12 +754,19 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
return 0;
}
static bool need_insert(struct btree_node *node, uint64_t *keys,
unsigned level, unsigned index)
{
return ((index >= le32_to_cpu(node->header.nr_entries)) ||
(le64_to_cpu(node->keys[index]) != keys[level]));
}
static int insert(struct dm_btree_info *info, dm_block_t root,
uint64_t *keys, void *value, dm_block_t *new_root,
int *inserted)
__dm_written_to_disk(value)
{
int r, need_insert;
int r;
unsigned level, index = -1, last_level = info->levels - 1;
dm_block_t block = root;
struct shadow_spine spine;
......@@ -775,10 +782,8 @@ static int insert(struct dm_btree_info *info, dm_block_t root,
goto bad;
n = dm_block_data(shadow_current(&spine));
need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
(le64_to_cpu(n->keys[index]) != keys[level]));
if (need_insert) {
if (need_insert(n, keys, level, index)) {
dm_block_t new_tree;
__le64 new_le;
......@@ -805,10 +810,8 @@ static int insert(struct dm_btree_info *info, dm_block_t root,
goto bad;
n = dm_block_data(shadow_current(&spine));
need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
(le64_to_cpu(n->keys[index]) != keys[level]));
if (need_insert) {
if (need_insert(n, keys, level, index)) {
if (inserted)
*inserted = 1;
......
......@@ -152,12 +152,9 @@ static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
static int brb_pop(struct bop_ring_buffer *brb)
{
struct block_op *bop;
if (brb_empty(brb))
return -ENODATA;
bop = brb->bops + brb->begin;
brb->begin = brb_next(brb, brb->begin);
return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment