Commit 15c981d1 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-5.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "A number of core changes that make things work better in general, code
  is simpler and cleaner.

  Core changes:

   - per-inode file extent tree, for in memory tracking of contiguous
     extent ranges to make sure i_size adjustments are accurate

   - tree root structures are protected by reference counts, replacing
     SRCU that did not cover some cases

   - leak detector for tree root structures

   - per-transaction pinned extent tracking

   - buffer heads are replaced by bios for super block access

   - speedup of extent back reference resolution, on an example test
     scenario the runtime of send went down from a hour to minutes

   - factor out locking scheme used for subvolume writer and NOCOW
     exclusion, abstracted as DREW lock, double reader-writer exclusion
     (allow either readers or writers)

   - cleanup and abstract extent allocation policies, preparation for
     zoned device support

   - make reflink/clone_range work on inline extents

   - add more cancellation point for relocation, improves long response
     from 'balance cancel'

   - add page migration callback for data pages

   - switch to guid for uuids, with additional cleanups of the interface

   - make ranged full fsyncs more efficient

   - removal of obsolete ioctl flag BTRFS_SUBVOL_CREATE_ASYNC

   - remove b-tree readahead from delayed refs paths, avoiding seek and
     read unnecessary blocks

  Features:

   - v2 of ioctl to delete subvolumes, allowing to delete by id and more
     future extensions

  Fixes:

   - fix qgroup rescan worker that could block umount

   - fix crash during unmount due to race with delayed inode workers

   - fix dellaloc flushing logic that could create unnecessary chunks
     under heavy load

   - fix missing file extent item for hole after ranged fsync

   - several fixes in relocation error handling

  Other:

   - more documentation of relocation, device replace, space
     reservations

   - many random cleanups"

* tag 'for-5.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (210 commits)
  btrfs: fix missing semaphore unlock in btrfs_sync_file
  btrfs: use nofs allocations for running delayed items
  btrfs: sysfs: Use scnprintf() instead of snprintf()
  btrfs: do not resolve backrefs for roots that are being deleted
  btrfs: track reloc roots based on their commit root bytenr
  btrfs: restart relocate_tree_blocks properly
  btrfs: reloc: reorder reservation before root selection
  btrfs: do not readahead in build_backref_tree
  btrfs: do not use readahead for running delayed refs
  btrfs: Remove async_transid from btrfs_mksubvol/create_subvol/create_snapshot
  btrfs: Remove transid argument from btrfs_ioctl_snap_create_transid
  btrfs: Remove BTRFS_SUBVOL_CREATE_ASYNC support
  btrfs: kill the subvol_srcu
  btrfs: make btrfs_cleanup_fs_roots use the radix tree lock
  btrfs: don't take an extra root ref at allocation time
  btrfs: hold a ref on the root on the dead roots list
  btrfs: make inodes hold a ref on their roots
  btrfs: move the root freeing stuff into btrfs_put_root
  btrfs: move ino_cache_inode dropping out of btrfs_free_fs_root
  btrfs: make the extent buffer leak check per fs info
  ...
parents 1455c699 6ff06729
......@@ -11,7 +11,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
block-rsv.o delalloc-space.o block-group.o discard.o
block-rsv.o delalloc-space.o block-group.o discard.o reflink.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
......
......@@ -395,3 +395,11 @@ void btrfs_set_work_high_priority(struct btrfs_work *work)
{
set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
}
void btrfs_flush_workqueue(struct btrfs_workqueue *wq)
{
if (wq->high)
flush_workqueue(wq->high->normal_wq);
flush_workqueue(wq->normal->normal_wq);
}
......@@ -44,5 +44,6 @@ void btrfs_set_work_high_priority(struct btrfs_work *work);
struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work);
struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq);
bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq);
void btrfs_flush_workqueue(struct btrfs_workqueue *wq);
#endif
This diff is collapsed.
......@@ -40,6 +40,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **leafs,
const u64 *extent_item_pos, bool ignore_offset);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots, bool ignore_offset);
......
......@@ -460,7 +460,7 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
int ret;
while (start < end) {
ret = find_first_extent_bit(info->pinned_extents, start,
ret = find_first_extent_bit(&info->excluded_extents, start,
&extent_start, &extent_end,
EXTENT_DIRTY | EXTENT_UPTODATE,
NULL);
......@@ -1248,6 +1248,55 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
return ret;
}
static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
struct btrfs_block_group *bg)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
struct btrfs_transaction *prev_trans = NULL;
const u64 start = bg->start;
const u64 end = start + bg->length - 1;
int ret;
spin_lock(&fs_info->trans_lock);
if (trans->transaction->list.prev != &fs_info->trans_list) {
prev_trans = list_last_entry(&trans->transaction->list,
struct btrfs_transaction, list);
refcount_inc(&prev_trans->use_count);
}
spin_unlock(&fs_info->trans_lock);
/*
* Hold the unused_bg_unpin_mutex lock to avoid racing with
* btrfs_finish_extent_commit(). If we are at transaction N, another
* task might be running finish_extent_commit() for the previous
* transaction N - 1, and have seen a range belonging to the block
* group in pinned_extents before we were able to clear the whole block
* group range from pinned_extents. This means that task can lookup for
* the block group after we unpinned it from pinned_extents and removed
* it, leading to a BUG_ON() at unpin_extent_range().
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
if (prev_trans) {
ret = clear_extent_bits(&prev_trans->pinned_extents, start, end,
EXTENT_DIRTY);
if (ret)
goto err;
}
ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end,
EXTENT_DIRTY);
if (ret)
goto err;
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
return true;
err:
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
btrfs_dec_block_group_ro(bg);
return false;
}
/*
* Process the unused_bgs list and remove any that don't have any allocated
* space inside of them.
......@@ -1265,7 +1314,6 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_lock(&fs_info->unused_bgs_lock);
while (!list_empty(&fs_info->unused_bgs)) {
u64 start, end;
int trimming;
block_group = list_first_entry(&fs_info->unused_bgs,
......@@ -1344,35 +1392,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* We could have pending pinned extents for this block group,
* just delete them, we don't care about them anymore.
*/
start = block_group->start;
end = start + block_group->length - 1;
/*
* Hold the unused_bg_unpin_mutex lock to avoid racing with
* btrfs_finish_extent_commit(). If we are at transaction N,
* another task might be running finish_extent_commit() for the
* previous transaction N - 1, and have seen a range belonging
* to the block group in freed_extents[] before we were able to
* clear the whole block group range from freed_extents[]. This
* means that task can lookup for the block group after we
* unpinned it from freed_extents[] and removed it, leading to
* a BUG_ON() at btrfs_unpin_extent_range().
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
EXTENT_DIRTY);
if (ret) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
btrfs_dec_block_group_ro(block_group);
if (!clean_pinned_extents(trans, block_group))
goto end_trans;
}
ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
EXTENT_DIRTY);
if (ret) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
btrfs_dec_block_group_ro(block_group);
goto end_trans;
}
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
/*
* At this point, the block_group is read only and should fail
......@@ -1987,6 +2008,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
btrfs_release_path(path);
}
rcu_read_lock();
list_for_each_entry_rcu(space_info, &info->space_info, list) {
if (!(btrfs_get_alloc_profile(info, space_info->flags) &
(BTRFS_BLOCK_GROUP_RAID10 |
......@@ -2007,6 +2029,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
list)
inc_block_group_ro(cache, 1);
}
rcu_read_unlock();
btrfs_init_global_block_rsv(info);
ret = check_chunk_block_group_mappings(info);
......@@ -2345,7 +2368,7 @@ static int cache_save_setup(struct btrfs_block_group *block_group,
return 0;
}
if (trans->aborted)
if (TRANS_ABORTED(trans))
return 0;
again:
inode = lookup_free_space_inode(block_group, path);
......@@ -2881,7 +2904,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
&cache->space_info->total_bytes_pinned,
num_bytes,
BTRFS_TOTAL_BYTES_PINNED_BATCH);
set_extent_dirty(info->pinned_extents,
set_extent_dirty(&trans->transaction->pinned_extents,
bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL);
}
......
......@@ -6,6 +6,98 @@
#include "space-info.h"
#include "transaction.h"
/*
* HOW DO BLOCK RESERVES WORK
*
* Think of block_rsv's as buckets for logically grouped metadata
* reservations. Each block_rsv has a ->size and a ->reserved. ->size is
* how large we want our block rsv to be, ->reserved is how much space is
* currently reserved for this block reserve.
*
* ->failfast exists for the truncate case, and is described below.
*
* NORMAL OPERATION
*
* -> Reserve
* Entrance: btrfs_block_rsv_add, btrfs_block_rsv_refill
*
* We call into btrfs_reserve_metadata_bytes() with our bytes, which is
* accounted for in space_info->bytes_may_use, and then add the bytes to
* ->reserved, and ->size in the case of btrfs_block_rsv_add.
*
* ->size is an over-estimation of how much we may use for a particular
* operation.
*
* -> Use
* Entrance: btrfs_use_block_rsv
*
* When we do a btrfs_alloc_tree_block() we call into btrfs_use_block_rsv()
* to determine the appropriate block_rsv to use, and then verify that
* ->reserved has enough space for our tree block allocation. Once
* successful we subtract fs_info->nodesize from ->reserved.
*
* -> Finish
* Entrance: btrfs_block_rsv_release
*
* We are finished with our operation, subtract our individual reservation
* from ->size, and then subtract ->size from ->reserved and free up the
* excess if there is any.
*
* There is some logic here to refill the delayed refs rsv or the global rsv
* as needed, otherwise the excess is subtracted from
* space_info->bytes_may_use.
*
* TYPES OF BLOCK RESERVES
*
* BLOCK_RSV_TRANS, BLOCK_RSV_DELOPS, BLOCK_RSV_CHUNK
* These behave normally, as described above, just within the confines of the
* lifetime of their particular operation (transaction for the whole trans
* handle lifetime, for example).
*
* BLOCK_RSV_GLOBAL
* It is impossible to properly account for all the space that may be required
* to make our extent tree updates. This block reserve acts as an overflow
* buffer in case our delayed refs reserve does not reserve enough space to
* update the extent tree.
*
* We can steal from this in some cases as well, notably on evict() or
* truncate() in order to help users recover from ENOSPC conditions.
*
* BLOCK_RSV_DELALLOC
* The individual item sizes are determined by the per-inode size
* calculations, which are described with the delalloc code. This is pretty
* straightforward, it's just the calculation of ->size encodes a lot of
* different items, and thus it gets used when updating inodes, inserting file
* extents, and inserting checksums.
*
* BLOCK_RSV_DELREFS
* We keep a running tally of how many delayed refs we have on the system.
* We assume each one of these delayed refs are going to use a full
* reservation. We use the transaction items and pre-reserve space for every
* operation, and use this reservation to refill any gap between ->size and
* ->reserved that may exist.
*
* From there it's straightforward, removing a delayed ref means we remove its
* count from ->size and free up reservations as necessary. Since this is
* the most dynamic block reserve in the system, we will try to refill this
* block reserve first with any excess returned by any other block reserve.
*
* BLOCK_RSV_EMPTY
* This is the fallback block reserve to make us try to reserve space if we
* don't have a specific bucket for this allocation. It is mostly used for
* updating the device tree and such, since that is a separate pool we're
* content to just reserve space from the space_info on demand.
*
* BLOCK_RSV_TEMP
* This is used by things like truncate and iput. We will temporarily
* allocate a block reserve, set it to some size, and then truncate bytes
* until we have no space left. With ->failfast set we'll simply return
* ENOSPC from btrfs_use_block_rsv() to signal that we need to unwind and try
* to make a new reservation. This is because these operations are
* unbounded, so we want to do as much work as we can, and then back off and
* re-reserve.
*/
static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
struct btrfs_block_rsv *dest, u64 num_bytes,
......@@ -111,7 +203,7 @@ void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
{
if (!rsv)
return;
btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
btrfs_block_rsv_release(fs_info, rsv, (u64)-1, NULL);
kfree(rsv);
}
......@@ -178,9 +270,9 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
return ret;
}
u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes, u64 *qgroup_to_release)
u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
u64 *qgroup_to_release)
{
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
......@@ -297,9 +389,9 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
if (block_rsv->reserved < block_rsv->size) {
num_bytes = block_rsv->size - block_rsv->reserved;
block_rsv->reserved += num_bytes;
btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
num_bytes);
block_rsv->reserved = block_rsv->size;
} else if (block_rsv->reserved > block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
......@@ -344,7 +436,8 @@ void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
btrfs_block_rsv_release(fs_info, &fs_info->global_block_rsv, (u64)-1);
btrfs_block_rsv_release(fs_info, &fs_info->global_block_rsv, (u64)-1,
NULL);
WARN_ON(fs_info->trans_block_rsv.size > 0);
WARN_ON(fs_info->trans_block_rsv.reserved > 0);
WARN_ON(fs_info->chunk_block_rsv.size > 0);
......
......@@ -73,7 +73,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
int min_factor);
void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes, bool update_size);
u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes, u64 *qgroup_to_release);
void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info);
......@@ -82,20 +82,12 @@ void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info);
struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u32 blocksize);
static inline void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes)
{
__btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
}
static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u32 blocksize)
{
btrfs_block_rsv_add_bytes(block_rsv, blocksize, false);
btrfs_block_rsv_release(fs_info, block_rsv, 0);
btrfs_block_rsv_release(fs_info, block_rsv, 0, NULL);
}
#endif /* BTRFS_BLOCK_RSV_H */
......@@ -60,6 +60,12 @@ struct btrfs_inode {
*/
struct extent_io_tree io_failure_tree;
/*
* Keep track of where the inode has extent items mapped in order to
* make sure the i_size adjustments are accurate
*/
struct extent_io_tree file_extent_tree;
/* held while logging the inode in tree-log.c */
struct mutex log_mutex;
......
This diff is collapsed.
......@@ -7,11 +7,9 @@
#define BTRFS_CHECK_INTEGRITY_H
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh);
void btrfsic_submit_bio(struct bio *bio);
int btrfsic_submit_bio_wait(struct bio *bio);
#else
#define btrfsic_submit_bh submit_bh
#define btrfsic_submit_bio submit_bio
#define btrfsic_submit_bio_wait submit_bio_wait
#endif
......
......@@ -31,8 +31,8 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
static const struct btrfs_csums {
u16 size;
const char *name;
const char *driver;
const char name[10];
const char driver[12];
} btrfs_csums[] = {
[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
......@@ -63,7 +63,8 @@ const char *btrfs_super_csum_name(u16 csum_type)
const char *btrfs_super_csum_driver(u16 csum_type)
{
/* csum type is validated at mount time */
return btrfs_csums[csum_type].driver ?:
return btrfs_csums[csum_type].driver[0] ?
btrfs_csums[csum_type].driver :
btrfs_csums[csum_type].name;
}
......@@ -143,44 +144,6 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
return eb;
}
/* loop around taking references on and locking the root node of the
* tree until you end up with a lock on the root. A locked buffer
* is returned, with a reference held.
*/
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
struct extent_buffer *eb;
while (1) {
eb = btrfs_root_node(root);
btrfs_tree_lock(eb);
if (eb == root->node)
break;
btrfs_tree_unlock(eb);
free_extent_buffer(eb);
}
return eb;
}
/* loop around taking references on and locking the root node of the
* tree until you end up with a lock on the root. A locked buffer
* is returned, with a reference held.
*/
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
struct extent_buffer *eb;
while (1) {
eb = btrfs_root_node(root);
btrfs_tree_read_lock(eb);
if (eb == root->node)
break;
btrfs_tree_read_unlock(eb);
free_extent_buffer(eb);
}
return eb;
}
/* cowonly root (everything not a reference counted cow subvolume), just get
* put onto a simple dirty list. transaction.c walks this to make sure they
* get properly updated on disk.
......@@ -341,7 +304,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct rb_root *tm_root;
struct rb_node *node;
struct rb_node *next;
struct seq_list *cur_elem;
struct tree_mod_elem *tm;
u64 min_seq = (u64)-1;
u64 seq_putting = elem->seq;
......@@ -353,18 +315,20 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
list_del(&elem->list);
elem->seq = 0;
list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
if (cur_elem->seq < min_seq) {
if (seq_putting > cur_elem->seq) {
if (!list_empty(&fs_info->tree_mod_seq_list)) {
struct seq_list *first;
first = list_first_entry(&fs_info->tree_mod_seq_list,
struct seq_list, list);
if (seq_putting > first->seq) {
/*
* blocker with lower sequence number exists, we
* cannot remove anything from the log
* Blocker with lower sequence number exists, we
* cannot remove anything from the log.
*/
write_unlock(&fs_info->tree_mod_log_lock);
return;
}
min_seq = cur_elem->seq;
}
min_seq = first->seq;
}
/*
......@@ -962,9 +926,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (new_flags != 0) {
int level = btrfs_header_level(buf);
ret = btrfs_set_disk_extent_flags(trans,
buf->start,
buf->len,
ret = btrfs_set_disk_extent_flags(trans, buf,
new_flags, level, 0);
if (ret)
return ret;
......
......@@ -33,6 +33,7 @@
#include "extent_map.h"
#include "async-thread.h"
#include "block-rsv.h"
#include "locking.h"
struct btrfs_trans_handle;
struct btrfs_transaction;
......@@ -596,8 +597,8 @@ struct btrfs_fs_info {
/* keep track of unallocated space */
atomic64_t free_chunk_space;
struct extent_io_tree freed_extents[2];
struct extent_io_tree *pinned_extents;
/* Track ranges which are used by log trees blocks/logged data extents */
struct extent_io_tree excluded_extents;
/* logical->physical extent mapping */
struct extent_map_tree mapping_tree;
......@@ -696,7 +697,6 @@ struct btrfs_fs_info {
struct rw_semaphore cleanup_work_sem;
struct rw_semaphore subvol_sem;
struct srcu_struct subvol_srcu;
spinlock_t trans_lock;
/*
......@@ -947,6 +947,10 @@ struct btrfs_fs_info {
#ifdef CONFIG_BTRFS_DEBUG
struct kobject *debug_kobj;
struct kobject *discard_debug_kobj;
struct list_head allocated_roots;
spinlock_t eb_leak_lock;
struct list_head allocated_ebs;
#endif
};
......@@ -955,11 +959,6 @@ static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
return sb->s_fs_info;
}
struct btrfs_subvolume_writers {
struct percpu_counter counter;
wait_queue_head_t wait;
};
/*
* The state of btrfs root
*/
......@@ -1131,8 +1130,9 @@ struct btrfs_root {
* root_item_lock.
*/
int dedupe_in_progress;
struct btrfs_subvolume_writers *subv_writers;
atomic_t will_be_snapshotted;
/* For exclusion of snapshot creation and nocow writes */
struct btrfs_drew_lock snapshot_lock;
atomic_t snapshot_force_cow;
/* For qgroup metadata reserved space */
......@@ -1149,6 +1149,10 @@ struct btrfs_root {
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
u64 alloc_bytenr;
#endif
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
#endif
};
struct btrfs_clone_extent_info {
......@@ -1971,16 +1975,6 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
btrfs_set_header_flags(eb, flags);
}
static inline unsigned long btrfs_header_fsid(void)
{
return offsetof(struct btrfs_header, fsid);
}
static inline unsigned long btrfs_header_chunk_tree_uuid(const struct extent_buffer *eb)
{
return offsetof(struct btrfs_header, chunk_tree_uuid);
}
static inline int btrfs_is_leaf(const struct extent_buffer *eb)
{
return btrfs_header_level(eb) == 0;
......@@ -2458,9 +2452,9 @@ int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 offset, int metadata, u64 *refs, u64 *flags);
int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num, int reserved);
int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num,
int reserved);
int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes);
int btrfs_exclude_logged_extents(struct extent_buffer *eb);
int btrfs_cross_ref_exist(struct btrfs_root *root,
......@@ -2490,13 +2484,13 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 flags,
struct extent_buffer *eb, u64 flags,
int level, int is_data);
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 len, int delalloc);
int btrfs_pin_reserved_extent(struct btrfs_fs_info *fs_info, u64 start,
int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
u64 len);
void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
......@@ -2665,9 +2659,8 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
return btrfs_next_old_item(root, p, 0);
}
int btrfs_leaf_free_space(struct extent_buffer *leaf);
int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
int update_ref, int for_reloc);
int __must_check btrfs_drop_snapshot(struct btrfs_root *root, int update_ref,
int for_reloc);
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *node,
......@@ -2695,23 +2688,6 @@ static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info)
return fs_info->sb->s_flags & SB_RDONLY || btrfs_fs_closing(fs_info);
}
static inline void free_fs_info(struct btrfs_fs_info *fs_info)
{
kfree(fs_info->balance_ctl);
kfree(fs_info->delayed_root);
kfree(fs_info->extent_root);
kfree(fs_info->tree_root);
kfree(fs_info->chunk_root);
kfree(fs_info->dev_root);
kfree(fs_info->csum_root);
kfree(fs_info->quota_root);
kfree(fs_info->uuid_root);
kfree(fs_info->free_space_root);
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
kvfree(fs_info);
}
/* tree mod log functions from ctree.c */
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
......@@ -2750,9 +2726,7 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
u64 subid);
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
u64 subid);
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
u64));
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info);
/* dir-item.c */
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
......@@ -2859,6 +2833,12 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
struct btrfs_file_extent_item *fi,
const bool new_inline,
struct extent_map *em);
int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
u64 len);
int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
u64 len);
void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_i_size);
u64 btrfs_file_extent_end(const struct btrfs_path *path);
/* inode.c */
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
......@@ -2996,9 +2976,6 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
size_t num_pages, loff_t pos, size_t write_bytes,
struct extent_state **cached);
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
......@@ -3008,6 +2985,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
unsigned long new_flags);
int btrfs_sync_fs(struct super_block *sb, int wait);
char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
u64 subvol_objectid);
static inline __printf(2, 3) __cold
void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
......@@ -3401,6 +3380,7 @@ void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve);
int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info);
/* scrub.c */
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
......
......@@ -9,6 +9,108 @@
#include "qgroup.h"
#include "block-group.h"
/*
* HOW DOES THIS WORK
*
* There are two stages to data reservations, one for data and one for metadata
* to handle the new extents and checksums generated by writing data.
*
*
* DATA RESERVATION
* The general flow of the data reservation is as follows
*
* -> Reserve
* We call into btrfs_reserve_data_bytes() for the user request bytes that
* they wish to write. We make this reservation and add it to
* space_info->bytes_may_use. We set EXTENT_DELALLOC on the inode io_tree
* for the range and carry on if this is buffered, or follow up trying to
* make a real allocation if we are pre-allocating or doing O_DIRECT.
*
* -> Use
* At writepages()/prealloc/O_DIRECT time we will call into
* btrfs_reserve_extent() for some part or all of this range of bytes. We
* will make the allocation and subtract space_info->bytes_may_use by the
* original requested length and increase the space_info->bytes_reserved by
* the allocated length. This distinction is important because compression
* may allocate a smaller on disk extent than we previously reserved.
*
* -> Allocation
* finish_ordered_io() will insert the new file extent item for this range,
* and then add a delayed ref update for the extent tree. Once that delayed
* ref is written the extent size is subtracted from
* space_info->bytes_reserved and added to space_info->bytes_used.
*
* Error handling
*
* -> By the reservation maker
* This is the simplest case, we haven't completed our operation and we know
* how much we reserved, we can simply call
* btrfs_free_reserved_data_space*() and it will be removed from
* space_info->bytes_may_use.
*
* -> After the reservation has been made, but before cow_file_range()
* This is specifically for the delalloc case. You must clear
* EXTENT_DELALLOC with the EXTENT_CLEAR_DATA_RESV bit, and the range will
* be subtracted from space_info->bytes_may_use.
*
* METADATA RESERVATION
* The general metadata reservation lifetimes are discussed elsewhere, this
* will just focus on how it is used for delalloc space.
*
* We keep track of two things on a per inode bases
*
* ->outstanding_extents
* This is the number of file extent items we'll need to handle all of the
* outstanding DELALLOC space we have in this inode. We limit the maximum
* size of an extent, so a large contiguous dirty area may require more than
* one outstanding_extent, which is why count_max_extents() is used to
* determine how many outstanding_extents get added.
*
* ->csum_bytes
* This is essentially how many dirty bytes we have for this inode, so we
* can calculate the number of checksum items we would have to add in order
* to checksum our outstanding data.
*
* We keep a per-inode block_rsv in order to make it easier to keep track of
* our reservation. We use btrfs_calculate_inode_block_rsv_size() to
* calculate the current theoretical maximum reservation we would need for the
* metadata for this inode. We call this and then adjust our reservation as
* necessary, either by attempting to reserve more space, or freeing up excess
* space.
*
* OUTSTANDING_EXTENTS HANDLING
*
* ->outstanding_extents is used for keeping track of how many extents we will
* need to use for this inode, and it will fluctuate depending on where you are
* in the life cycle of the dirty data. Consider the following normal case for
* a completely clean inode, with a num_bytes < our maximum allowed extent size
*
* -> reserve
* ->outstanding_extents += 1 (current value is 1)
*
* -> set_delalloc
* ->outstanding_extents += 1 (currrent value is 2)
*
* -> btrfs_delalloc_release_extents()
* ->outstanding_extents -= 1 (current value is 1)
*
* We must call this once we are done, as we hold our reservation for the
* duration of our operation, and then assume set_delalloc will update the
* counter appropriately.
*
* -> add ordered extent
* ->outstanding_extents += 1 (current value is 2)
*
* -> btrfs_clear_delalloc_extent
* ->outstanding_extents -= 1 (current value is 1)
*
* -> finish_ordered_io/btrfs_remove_ordered_extent
* ->outstanding_extents -= 1 (current value is 0)
*
* Each stage is responsible for their own accounting of the extent, thus
* making error handling and cleanup easier.
*/
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
{
struct btrfs_root *root = inode->root;
......@@ -228,7 +330,7 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
* are releasing 0 bytes, and then we'll just get the reservation over
* the size free'd.
*/
released = __btrfs_block_rsv_release(fs_info, block_rsv, 0,
released = btrfs_block_rsv_release(fs_info, block_rsv, 0,
&qgroup_to_release);
if (released > 0)
trace_btrfs_space_reservation(fs_info, "delalloc",
......
......@@ -6,6 +6,7 @@
#include <linux/slab.h>
#include <linux/iversion.h>
#include <linux/sched/mm.h>
#include "misc.h"
#include "delayed-inode.h"
#include "disk-io.h"
......@@ -595,8 +596,7 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
trace_btrfs_space_reservation(fs_info, "delayed_item",
item->key.objectid, item->bytes_reserved,
0);
btrfs_block_rsv_release(fs_info, rsv,
item->bytes_reserved);
btrfs_block_rsv_release(fs_info, rsv, item->bytes_reserved, NULL);
}
static int btrfs_delayed_inode_reserve_metadata(
......@@ -677,8 +677,7 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
rsv = &fs_info->delayed_block_rsv;
trace_btrfs_space_reservation(fs_info, "delayed_inode",
node->inode_id, node->bytes_reserved, 0);
btrfs_block_rsv_release(fs_info, rsv,
node->bytes_reserved);
btrfs_block_rsv_release(fs_info, rsv, node->bytes_reserved, NULL);
if (qgroup_free)
btrfs_qgroup_free_meta_prealloc(node->root,
node->bytes_reserved);
......@@ -805,11 +804,14 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
struct btrfs_delayed_item *delayed_item)
{
struct extent_buffer *leaf;
unsigned int nofs_flag;
char *ptr;
int ret;
nofs_flag = memalloc_nofs_save();
ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
delayed_item->data_len);
memalloc_nofs_restore(nofs_flag);
if (ret < 0 && ret != -EEXIST)
return ret;
......@@ -937,6 +939,7 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
{
struct btrfs_delayed_item *curr, *prev;
unsigned int nofs_flag;
int ret = 0;
do_again:
......@@ -945,7 +948,9 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
if (!curr)
goto delete_fail;
nofs_flag = memalloc_nofs_save();
ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
memalloc_nofs_restore(nofs_flag);
if (ret < 0)
goto delete_fail;
else if (ret > 0) {
......@@ -1012,6 +1017,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_key key;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
unsigned int nofs_flag;
int mod;
int ret;
......@@ -1024,7 +1030,9 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
else
mod = 1;
nofs_flag = memalloc_nofs_save();
ret = btrfs_lookup_inode(trans, root, path, &key, mod);
memalloc_nofs_restore(nofs_flag);
if (ret > 0) {
btrfs_release_path(path);
return -ENOENT;
......@@ -1075,7 +1083,10 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
key.type = BTRFS_INODE_EXTREF_KEY;
key.offset = -1;
nofs_flag = memalloc_nofs_save();
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
memalloc_nofs_restore(nofs_flag);
if (ret < 0)
goto err_out;
ASSERT(ret);
......@@ -1139,7 +1150,7 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
int ret = 0;
bool count = (nr > 0);
if (trans->aborted)
if (TRANS_ABORTED(trans))
return -EIO;
path = btrfs_alloc_path();
......@@ -1760,6 +1771,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
int btrfs_fill_inode(struct inode *inode, u32 *rdev)
{
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct btrfs_delayed_node *delayed_node;
struct btrfs_inode_item *inode_item;
......@@ -1779,6 +1791,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
round_up(i_size_read(inode), fs_info->sectorsize));
inode->i_mode = btrfs_stack_inode_mode(inode_item);
set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
......
......@@ -70,7 +70,7 @@ struct btrfs_delayed_item {
refcount_t refs;
int ins_or_del;
u32 data_len;
char data[0];
char data[];
};
static inline void btrfs_init_delayed_root(
......
......@@ -82,8 +82,7 @@ void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
u64 num_bytes = btrfs_calc_insert_metadata_size(fs_info, nr);
u64 released = 0;
released = __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes,
NULL);
released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
if (released)
trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
0, released, 0);
......
......@@ -22,6 +22,46 @@
#include "dev-replace.h"
#include "sysfs.h"
/*
* Device replace overview
*
* [Objective]
* To copy all extents (both new and on-disk) from source device to target
* device, while still keeping the filesystem read-write.
*
* [Method]
* There are two main methods involved:
*
* - Write duplication
*
* All new writes will be written to both target and source devices, so even
* if replace gets canceled, sources device still contans up-to-date data.
*
* Location: handle_ops_on_dev_replace() from __btrfs_map_block()
* Start: btrfs_dev_replace_start()
* End: btrfs_dev_replace_finishing()
* Content: Latest data/metadata
*
* - Copy existing extents
*
* This happens by re-using scrub facility, as scrub also iterates through
* existing extents from commit root.
*
* Location: scrub_write_block_to_dev_replace() from
* scrub_block_complete()
* Content: Data/meta from commit root.
*
* Due to the content difference, we need to avoid nocow write when dev-replace
* is happening. This is done by marking the block group read-only and waiting
* for NOCOW writes.
*
* After replace is done, the finishing part is done by swapping the target and
* source devices.
*
* Location: btrfs_dev_replace_update_device_in_mapping_tree() from
* btrfs_dev_replace_finishing()
*/
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
int scrub_ret);
static void btrfs_dev_replace_update_device_in_mapping_tree(
......@@ -472,7 +512,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
up_write(&dev_replace->rwsem);
ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
ret = btrfs_sysfs_add_devices_dir(tgt_device->fs_devices, tgt_device);
if (ret)
btrfs_err(fs_info, "kobj add dev failed %d", ret);
......@@ -703,7 +743,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
/* replace the sysfs entry */
btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device);
btrfs_sysfs_remove_devices_dir(fs_info->fs_devices, src_device);
btrfs_sysfs_update_devid(tgt_device);
btrfs_rm_dev_replace_free_srcdev(src_device);
......
This diff is collapsed.
......@@ -39,6 +39,8 @@ static inline u64 btrfs_sb_offset(int mirror)
struct btrfs_device;
struct btrfs_fs_devices;
void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info);
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
struct btrfs_key *first_key, u64 parent_transid);
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
......@@ -54,15 +56,12 @@ int __cold open_ctree(struct super_block *sb,
char *options);
void __cold close_ctree(struct btrfs_fs_info *fs_info);
int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
struct buffer_head **bh_ret);
struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
int copy_num);
int btrfs_commit_super(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
struct btrfs_key *location);
int btrfs_init_fs_root(struct btrfs_root *root);
struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
u64 root_id);
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
struct btrfs_key *key);
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
......@@ -70,19 +69,13 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_key *key,
bool check_ref);
static inline struct btrfs_root *
btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
struct btrfs_key *location)
{
return btrfs_get_fs_root(fs_info, location, true);
}
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
void btrfs_free_fs_root(struct btrfs_root *root);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
......@@ -95,19 +88,16 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
* If you want to ensure the whole tree is safe, you should use
* fs_info->subvol_srcu
*/
static inline struct btrfs_root *btrfs_grab_fs_root(struct btrfs_root *root)
static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
{
if (!root)
return NULL;
if (refcount_inc_not_zero(&root->refs))
return root;
return NULL;
}
static inline void btrfs_put_fs_root(struct btrfs_root *root)
{
if (refcount_dec_and_test(&root->refs))
kfree(root);
}
void btrfs_put_root(struct btrfs_root *root);
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
......
......@@ -57,7 +57,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
return type;
}
static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
u64 root_objectid, u32 generation,
int check_generation)
{
......@@ -65,8 +65,6 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
struct btrfs_root *root;
struct inode *inode;
struct btrfs_key key;
int index;
int err = 0;
if (objectid < BTRFS_FIRST_FREE_OBJECTID)
return ERR_PTR(-ESTALE);
......@@ -75,25 +73,18 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
index = srcu_read_lock(&fs_info->subvol_srcu);
root = btrfs_read_fs_root_no_name(fs_info, &key);
if (IS_ERR(root)) {
err = PTR_ERR(root);
goto fail;
}
root = btrfs_get_fs_root(fs_info, &key, true);
if (IS_ERR(root))
return ERR_CAST(root);
key.objectid = objectid;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
inode = btrfs_iget(sb, &key, root);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto fail;
}
srcu_read_unlock(&fs_info->subvol_srcu, index);
btrfs_put_root(root);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (check_generation && generation != inode->i_generation) {
iput(inode);
......@@ -101,9 +92,6 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
}
return d_obtain_alias(inode);
fail:
srcu_read_unlock(&fs_info->subvol_srcu, index);
return ERR_PTR(err);
}
static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
......@@ -152,7 +140,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
}
static struct dentry *btrfs_get_parent(struct dentry *child)
struct dentry *btrfs_get_parent(struct dentry *child)
{
struct inode *dir = d_inode(child);
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
......
......@@ -18,4 +18,9 @@ struct btrfs_fid {
u64 parent_root_objectid;
} __attribute__ ((packed));
struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
u64 root_objectid, u32 generation,
int check_generation);
struct dentry *btrfs_get_parent(struct dentry *child);
#endif
......@@ -36,13 +36,14 @@ struct io_failure_record;
#define CHUNK_TRIMMED EXTENT_DEFRAG
enum {
IO_TREE_FS_INFO_FREED_EXTENTS0,
IO_TREE_FS_INFO_FREED_EXTENTS1,
IO_TREE_FS_PINNED_EXTENTS,
IO_TREE_FS_EXCLUDED_EXTENTS,
IO_TREE_INODE_IO,
IO_TREE_INODE_IO_FAILURE,
IO_TREE_RELOC_BLOCKS,
IO_TREE_TRANS_DIRTY_PAGES,
IO_TREE_ROOT_DIRTY_LOG_PAGES,
IO_TREE_INODE_FILE_EXTENT,
IO_TREE_SELFTEST,
};
......@@ -222,6 +223,8 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
struct extent_state **cached_state);
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits);
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits);
int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset);
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
......
This diff is collapsed.
This diff is collapsed.
......@@ -189,8 +189,8 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num);
int extent_read_full_page(struct page *page, get_extent_t *get_extent,
int mirror_num);
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
int mode);
......@@ -325,4 +325,11 @@ bool find_lock_delalloc_range(struct inode *inode,
#endif
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
#ifdef CONFIG_BTRFS_DEBUG
void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info);
#else
#define btrfs_extent_buffer_leak_debug_check(fs_info) do {} while (0)
#endif
#endif
......@@ -23,6 +23,97 @@
#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
PAGE_SIZE))
/**
* @inode - the inode we want to update the disk_i_size for
* @new_i_size - the i_size we want to set to, 0 if we use i_size
*
* With NO_HOLES set this simply sets the disk_is_size to whatever i_size_read()
* returns as it is perfectly fine with a file that has holes without hole file
* extent items.
*
* However without NO_HOLES we need to only return the area that is contiguous
* from the 0 offset of the file. Otherwise we could end up adjust i_size up
* to an extent that has a gap in between.
*
* Finally new_i_size should only be set in the case of truncate where we're not
* ready to use i_size_read() as the limiter yet.
*/
void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_i_size)
{
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
u64 start, end, i_size;
int ret;
i_size = new_i_size ?: i_size_read(inode);
if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
BTRFS_I(inode)->disk_i_size = i_size;
return;
}
spin_lock(&BTRFS_I(inode)->lock);
ret = find_contiguous_extent_bit(&BTRFS_I(inode)->file_extent_tree, 0,
&start, &end, EXTENT_DIRTY);
if (!ret && start == 0)
i_size = min(i_size, end + 1);
else
i_size = 0;
BTRFS_I(inode)->disk_i_size = i_size;
spin_unlock(&BTRFS_I(inode)->lock);
}
/**
* @inode - the inode we're modifying
* @start - the start file offset of the file extent we've inserted
* @len - the logical length of the file extent item
*
* Call when we are inserting a new file extent where there was none before.
* Does not need to call this in the case where we're replacing an existing file
* extent, however if not sure it's fine to call this multiple times.
*
* The start and len must match the file extent item, so thus must be sectorsize
* aligned.
*/
int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
u64 len)
{
if (len == 0)
return 0;
ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize));
if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
return 0;
return set_extent_bits(&inode->file_extent_tree, start, start + len - 1,
EXTENT_DIRTY);
}
/**
* @inode - the inode we're modifying
* @start - the start file offset of the file extent we've inserted
* @len - the logical length of the file extent item
*
* Called when we drop a file extent, for example when we truncate. Doesn't
* need to be called for cases where we're replacing a file extent, like when
* we've COWed a file extent.
*
* The start and len must match the file extent item, so thus must be sectorsize
* aligned.
*/
int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
u64 len)
{
if (len == 0)
return 0;
ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) ||
len == (u64)-1);
if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
return 0;
return clear_extent_bit(&inode->file_extent_tree, start,
start + len - 1, EXTENT_DIRTY, 0, 0, NULL);
}
static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info,
u16 csum_size)
{
......@@ -949,18 +1040,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
btrfs_item_key_to_cpu(leaf, &key, slot);
extent_start = key.offset;
if (type == BTRFS_FILE_EXTENT_REG ||
type == BTRFS_FILE_EXTENT_PREALLOC) {
extent_end = extent_start +
btrfs_file_extent_num_bytes(leaf, fi);
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
size_t size;
size = btrfs_file_extent_ram_bytes(leaf, fi);
extent_end = ALIGN(extent_start + size,
fs_info->sectorsize);
}
extent_end = btrfs_file_extent_end(path);
em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
if (type == BTRFS_FILE_EXTENT_REG ||
type == BTRFS_FILE_EXTENT_PREALLOC) {
......@@ -1007,3 +1087,30 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
root->root_key.objectid);
}
}
/*
* Returns the end offset (non inclusive) of the file extent item the given path
* points to. If it points to an inline extent, the returned offset is rounded
* up to the sector size.
*/
u64 btrfs_file_extent_end(const struct btrfs_path *path)
{
const struct extent_buffer *leaf = path->nodes[0];
const int slot = path->slots[0];
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
u64 end;
btrfs_item_key_to_cpu(leaf, &key, slot);
ASSERT(key.type == BTRFS_EXTENT_DATA_KEY);
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
end = btrfs_file_extent_ram_bytes(leaf, fi);
end = ALIGN(key.offset + end, leaf->fs_info->sectorsize);
} else {
end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
}
return end;
}
This diff is collapsed.
This diff is collapsed.
......@@ -1251,9 +1251,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
0, 1);
free_extent_buffer(free_space_root->node);
free_extent_buffer(free_space_root->commit_root);
kfree(free_space_root);
btrfs_put_root(free_space_root);
return btrfs_commit_transaction(trans);
......
......@@ -515,7 +515,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
trace_btrfs_space_reservation(fs_info, "ino_cache", trans->transid,
trans->bytes_reserved, 0);
btrfs_block_rsv_release(fs_info, trans->block_rsv,
trans->bytes_reserved);
trans->bytes_reserved, NULL);
out:
trans->block_rsv = rsv;
trans->bytes_reserved = num_bytes;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -6,6 +6,9 @@
#ifndef BTRFS_LOCKING_H
#define BTRFS_LOCKING_H
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/percpu_counter.h>
#include "extent_io.h"
#define BTRFS_WRITE_LOCK 1
......@@ -13,6 +16,8 @@
#define BTRFS_WRITE_LOCK_BLOCKING 3
#define BTRFS_READ_LOCK_BLOCKING 4
struct btrfs_path;
void btrfs_tree_lock(struct extent_buffer *eb);
void btrfs_tree_unlock(struct extent_buffer *eb);
......@@ -48,4 +53,19 @@ static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
BUG();
}
struct btrfs_drew_lock {
atomic_t readers;
struct percpu_counter writers;
wait_queue_head_t pending_writers;
wait_queue_head_t pending_readers;
};
int btrfs_drew_lock_init(struct btrfs_drew_lock *lock);
void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock);
void btrfs_drew_write_lock(struct btrfs_drew_lock *lock);
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock);
void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock);
void btrfs_drew_read_lock(struct btrfs_drew_lock *lock);
void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock);
#endif
This diff is collapsed.
......@@ -52,11 +52,6 @@ enum {
BTRFS_ORDERED_DIRECT,
/* We had an io error when writing this out */
BTRFS_ORDERED_IOERR,
/*
* indicates whether this ordered extent has done its due diligence in
* updating the isize
*/
BTRFS_ORDERED_UPDATED_ISIZE,
/* Set when we have to truncate an extent */
BTRFS_ORDERED_TRUNCATED,
/* Regular IO for COW */
......@@ -182,16 +177,13 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
struct btrfs_inode *inode,
u64 file_offset,
u64 len);
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
u8 *sum, int len);
u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
const u64 range_start, const u64 range_len);
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
const u64 range_start, const u64 range_len);
void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
struct btrfs_inode *inode, u64 start,
void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
u64 end,
struct extent_state **cached_state);
int __init ordered_data_init(void);
......
......@@ -383,7 +383,7 @@ static int inherit_props(struct btrfs_trans_handle *trans,
if (need_reserve) {
btrfs_block_rsv_release(fs_info, trans->block_rsv,
num_bytes);
num_bytes, NULL);
if (ret)
return ret;
}
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment