Commit ce4c854e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-5.18-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - prevent deleting subvolume with active swapfile

 - fix qgroup reserve limit calculation overflow

 - remove device count in superblock and its item in one transaction so
   they can't get out of sync

 - skip defragmenting an isolated sector, this could cause some extra IO

 - unify handling of mtime/permissions in hole punch with fallocate

 - zoned mode fixes:
     - remove assert checking for only single mode, we have the
       DUP mode implemented
     - fix potential lockdep warning while traversing devices
       when checking for zone activation

* tag 'for-5.18-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: prevent subvol with swapfile from being deleted
  btrfs: do not warn for free space inode in cow_file_range
  btrfs: avoid defragging extents whose next extents are not targets
  btrfs: fix fallocate to use file_modified to update permissions consistently
  btrfs: remove device item and update super block in the same transaction
  btrfs: fix qgroup reserve overflow the qgroup limit
  btrfs: zoned: remove left over ASSERT checking for single profile
  btrfs: zoned: traverse devices under chunk_mutex in btrfs_can_activate_zone
parents 31231092 60021bd7
...@@ -118,7 +118,7 @@ struct btrfs_bio_ctrl { ...@@ -118,7 +118,7 @@ struct btrfs_bio_ctrl {
*/ */
struct extent_changeset { struct extent_changeset {
/* How many bytes are set/cleared in this operation */ /* How many bytes are set/cleared in this operation */
unsigned int bytes_changed; u64 bytes_changed;
/* Changed ranges */ /* Changed ranges */
struct ulist range_changed; struct ulist range_changed;
......
...@@ -2957,8 +2957,9 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode, ...@@ -2957,8 +2957,9 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
return ret; return ret;
} }
static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
{ {
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_state *cached_state = NULL; struct extent_state *cached_state = NULL;
...@@ -2990,6 +2991,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) ...@@ -2990,6 +2991,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
goto out_only_mutex; goto out_only_mutex;
} }
ret = file_modified(file);
if (ret)
goto out_only_mutex;
lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode))); lockstart = round_up(offset, btrfs_inode_sectorsize(BTRFS_I(inode)));
lockend = round_down(offset + len, lockend = round_down(offset + len,
btrfs_inode_sectorsize(BTRFS_I(inode))) - 1; btrfs_inode_sectorsize(BTRFS_I(inode))) - 1;
...@@ -3430,7 +3435,7 @@ static long btrfs_fallocate(struct file *file, int mode, ...@@ -3430,7 +3435,7 @@ static long btrfs_fallocate(struct file *file, int mode,
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (mode & FALLOC_FL_PUNCH_HOLE) if (mode & FALLOC_FL_PUNCH_HOLE)
return btrfs_punch_hole(inode, offset, len); return btrfs_punch_hole(file, offset, len);
/* /*
* Only trigger disk allocation, don't trigger qgroup reserve * Only trigger disk allocation, don't trigger qgroup reserve
...@@ -3452,6 +3457,10 @@ static long btrfs_fallocate(struct file *file, int mode, ...@@ -3452,6 +3457,10 @@ static long btrfs_fallocate(struct file *file, int mode,
goto out; goto out;
} }
ret = file_modified(file);
if (ret)
goto out;
/* /*
* TODO: Move these two operations after we have checked * TODO: Move these two operations after we have checked
* accurate reserved space, or fallocate can still fail but * accurate reserved space, or fallocate can still fail but
......
...@@ -1128,7 +1128,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode, ...@@ -1128,7 +1128,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
int ret = 0; int ret = 0;
if (btrfs_is_free_space_inode(inode)) { if (btrfs_is_free_space_inode(inode)) {
WARN_ON_ONCE(1);
ret = -EINVAL; ret = -EINVAL;
goto out_unlock; goto out_unlock;
} }
...@@ -4488,6 +4487,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) ...@@ -4488,6 +4487,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
dest->root_key.objectid); dest->root_key.objectid);
return -EPERM; return -EPERM;
} }
if (atomic_read(&dest->nr_swapfiles)) {
spin_unlock(&dest->root_item_lock);
btrfs_warn(fs_info,
"attempt to delete subvolume %llu with active swapfile",
root->root_key.objectid);
return -EPERM;
}
root_flags = btrfs_root_flags(&dest->root_item); root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item, btrfs_set_root_flags(&dest->root_item,
root_flags | BTRFS_ROOT_SUBVOL_DEAD); root_flags | BTRFS_ROOT_SUBVOL_DEAD);
...@@ -11107,8 +11113,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, ...@@ -11107,8 +11113,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
* set. We use this counter to prevent snapshots. We must increment it * set. We use this counter to prevent snapshots. We must increment it
* before walking the extents because we don't want a concurrent * before walking the extents because we don't want a concurrent
* snapshot to run after we've already checked the extents. * snapshot to run after we've already checked the extents.
*
* It is possible that subvolume is marked for deletion but still not
* removed yet. To prevent this race, we check the root status before
* activating the swapfile.
*/ */
spin_lock(&root->root_item_lock);
if (btrfs_root_dead(root)) {
spin_unlock(&root->root_item_lock);
btrfs_exclop_finish(fs_info);
btrfs_warn(fs_info,
"cannot activate swapfile because subvolume %llu is being deleted",
root->root_key.objectid);
return -EPERM;
}
atomic_inc(&root->nr_swapfiles); atomic_inc(&root->nr_swapfiles);
spin_unlock(&root->root_item_lock);
isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize); isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
......
...@@ -1239,7 +1239,7 @@ static u32 get_extent_max_capacity(const struct extent_map *em) ...@@ -1239,7 +1239,7 @@ static u32 get_extent_max_capacity(const struct extent_map *em)
} }
static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
bool locked) u32 extent_thresh, u64 newer_than, bool locked)
{ {
struct extent_map *next; struct extent_map *next;
bool ret = false; bool ret = false;
...@@ -1249,11 +1249,12 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, ...@@ -1249,11 +1249,12 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
return false; return false;
/* /*
* We want to check if the next extent can be merged with the current * Here we need to pass @newer_then when checking the next extent, or
* one, which can be an extent created in a past generation, so we pass * we will hit a case we mark current extent for defrag, but the next
* a minimum generation of 0 to defrag_lookup_extent(). * one will not be a target.
* This will just cause extra IO without really reducing the fragments.
*/ */
next = defrag_lookup_extent(inode, em->start + em->len, 0, locked); next = defrag_lookup_extent(inode, em->start + em->len, newer_than, locked);
/* No more em or hole */ /* No more em or hole */
if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
goto out; goto out;
...@@ -1265,6 +1266,13 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em, ...@@ -1265,6 +1266,13 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
*/ */
if (next->len >= get_extent_max_capacity(em)) if (next->len >= get_extent_max_capacity(em))
goto out; goto out;
/* Skip older extent */
if (next->generation < newer_than)
goto out;
/* Also check extent size */
if (next->len >= extent_thresh)
goto out;
ret = true; ret = true;
out: out:
free_extent_map(next); free_extent_map(next);
...@@ -1470,7 +1478,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode, ...@@ -1470,7 +1478,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
goto next; goto next;
next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em, next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,
locked); extent_thresh, newer_than, locked);
if (!next_mergeable) { if (!next_mergeable) {
struct defrag_target_range *last; struct defrag_target_range *last;
......
...@@ -1896,23 +1896,18 @@ static void update_dev_time(const char *device_path) ...@@ -1896,23 +1896,18 @@ static void update_dev_time(const char *device_path)
path_put(&path); path_put(&path);
} }
static int btrfs_rm_dev_item(struct btrfs_device *device) static int btrfs_rm_dev_item(struct btrfs_trans_handle *trans,
struct btrfs_device *device)
{ {
struct btrfs_root *root = device->fs_info->chunk_root; struct btrfs_root *root = device->fs_info->chunk_root;
int ret; int ret;
struct btrfs_path *path; struct btrfs_path *path;
struct btrfs_key key; struct btrfs_key key;
struct btrfs_trans_handle *trans;
path = btrfs_alloc_path(); path = btrfs_alloc_path();
if (!path) if (!path)
return -ENOMEM; return -ENOMEM;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
btrfs_free_path(path);
return PTR_ERR(trans);
}
key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
key.type = BTRFS_DEV_ITEM_KEY; key.type = BTRFS_DEV_ITEM_KEY;
key.offset = device->devid; key.offset = device->devid;
...@@ -1923,21 +1918,12 @@ static int btrfs_rm_dev_item(struct btrfs_device *device) ...@@ -1923,21 +1918,12 @@ static int btrfs_rm_dev_item(struct btrfs_device *device)
if (ret) { if (ret) {
if (ret > 0) if (ret > 0)
ret = -ENOENT; ret = -ENOENT;
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out; goto out;
} }
ret = btrfs_del_item(trans, root, path); ret = btrfs_del_item(trans, root, path);
if (ret) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
}
out: out:
btrfs_free_path(path); btrfs_free_path(path);
if (!ret)
ret = btrfs_commit_transaction(trans);
return ret; return ret;
} }
...@@ -2078,6 +2064,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, ...@@ -2078,6 +2064,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
struct btrfs_dev_lookup_args *args, struct btrfs_dev_lookup_args *args,
struct block_device **bdev, fmode_t *mode) struct block_device **bdev, fmode_t *mode)
{ {
struct btrfs_trans_handle *trans;
struct btrfs_device *device; struct btrfs_device *device;
struct btrfs_fs_devices *cur_devices; struct btrfs_fs_devices *cur_devices;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
...@@ -2098,7 +2085,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, ...@@ -2098,7 +2085,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1); ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
if (ret) if (ret)
goto out; return ret;
device = btrfs_find_device(fs_info->fs_devices, args); device = btrfs_find_device(fs_info->fs_devices, args);
if (!device) { if (!device) {
...@@ -2106,27 +2093,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, ...@@ -2106,27 +2093,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
else else
ret = -ENOENT; ret = -ENOENT;
goto out; return ret;
} }
if (btrfs_pinned_by_swapfile(fs_info, device)) { if (btrfs_pinned_by_swapfile(fs_info, device)) {
btrfs_warn_in_rcu(fs_info, btrfs_warn_in_rcu(fs_info,
"cannot remove device %s (devid %llu) due to active swapfile", "cannot remove device %s (devid %llu) due to active swapfile",
rcu_str_deref(device->name), device->devid); rcu_str_deref(device->name), device->devid);
ret = -ETXTBSY; return -ETXTBSY;
goto out;
} }
if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
ret = BTRFS_ERROR_DEV_TGT_REPLACE; return BTRFS_ERROR_DEV_TGT_REPLACE;
goto out;
}
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
fs_info->fs_devices->rw_devices == 1) { fs_info->fs_devices->rw_devices == 1)
ret = BTRFS_ERROR_DEV_ONLY_WRITABLE; return BTRFS_ERROR_DEV_ONLY_WRITABLE;
goto out;
}
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
mutex_lock(&fs_info->chunk_mutex); mutex_lock(&fs_info->chunk_mutex);
...@@ -2139,14 +2121,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, ...@@ -2139,14 +2121,22 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
if (ret) if (ret)
goto error_undo; goto error_undo;
/* trans = btrfs_start_transaction(fs_info->chunk_root, 0);
* TODO: the superblock still includes this device in its num_devices if (IS_ERR(trans)) {
* counter although write_all_supers() is not locked out. This ret = PTR_ERR(trans);
* could give a filesystem state which requires a degraded mount.
*/
ret = btrfs_rm_dev_item(device);
if (ret)
goto error_undo; goto error_undo;
}
ret = btrfs_rm_dev_item(trans, device);
if (ret) {
/* Any error in dev item removal is critical */
btrfs_crit(fs_info,
"failed to remove device item for devid %llu: %d",
device->devid, ret);
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
btrfs_scrub_cancel_dev(device); btrfs_scrub_cancel_dev(device);
...@@ -2229,7 +2219,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, ...@@ -2229,7 +2219,8 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
free_fs_devices(cur_devices); free_fs_devices(cur_devices);
} }
out: ret = btrfs_commit_transaction(trans);
return ret; return ret;
error_undo: error_undo:
...@@ -2240,7 +2231,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, ...@@ -2240,7 +2231,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
device->fs_devices->rw_devices++; device->fs_devices->rw_devices++;
mutex_unlock(&fs_info->chunk_mutex); mutex_unlock(&fs_info->chunk_mutex);
} }
goto out; return ret;
} }
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev) void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
......
...@@ -1801,7 +1801,6 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, ...@@ -1801,7 +1801,6 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
map = em->map_lookup; map = em->map_lookup;
/* We only support single profile for now */ /* We only support single profile for now */
ASSERT(map->num_stripes == 1);
device = map->stripes[0].dev; device = map->stripes[0].dev;
free_extent_map(em); free_extent_map(em);
...@@ -1976,18 +1975,16 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group) ...@@ -1976,18 +1975,16 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags) bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
{ {
struct btrfs_fs_info *fs_info = fs_devices->fs_info;
struct btrfs_device *device; struct btrfs_device *device;
bool ret = false; bool ret = false;
if (!btrfs_is_zoned(fs_devices->fs_info)) if (!btrfs_is_zoned(fs_info))
return true; return true;
/* Non-single profiles are not supported yet */
ASSERT((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0);
/* Check if there is a device with active zones left */ /* Check if there is a device with active zones left */
mutex_lock(&fs_devices->device_list_mutex); mutex_lock(&fs_info->chunk_mutex);
list_for_each_entry(device, &fs_devices->devices, dev_list) { list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
struct btrfs_zoned_device_info *zinfo = device->zone_info; struct btrfs_zoned_device_info *zinfo = device->zone_info;
if (!device->bdev) if (!device->bdev)
...@@ -1999,7 +1996,7 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags) ...@@ -1999,7 +1996,7 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
break; break;
} }
} }
mutex_unlock(&fs_devices->device_list_mutex); mutex_unlock(&fs_info->chunk_mutex);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment