Commit 4669e13c authored by Linus Torvalds

Merge tag 'block-5.14-2021-07-30' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - gendisk freeing fix (Christoph)

 - blk-iocost wake ordering fix (Tejun)

 - tag allocation error handling fix (John)

 - loop locking fix. While this isn't the prettiest fix in the world,
   nobody has any good alternatives for 5.14. Something to likely
   revisit for 5.15. (Tetsuo)

* tag 'block-5.14-2021-07-30' of git://git.kernel.dk/linux-block:
  block: delay freeing the gendisk
  blk-iocost: fix operation ordering in iocg_wake_fn()
  blk-mq-sched: Fix blk_mq_sched_alloc_tags() error handling
  loop: reintroduce global lock for safe loop_validate_file() traversal
parents 27eb687b 340e8457
...@@ -1440,16 +1440,17 @@ static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode, ...@@ -1440,16 +1440,17 @@ static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode,
return -1; return -1;
iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost); iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost);
wait->committed = true;
/* /*
* autoremove_wake_function() removes the wait entry only when it * autoremove_wake_function() removes the wait entry only when it
* actually changed the task state. We want the wait always * actually changed the task state. We want the wait always removed.
* removed. Remove explicitly and use default_wake_function(). * Remove explicitly and use default_wake_function(). Note that the
* order of operations is important as finish_wait() tests whether
* @wq_entry is removed without grabbing the lock.
*/ */
list_del_init(&wq_entry->entry);
wait->committed = true;
default_wake_function(wq_entry, mode, flags, key); default_wake_function(wq_entry, mode, flags, key);
list_del_init_careful(&wq_entry->entry);
return 0; return 0;
} }
......
...@@ -515,17 +515,6 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, ...@@ -515,17 +515,6 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
percpu_ref_put(&q->q_usage_counter); percpu_ref_put(&q->q_usage_counter);
} }
/*
 * Release the scheduler tags attached to @hctx, if any: free the requests,
 * then the request map, and finally clear hctx->sched_tags.
 */
static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
				   struct blk_mq_hw_ctx *hctx,
				   unsigned int hctx_idx)
{
	/* Nothing attached, nothing to free. */
	if (!hctx->sched_tags)
		return;

	blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
	blk_mq_free_rq_map(hctx->sched_tags, set->flags);
	hctx->sched_tags = NULL;
}
static int blk_mq_sched_alloc_tags(struct request_queue *q, static int blk_mq_sched_alloc_tags(struct request_queue *q,
struct blk_mq_hw_ctx *hctx, struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx) unsigned int hctx_idx)
...@@ -539,8 +528,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q, ...@@ -539,8 +528,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
return -ENOMEM; return -ENOMEM;
ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests); ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
if (ret) if (ret) {
blk_mq_sched_free_tags(set, hctx, hctx_idx); blk_mq_free_rq_map(hctx->sched_tags, set->flags);
hctx->sched_tags = NULL;
}
return ret; return ret;
} }
......
...@@ -1079,10 +1079,9 @@ static void disk_release(struct device *dev) ...@@ -1079,10 +1079,9 @@ static void disk_release(struct device *dev)
disk_release_events(disk); disk_release_events(disk);
kfree(disk->random); kfree(disk->random);
xa_destroy(&disk->part_tbl); xa_destroy(&disk->part_tbl);
bdput(disk->part0);
if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue) if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
blk_put_queue(disk->queue); blk_put_queue(disk->queue);
kfree(disk); bdput(disk->part0); /* frees the disk */
} }
struct class block_class = { struct class block_class = {
.name = "block", .name = "block",
......
...@@ -88,6 +88,47 @@ ...@@ -88,6 +88,47 @@
static DEFINE_IDR(loop_index_idr); static DEFINE_IDR(loop_index_idr);
static DEFINE_MUTEX(loop_ctl_mutex); static DEFINE_MUTEX(loop_ctl_mutex);
static DEFINE_MUTEX(loop_validate_mutex);
/**
 * loop_global_lock_killable() - take locks for safe loop_validate_file() test
 *
 * @lo: struct loop_device
 * @global: true if @lo is about to bind another "struct loop_device", false otherwise
 *
 * Returns 0 on success, -EINTR otherwise.
 *
 * loop_validate_file() walks over other "struct loop_device" instances when
 * is_loop_device() is true, so concurrent loop_configure()/loop_change_fd()/
 * __loop_clr_fd() calls have to be serialized by a global lock. When @global
 * is true, loop_validate_mutex is taken first and @lo->lo_mutex second; when
 * it is false, only @lo->lo_mutex is taken.
 */
static int loop_global_lock_killable(struct loop_device *lo, bool global)
{
	int ret = global ? mutex_lock_killable(&loop_validate_mutex) : 0;

	if (ret)
		return ret;

	ret = mutex_lock_killable(&lo->lo_mutex);
	if (!ret)
		return 0;

	/* The per-device lock was not acquired: drop the global lock again. */
	if (global)
		mutex_unlock(&loop_validate_mutex);
	return ret;
}
/**
 * loop_global_unlock() - release locks taken by loop_global_lock_killable()
 *
 * @lo: struct loop_device
 * @global: true if @lo was about to bind another "struct loop_device", false otherwise
 *
 * Drops @lo->lo_mutex first and then, only when @global is true,
 * loop_validate_mutex.
 */
static void loop_global_unlock(struct loop_device *lo, bool global)
{
	mutex_unlock(&lo->lo_mutex);

	/* The global lock is only held when @global was true at lock time. */
	if (!global)
		return;

	mutex_unlock(&loop_validate_mutex);
}
static int max_part; static int max_part;
static int part_shift; static int part_shift;
...@@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) ...@@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
while (is_loop_device(f)) { while (is_loop_device(f)) {
struct loop_device *l; struct loop_device *l;
lockdep_assert_held(&loop_validate_mutex);
if (f->f_mapping->host->i_rdev == bdev->bd_dev) if (f->f_mapping->host->i_rdev == bdev->bd_dev)
return -EBADF; return -EBADF;
l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; l = I_BDEV(f->f_mapping->host)->bd_disk->private_data;
if (l->lo_state != Lo_bound) { if (l->lo_state != Lo_bound)
return -EINVAL; return -EINVAL;
} /* Order wrt setting lo->lo_backing_file in loop_configure(). */
rmb();
f = l->lo_backing_file; f = l->lo_backing_file;
} }
if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
...@@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) ...@@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
unsigned int arg) unsigned int arg)
{ {
struct file *file = NULL, *old_file; struct file *file = fget(arg);
int error; struct file *old_file;
bool partscan; int error;
bool partscan;
bool is_loop;
error = mutex_lock_killable(&lo->lo_mutex); if (!file)
return -EBADF;
is_loop = is_loop_device(file);
error = loop_global_lock_killable(lo, is_loop);
if (error) if (error)
return error; goto out_putf;
error = -ENXIO; error = -ENXIO;
if (lo->lo_state != Lo_bound) if (lo->lo_state != Lo_bound)
goto out_err; goto out_err;
...@@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, ...@@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
goto out_err; goto out_err;
error = -EBADF;
file = fget(arg);
if (!file)
goto out_err;
error = loop_validate_file(file, bdev); error = loop_validate_file(file, bdev);
if (error) if (error)
goto out_err; goto out_err;
...@@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, ...@@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
loop_update_dio(lo); loop_update_dio(lo);
blk_mq_unfreeze_queue(lo->lo_queue); blk_mq_unfreeze_queue(lo->lo_queue);
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
mutex_unlock(&lo->lo_mutex); loop_global_unlock(lo, is_loop);
/*
* Flush loop_validate_file() before fput(), for l->lo_backing_file
* might be pointing at old_file which might be the last reference.
*/
if (!is_loop) {
mutex_lock(&loop_validate_mutex);
mutex_unlock(&loop_validate_mutex);
}
/* /*
* We must drop file reference outside of lo_mutex as dropping * We must drop file reference outside of lo_mutex as dropping
* the file ref can take open_mutex which creates circular locking * the file ref can take open_mutex which creates circular locking
...@@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, ...@@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
return 0; return 0;
out_err: out_err:
mutex_unlock(&lo->lo_mutex); loop_global_unlock(lo, is_loop);
if (file) out_putf:
fput(file); fput(file);
return error; return error;
} }
...@@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, ...@@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
struct block_device *bdev, struct block_device *bdev,
const struct loop_config *config) const struct loop_config *config)
{ {
struct file *file; struct file *file = fget(config->fd);
struct inode *inode; struct inode *inode;
struct address_space *mapping; struct address_space *mapping;
int error; int error;
loff_t size; loff_t size;
bool partscan; bool partscan;
unsigned short bsize; unsigned short bsize;
bool is_loop;
if (!file)
return -EBADF;
is_loop = is_loop_device(file);
/* This is safe, since we have a reference from open(). */ /* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE); __module_get(THIS_MODULE);
error = -EBADF;
file = fget(config->fd);
if (!file)
goto out;
/* /*
* If we don't hold exclusive handle for the device, upgrade to it * If we don't hold exclusive handle for the device, upgrade to it
* here to avoid changing device under exclusive owner. * here to avoid changing device under exclusive owner.
...@@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, ...@@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
goto out_putf; goto out_putf;
} }
error = mutex_lock_killable(&lo->lo_mutex); error = loop_global_lock_killable(lo, is_loop);
if (error) if (error)
goto out_bdev; goto out_bdev;
...@@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, ...@@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
size = get_loop_size(lo, file); size = get_loop_size(lo, file);
loop_set_size(lo, size); loop_set_size(lo, size);
/* Order wrt reading lo_state in loop_validate_file(). */
wmb();
lo->lo_state = Lo_bound; lo->lo_state = Lo_bound;
if (part_shift) if (part_shift)
lo->lo_flags |= LO_FLAGS_PARTSCAN; lo->lo_flags |= LO_FLAGS_PARTSCAN;
...@@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, ...@@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
* put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
*/ */
bdgrab(bdev); bdgrab(bdev);
mutex_unlock(&lo->lo_mutex); loop_global_unlock(lo, is_loop);
if (partscan) if (partscan)
loop_reread_partitions(lo); loop_reread_partitions(lo);
if (!(mode & FMODE_EXCL)) if (!(mode & FMODE_EXCL))
...@@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, ...@@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
return 0; return 0;
out_unlock: out_unlock:
mutex_unlock(&lo->lo_mutex); loop_global_unlock(lo, is_loop);
out_bdev: out_bdev:
if (!(mode & FMODE_EXCL)) if (!(mode & FMODE_EXCL))
bd_abort_claiming(bdev, loop_configure); bd_abort_claiming(bdev, loop_configure);
out_putf: out_putf:
fput(file); fput(file);
out:
/* This is safe: open() is still holding a reference. */ /* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE); module_put(THIS_MODULE);
return error; return error;
...@@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) ...@@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
int lo_number; int lo_number;
struct loop_worker *pos, *worker; struct loop_worker *pos, *worker;
/*
* Flush loop_configure() and loop_change_fd(). It is acceptable for
* loop_validate_file() to succeed, for actual clear operation has not
* started yet.
*/
mutex_lock(&loop_validate_mutex);
mutex_unlock(&loop_validate_mutex);
/*
* loop_validate_file() now fails because l->lo_state != Lo_bound
* became visible.
*/
mutex_lock(&lo->lo_mutex); mutex_lock(&lo->lo_mutex);
if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
err = -ENXIO; err = -ENXIO;
......
...@@ -812,6 +812,8 @@ static void bdev_free_inode(struct inode *inode) ...@@ -812,6 +812,8 @@ static void bdev_free_inode(struct inode *inode)
free_percpu(bdev->bd_stats); free_percpu(bdev->bd_stats);
kfree(bdev->bd_meta_info); kfree(bdev->bd_meta_info);
if (!bdev_is_partition(bdev))
kfree(bdev->bd_disk);
kmem_cache_free(bdev_cachep, BDEV_I(inode)); kmem_cache_free(bdev_cachep, BDEV_I(inode));
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment