Commit 5b56b6ed, authored by Christoph Hellwig, committed by Jens Axboe

block: refactor blkdev_get

Move more code that is only run on the outer open but not the open of
the underlying whole device when opening a partition into blkdev_get,
which leads to a much easier to follow structure.

This allows simplifying the disk and module refcounting so that one
reference is held for each open, similar to what we do with normal
file operations.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent ec5d4514
...@@ -1407,46 +1407,12 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed); ...@@ -1407,46 +1407,12 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed);
* mutex_lock(part->bd_mutex) * mutex_lock(part->bd_mutex)
* mutex_lock_nested(whole->bd_mutex, 1) * mutex_lock_nested(whole->bd_mutex, 1)
*/ */
static int __blkdev_get(struct block_device *bdev, struct gendisk *disk,
static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, int partno, fmode_t mode)
int for_part)
{ {
struct block_device *whole = NULL, *claiming = NULL;
struct gendisk *disk;
int ret; int ret;
int partno;
bool first_open = false, unblock_events = true, need_restart;
restart:
need_restart = false;
ret = -ENXIO;
disk = bdev_get_gendisk(bdev, &partno);
if (!disk)
goto out;
if (partno) {
whole = bdget_disk(disk, 0);
if (!whole) {
ret = -ENOMEM;
goto out_put_disk;
}
}
if (!for_part && (mode & FMODE_EXCL)) {
WARN_ON_ONCE(!holder);
if (whole)
claiming = whole;
else
claiming = bdev;
ret = bd_prepare_to_claim(bdev, claiming, holder);
if (ret)
goto out_put_whole;
}
disk_block_events(disk);
mutex_lock_nested(&bdev->bd_mutex, for_part);
if (!bdev->bd_openers) { if (!bdev->bd_openers) {
first_open = true;
bdev->bd_disk = disk; bdev->bd_disk = disk;
bdev->bd_contains = bdev; bdev->bd_contains = bdev;
bdev->bd_partno = partno; bdev->bd_partno = partno;
...@@ -1458,15 +1424,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, ...@@ -1458,15 +1424,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
goto out_clear; goto out_clear;
ret = 0; ret = 0;
if (disk->fops->open) { if (disk->fops->open)
ret = disk->fops->open(bdev, mode); ret = disk->fops->open(bdev, mode);
/*
* If we lost a race with 'disk' being deleted,
* try again. See md.c
*/
if (ret == -ERESTARTSYS)
need_restart = true;
}
if (!ret) { if (!ret) {
bd_set_nr_sectors(bdev, get_capacity(disk)); bd_set_nr_sectors(bdev, get_capacity(disk));
...@@ -1486,14 +1445,23 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, ...@@ -1486,14 +1445,23 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
if (ret) if (ret)
goto out_clear; goto out_clear;
} else { } else {
BUG_ON(for_part); struct block_device *whole = bdget_disk(disk, 0);
ret = __blkdev_get(whole, mode, NULL, 1);
if (ret) mutex_lock_nested(&whole->bd_mutex, 1);
ret = __blkdev_get(whole, disk, 0, mode);
if (ret) {
mutex_unlock(&whole->bd_mutex);
bdput(whole);
goto out_clear; goto out_clear;
bdev->bd_contains = bdgrab(whole); }
whole->bd_part_count++;
mutex_unlock(&whole->bd_mutex);
bdev->bd_contains = whole;
bdev->bd_part = disk_get_part(disk, partno); bdev->bd_part = disk_get_part(disk, partno);
if (!(disk->flags & GENHD_FL_UP) || if (!(disk->flags & GENHD_FL_UP) ||
!bdev->bd_part || !bdev->bd_part->nr_sects) { !bdev->bd_part || !bdev->bd_part->nr_sects) {
__blkdev_put(whole, mode, 1);
ret = -ENXIO; ret = -ENXIO;
goto out_clear; goto out_clear;
} }
...@@ -1513,58 +1481,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, ...@@ -1513,58 +1481,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
(!ret || ret == -ENOMEDIUM)) (!ret || ret == -ENOMEDIUM))
bdev_disk_changed(bdev, ret == -ENOMEDIUM); bdev_disk_changed(bdev, ret == -ENOMEDIUM);
if (ret) if (ret)
goto out_unlock_bdev; return ret;
} }
} }
bdev->bd_openers++; bdev->bd_openers++;
if (for_part)
bdev->bd_part_count++;
if (claiming)
bd_finish_claiming(bdev, claiming, holder);
/*
* Block event polling for write claims if requested. Any write holder
* makes the write_holder state stick until all are released. This is
* good enough and tracking individual writeable reference is too
* fragile given the way @mode is used in blkdev_get/put().
*/
if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder &&
(disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
bdev->bd_write_holder = true;
unblock_events = false;
}
mutex_unlock(&bdev->bd_mutex);
if (unblock_events)
disk_unblock_events(disk);
/* only one opener holds refs to the module and disk */
if (!first_open)
put_disk_and_module(disk);
if (whole)
bdput(whole);
return 0; return 0;
out_clear: out_clear:
disk_put_part(bdev->bd_part); disk_put_part(bdev->bd_part);
bdev->bd_disk = NULL; bdev->bd_disk = NULL;
bdev->bd_part = NULL; bdev->bd_part = NULL;
if (bdev != bdev->bd_contains)
__blkdev_put(bdev->bd_contains, mode, 1);
bdev->bd_contains = NULL; bdev->bd_contains = NULL;
out_unlock_bdev:
if (claiming)
bd_abort_claiming(bdev, claiming, holder);
mutex_unlock(&bdev->bd_mutex);
disk_unblock_events(disk);
out_put_whole:
if (whole)
bdput(whole);
out_put_disk:
put_disk_and_module(disk);
if (need_restart)
goto restart;
out:
return ret; return ret;
} }
...@@ -1589,7 +1516,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, ...@@ -1589,7 +1516,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder,
*/ */
static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
{ {
int ret, perm = 0; struct block_device *claiming;
bool unblock_events = true;
struct gendisk *disk;
int perm = 0;
int partno;
int ret;
if (mode & FMODE_READ) if (mode & FMODE_READ)
perm |= MAY_READ; perm |= MAY_READ;
...@@ -1599,12 +1531,66 @@ static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) ...@@ -1599,12 +1531,66 @@ static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
if (ret) if (ret)
goto bdput; goto bdput;
ret =__blkdev_get(bdev, mode, holder, 0); /*
if (ret) * If we lost a race with 'disk' being deleted, try again. See md.c.
*/
retry:
ret = -ENXIO;
disk = bdev_get_gendisk(bdev, &partno);
if (!disk)
goto bdput; goto bdput;
return 0;
if (mode & FMODE_EXCL) {
WARN_ON_ONCE(!holder);
ret = -ENOMEM;
claiming = bdget_disk(disk, 0);
if (!claiming)
goto put_disk;
ret = bd_prepare_to_claim(bdev, claiming, holder);
if (ret)
goto put_claiming;
}
disk_block_events(disk);
mutex_lock(&bdev->bd_mutex);
ret =__blkdev_get(bdev, disk, partno, mode);
if (!(mode & FMODE_EXCL)) {
; /* nothing to do here */
} else if (ret) {
bd_abort_claiming(bdev, claiming, holder);
} else {
bd_finish_claiming(bdev, claiming, holder);
/*
* Block event polling for write claims if requested. Any write
* holder makes the write_holder state stick until all are
* released. This is good enough and tracking individual
* writeable reference is too fragile given the way @mode is
* used in blkdev_get/put().
*/
if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
(disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
bdev->bd_write_holder = true;
unblock_events = false;
}
}
mutex_unlock(&bdev->bd_mutex);
if (unblock_events)
disk_unblock_events(disk);
put_claiming:
if (mode & FMODE_EXCL)
bdput(claiming);
put_disk:
if (ret)
put_disk_and_module(disk);
if (ret == -ERESTARTSYS)
goto retry;
bdput: bdput:
if (ret)
bdput(bdev); bdput(bdev);
return ret; return ret;
} }
...@@ -1753,8 +1739,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) ...@@ -1753,8 +1739,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
if (bdev_is_partition(bdev)) if (bdev_is_partition(bdev))
victim = bdev->bd_contains; victim = bdev->bd_contains;
bdev->bd_contains = NULL; bdev->bd_contains = NULL;
put_disk_and_module(disk);
} else { } else {
if (!bdev_is_partition(bdev) && disk->fops->release) if (!bdev_is_partition(bdev) && disk->fops->release)
disk->fops->release(disk, mode); disk->fops->release(disk, mode);
...@@ -1767,6 +1751,8 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) ...@@ -1767,6 +1751,8 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
void blkdev_put(struct block_device *bdev, fmode_t mode) void blkdev_put(struct block_device *bdev, fmode_t mode)
{ {
struct gendisk *disk = bdev->bd_disk;
mutex_lock(&bdev->bd_mutex); mutex_lock(&bdev->bd_mutex);
if (mode & FMODE_EXCL) { if (mode & FMODE_EXCL) {
...@@ -1795,7 +1781,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) ...@@ -1795,7 +1781,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
* unblock evpoll if it was a write holder. * unblock evpoll if it was a write holder.
*/ */
if (bdev_free && bdev->bd_write_holder) { if (bdev_free && bdev->bd_write_holder) {
disk_unblock_events(bdev->bd_disk); disk_unblock_events(disk);
bdev->bd_write_holder = false; bdev->bd_write_holder = false;
} }
} }
...@@ -1805,11 +1791,12 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) ...@@ -1805,11 +1791,12 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
* event. This is to ensure detection of media removal commanded * event. This is to ensure detection of media removal commanded
* from userland - e.g. eject(1). * from userland - e.g. eject(1).
*/ */
disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE); disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
mutex_unlock(&bdev->bd_mutex); mutex_unlock(&bdev->bd_mutex);
__blkdev_put(bdev, mode, 0); __blkdev_put(bdev, mode, 0);
put_disk_and_module(disk);
} }
EXPORT_SYMBOL(blkdev_put); EXPORT_SYMBOL(blkdev_put);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment