Commit 3b97f5a0 authored by Hannes Reinecke, committed by Keith Busch

nvme-multipath: avoid hang on inaccessible namespaces

During repetitive namespace remapping operations on the target, the
namespace might have changed between the time the initial scan was
performed and the time the partition scan was invoked by device_add_disk()
in nvme_mpath_set_live(). We then end up with a stuck scanning process:

[<0>] folio_wait_bit_common+0x12a/0x310
[<0>] filemap_read_folio+0x97/0xd0
[<0>] do_read_cache_folio+0x108/0x390
[<0>] read_part_sector+0x31/0xa0
[<0>] read_lba+0xc5/0x160
[<0>] efi_partition+0xd9/0x8f0
[<0>] bdev_disk_changed+0x23d/0x6d0
[<0>] blkdev_get_whole+0x78/0xc0
[<0>] bdev_open+0x2c6/0x3b0
[<0>] bdev_file_open_by_dev+0xcb/0x120
[<0>] disk_scan_partitions+0x5d/0x100
[<0>] device_add_disk+0x402/0x420
[<0>] nvme_mpath_set_live+0x4f/0x1f0 [nvme_core]
[<0>] nvme_mpath_add_disk+0x107/0x120 [nvme_core]
[<0>] nvme_alloc_ns+0xac6/0xe60 [nvme_core]
[<0>] nvme_scan_ns+0x2dd/0x3e0 [nvme_core]
[<0>] nvme_scan_work+0x1a3/0x490 [nvme_core]

This happens when we have several paths, some of which are inaccessible,
and the active paths are removed first. Then nvme_find_path() will requeue
I/O in the ns_head (as paths are present), but the requeue list is never
triggered as all remaining paths are inactive.

This patch checks for NVME_NSHEAD_DISK_LIVE in nvme_available_path(),
and requeues I/O after NVME_NSHEAD_DISK_LIVE has been cleared once
the last path has been removed, so that pending I/O is properly terminated.
Signed-off-by: Hannes Reinecke <hare@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Keith Busch <kbusch@kernel.org>
parent 63bcf901
@@ -421,6 +421,9 @@ static bool nvme_available_path(struct nvme_ns_head *head)
 {
 	struct nvme_ns *ns;
 
+	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
+		return NULL;
+
 	list_for_each_entry_rcu(ns, &head->list, siblings) {
 		if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
 			continue;
@@ -969,11 +972,16 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
 {
 	if (!head->disk)
 		return;
-	kblockd_schedule_work(&head->requeue_work);
-	if (test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
+	if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
 		nvme_cdev_del(&head->cdev, &head->cdev_device);
 		del_gendisk(head->disk);
 	}
+	/*
+	 * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
+	 * to allow multipath to fail all I/O.
+	 */
+	synchronize_srcu(&head->srcu);
+	kblockd_schedule_work(&head->requeue_work);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)
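For context, the effect of the new check is easiest to see from the I/O submission side: when nvme_find_path() finds no usable path, nvme_available_path() decides whether a bio is parked on the ns_head requeue list or failed. The following is a rough sketch of that decision as it appears in the upstream nvme_ns_head_submit_bio(); it is simplified here (locking around srcu, logging and accounting abbreviated), and the sketch function name is illustrative only, not part of this patch:

/*
 * Sketch of the caller-side dispatch (simplified from
 * nvme_ns_head_submit_bio(); illustrative only).
 */
static void nvme_ns_head_submit_bio_sketch(struct nvme_ns_head *head,
					   struct bio *bio)
{
	struct nvme_ns *ns = nvme_find_path(head);

	if (ns) {
		/* A usable path exists: redirect the bio to it. */
		bio_set_dev(bio, ns->disk->part0);
		submit_bio_noacct(bio);
	} else if (nvme_available_path(head)) {
		/*
		 * No usable path right now, but the head may still get one:
		 * park the bio on the requeue list until a path returns.
		 */
		spin_lock_irq(&head->requeue_lock);
		bio_list_add(&head->requeue_list, bio);
		spin_unlock_irq(&head->requeue_lock);
	} else {
		/*
		 * With NVME_NSHEAD_DISK_LIVE cleared, nvme_available_path()
		 * now returns false, so the bio is failed instead of being
		 * requeued forever.
		 */
		bio_io_error(bio);
	}
}

With NVME_NSHEAD_DISK_LIVE cleared before the final requeue_work run, any bios still sitting on the requeue list take the bio_io_error() branch when they are resubmitted instead of being re-parked, which is what releases the partition scan stuck in the trace above.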