Commit eac3ef26 authored by Christoph Hellwig

nvme-pci: split the initial probe from the reset path

nvme_reset_work is a little fragile as it needs to handle both resetting
a live controller and initializing one during probe.  Split out the initial
probe and open code it in nvme_probe and leave nvme_reset_work to just do
the live controller reset.

This fixes a recently introduced bug where nvme_dev_disable causes a NULL
pointer dereference in blk_mq_quiesce_tagset because the tagset pointer
is not set when the reset state is entered directly from the new state.
The separate probe code skips the reset state and probes directly, which
fixes this.

To make sure the system isn't single threaded on enabling nvme
controllers, set the PROBE_PREFER_ASYNCHRONOUS flag in the device_driver
structure so that the driver core probes in parallel.

Fixes: 98d81f0d ("nvme: use blk_mq_[un]quiesce_tagset")
Reported-by: Gerd Bayer <gbayer@linux.ibm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Tested-by: Gerd Bayer <gbayer@linux.ibm.com>
parent acb71e53
...@@ -2837,15 +2837,7 @@ static void nvme_reset_work(struct work_struct *work) ...@@ -2837,15 +2837,7 @@ static void nvme_reset_work(struct work_struct *work)
result = nvme_pci_enable(dev); result = nvme_pci_enable(dev);
if (result) if (result)
goto out_unlock; goto out_unlock;
nvme_start_admin_queue(&dev->ctrl);
if (!dev->ctrl.admin_q) {
result = nvme_pci_alloc_admin_tag_set(dev);
if (result)
goto out_unlock;
} else {
nvme_start_admin_queue(&dev->ctrl);
}
mutex_unlock(&dev->shutdown_lock); mutex_unlock(&dev->shutdown_lock);
/* /*
...@@ -2873,37 +2865,23 @@ static void nvme_reset_work(struct work_struct *work) ...@@ -2873,37 +2865,23 @@ static void nvme_reset_work(struct work_struct *work)
if (result) if (result)
goto out; goto out;
if (dev->ctrl.tagset) { /*
/* * Freeze and update the number of I/O queues as those might have
* This is a controller reset and we already have a tagset. * changed. If there are no I/O queues left after this reset, keep the
* Freeze and update the number of I/O queues as those might have * changed. If there are no I/O queues left after this reset, keep the
* changed. If there are no I/O queues left after this reset, */
* keep the controller around but remove all namespaces. if (dev->online_queues > 1) {
*/ nvme_start_queues(&dev->ctrl);
if (dev->online_queues > 1) { nvme_wait_freeze(&dev->ctrl);
nvme_start_queues(&dev->ctrl); nvme_pci_update_nr_queues(dev);
nvme_wait_freeze(&dev->ctrl); nvme_dbbuf_set(dev);
nvme_pci_update_nr_queues(dev); nvme_unfreeze(&dev->ctrl);
nvme_dbbuf_set(dev);
nvme_unfreeze(&dev->ctrl);
} else {
dev_warn(dev->ctrl.device, "IO queues lost\n");
nvme_mark_namespaces_dead(&dev->ctrl);
nvme_start_queues(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl);
nvme_free_tagset(dev);
}
} else { } else {
/* dev_warn(dev->ctrl.device, "IO queues lost\n");
* First probe. Still allow the controller to show up even if nvme_mark_namespaces_dead(&dev->ctrl);
* there are no namespaces. nvme_start_queues(&dev->ctrl);
*/ nvme_remove_namespaces(&dev->ctrl);
if (dev->online_queues > 1) { nvme_free_tagset(dev);
nvme_pci_alloc_tag_set(dev);
nvme_dbbuf_set(dev);
} else {
dev_warn(dev->ctrl.device, "IO queues not created\n");
}
} }
/* /*
...@@ -3059,15 +3037,6 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) ...@@ -3059,15 +3037,6 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
return 0; return 0;
} }
static void nvme_async_probe(void *data, async_cookie_t cookie)
{
struct nvme_dev *dev = data;
flush_work(&dev->ctrl.reset_work);
flush_work(&dev->ctrl.scan_work);
nvme_put_ctrl(&dev->ctrl);
}
static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
const struct pci_device_id *id) const struct pci_device_id *id)
{ {
...@@ -3159,12 +3128,69 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -3159,12 +3128,69 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto out_release_prp_pools; goto out_release_prp_pools;
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
result = nvme_pci_enable(dev);
if (result)
goto out_release_iod_mempool;
result = nvme_pci_alloc_admin_tag_set(dev);
if (result)
goto out_disable;
/*
* Mark the controller as connecting before sending admin commands to
* allow the timeout handler to do the right thing.
*/
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
dev_warn(dev->ctrl.device,
"failed to mark controller CONNECTING\n");
result = -EBUSY;
goto out_disable;
}
result = nvme_init_ctrl_finish(&dev->ctrl, false);
if (result)
goto out_disable;
nvme_dbbuf_dma_alloc(dev);
result = nvme_setup_host_mem(dev);
if (result < 0)
goto out_disable;
result = nvme_setup_io_queues(dev);
if (result)
goto out_disable;
if (dev->online_queues > 1) {
nvme_pci_alloc_tag_set(dev);
nvme_dbbuf_set(dev);
} else {
dev_warn(dev->ctrl.device, "IO queues not created\n");
}
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
dev_warn(dev->ctrl.device,
"failed to mark controller live state\n");
result = -ENODEV;
goto out_disable;
}
pci_set_drvdata(pdev, dev); pci_set_drvdata(pdev, dev);
nvme_reset_ctrl(&dev->ctrl); nvme_start_ctrl(&dev->ctrl);
async_schedule(nvme_async_probe, dev); nvme_put_ctrl(&dev->ctrl);
return 0; return 0;
out_disable:
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
nvme_dev_disable(dev, true);
nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev);
nvme_dbbuf_dma_free(dev);
nvme_free_queues(dev, 0);
out_release_iod_mempool:
mempool_destroy(dev->iod_mempool);
out_release_prp_pools: out_release_prp_pools:
nvme_release_prp_pools(dev); nvme_release_prp_pools(dev);
out_dev_unmap: out_dev_unmap:
...@@ -3560,11 +3586,12 @@ static struct pci_driver nvme_driver = { ...@@ -3560,11 +3586,12 @@ static struct pci_driver nvme_driver = {
.probe = nvme_probe, .probe = nvme_probe,
.remove = nvme_remove, .remove = nvme_remove,
.shutdown = nvme_shutdown, .shutdown = nvme_shutdown,
#ifdef CONFIG_PM_SLEEP
.driver = { .driver = {
.pm = &nvme_dev_pm_ops, .probe_type = PROBE_PREFER_ASYNCHRONOUS,
}, #ifdef CONFIG_PM_SLEEP
.pm = &nvme_dev_pm_ops,
#endif #endif
},
.sriov_configure = pci_sriov_configure_simple, .sriov_configure = pci_sriov_configure_simple,
.err_handler = &nvme_err_handler, .err_handler = &nvme_err_handler,
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment