Commit 30d6592f authored by Keith Busch's avatar Keith Busch Committed by Kamal Mostafa

NVMe: Don't unmap controller registers on reset

BugLink: http://bugs.launchpad.net/bugs/1621113

Commit b00a726a upstream.

Unmapping the registers on reset or shutdown is not necessary. Keeping
the mapping simplifies reset handling.

This was backported to 4.4 stable tree because it prevents a race
between the reset_work and the shutdown hook, that may provoke the Oops
below, in the nvme_wait_ready function.

The Oops is easily reproducible on systems that will kexec/reboot
immediately after booting, which is actually the common use case for
kexec based bootloaders, like Petitboot.  This patch removes the
unnecessary early unmapping of the PCI configuration in the shutdown
hook, allowing a proper handling of the reset work.

Unable to handle kernel paging request for data at address 0x0000001c
Faulting instruction address: 0xd000000000720b38
cpu 0x1b: Vector: 300 (Data Access) at [c000007f7a9a38a0]
    pc: d000000000720b38: nvme_wait_ready+0x50/0x120 [nvme]
    lr: d000000000720b7c: nvme_wait_ready+0x94/0x120 [nvme]
    sp: c000007f7a9a3b20
   msr: 9000000000009033
   dar: 1c
 dsisr: 40000000
  current = 0xc000007f7a926c80
  paca    = 0xc00000000fe85100   softe: 0        irq_happened: 0x01
    pid   = 2608, comm = kworker/27:1
enter ? for help
[c000007f7a9a3bb0] d00000000072572c nvme_setup_io_queues+0xc08/0x1218 [nvme]
[c000007f7a9a3c70] c00000000006bbd8 process_one_work+0x228/0x378
[c000007f7a9a3d00] c00000000006c050 worker_thread+0x2e0/0x420
[c000007f7a9a3d80] c00000000007161c kthread+0xfc/0x108
[c000007f7a9a3e30] c0000000000094b4 ret_from_kernel_thread+0x5c/0xa8
Signed-off-by: default avatarKeith Busch <keith.busch@intel.com>
Reviewed-by: default avatarJohannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
Signed-off-by: default avatarJens Axboe <axboe@fb.com>
Signed-off-by: default avatarGabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
	[Backport to v4.4.y]
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
	[Backport to Ubuntu Xenial]
Signed-off-by: default avatarTim Gardner <tim.gardner@canonical.com>
Signed-off-by: default avatarKamal Mostafa <kamal@canonical.com>
parent 7a100d54
...@@ -1717,34 +1717,24 @@ static int nvme_dev_add(struct nvme_dev *dev) ...@@ -1717,34 +1717,24 @@ static int nvme_dev_add(struct nvme_dev *dev)
return 0; return 0;
} }
static int nvme_dev_map(struct nvme_dev *dev) static int nvme_pci_enable(struct nvme_dev *dev)
{ {
u64 cap; u64 cap;
int bars, result = -ENOMEM; int result = -ENOMEM;
struct pci_dev *pdev = to_pci_dev(dev->dev); struct pci_dev *pdev = to_pci_dev(dev->dev);
if (pci_enable_device_mem(pdev)) if (pci_enable_device_mem(pdev))
return result; return result;
pci_set_master(pdev); pci_set_master(pdev);
bars = pci_select_bars(pdev, IORESOURCE_MEM);
if (!bars)
goto disable_pci;
if (pci_request_selected_regions(pdev, bars, "nvme"))
goto disable_pci;
if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) && if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32))) dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
goto disable; goto disable;
dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
if (!dev->bar)
goto disable;
if (readl(dev->bar + NVME_REG_CSTS) == -1) { if (readl(dev->bar + NVME_REG_CSTS) == -1) {
result = -ENODEV; result = -ENODEV;
goto unmap; goto disable;
} }
/* /*
...@@ -1752,14 +1742,10 @@ static int nvme_dev_map(struct nvme_dev *dev) ...@@ -1752,14 +1742,10 @@ static int nvme_dev_map(struct nvme_dev *dev)
* interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll * interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll
* adjust this later. * adjust this later.
*/ */
if (pci_enable_msix(pdev, dev->entry, 1)) { if (!pdev->irq) {
pci_enable_msi(pdev); result = pci_enable_msix(pdev, dev->entry, 1);
dev->entry[0].vector = pdev->irq; if (result < 0)
} goto disable;
if (!dev->entry[0].vector) {
result = -ENODEV;
goto unmap;
} }
cap = lo_hi_readq(dev->bar + NVME_REG_CAP); cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
...@@ -1786,17 +1772,20 @@ static int nvme_dev_map(struct nvme_dev *dev) ...@@ -1786,17 +1772,20 @@ static int nvme_dev_map(struct nvme_dev *dev)
pci_save_state(pdev); pci_save_state(pdev);
return 0; return 0;
unmap:
iounmap(dev->bar);
dev->bar = NULL;
disable: disable:
pci_release_regions(pdev); pci_release_regions(pdev);
disable_pci:
pci_disable_device(pdev);
return result; return result;
} }
static void nvme_dev_unmap(struct nvme_dev *dev) static void nvme_dev_unmap(struct nvme_dev *dev)
{
if (dev->bar)
iounmap(dev->bar);
pci_release_regions(to_pci_dev(dev->dev));
}
static void nvme_pci_disable(struct nvme_dev *dev)
{ {
struct pci_dev *pdev = to_pci_dev(dev->dev); struct pci_dev *pdev = to_pci_dev(dev->dev);
...@@ -1805,12 +1794,6 @@ static void nvme_dev_unmap(struct nvme_dev *dev) ...@@ -1805,12 +1794,6 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
else if (pdev->msix_enabled) else if (pdev->msix_enabled)
pci_disable_msix(pdev); pci_disable_msix(pdev);
if (dev->bar) {
iounmap(dev->bar);
dev->bar = NULL;
pci_release_regions(pdev);
}
if (pci_is_enabled(pdev)) { if (pci_is_enabled(pdev)) {
pci_disable_pcie_error_reporting(pdev); pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev); pci_disable_device(pdev);
...@@ -1825,7 +1808,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) ...@@ -1825,7 +1808,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
del_timer_sync(&dev->watchdog_timer); del_timer_sync(&dev->watchdog_timer);
mutex_lock(&dev->shutdown_lock); mutex_lock(&dev->shutdown_lock);
if (dev->bar) { if (pci_is_enabled(to_pci_dev(dev->dev))) {
nvme_stop_queues(&dev->ctrl); nvme_stop_queues(&dev->ctrl);
csts = readl(dev->bar + NVME_REG_CSTS); csts = readl(dev->bar + NVME_REG_CSTS);
} }
...@@ -1839,7 +1822,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) ...@@ -1839,7 +1822,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_disable_io_queues(dev); nvme_disable_io_queues(dev);
nvme_disable_admin_queue(dev, shutdown); nvme_disable_admin_queue(dev, shutdown);
} }
nvme_dev_unmap(dev); nvme_pci_disable(dev);
for (i = dev->queue_count - 1; i >= 0; i--) for (i = dev->queue_count - 1; i >= 0; i--)
nvme_clear_queue(dev->queues[i]); nvme_clear_queue(dev->queues[i]);
...@@ -1895,7 +1878,7 @@ static void nvme_reset_work(struct work_struct *work) ...@@ -1895,7 +1878,7 @@ static void nvme_reset_work(struct work_struct *work)
* If we're called to reset a live controller first shut it down before * If we're called to reset a live controller first shut it down before
* moving on. * moving on.
*/ */
if (dev->bar) if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
nvme_dev_disable(dev, false); nvme_dev_disable(dev, false);
if (test_bit(NVME_CTRL_REMOVING, &dev->flags)) if (test_bit(NVME_CTRL_REMOVING, &dev->flags))
...@@ -1903,7 +1886,7 @@ static void nvme_reset_work(struct work_struct *work) ...@@ -1903,7 +1886,7 @@ static void nvme_reset_work(struct work_struct *work)
set_bit(NVME_CTRL_RESETTING, &dev->flags); set_bit(NVME_CTRL_RESETTING, &dev->flags);
result = nvme_dev_map(dev); result = nvme_pci_enable(dev);
if (result) if (result)
goto out; goto out;
...@@ -2028,6 +2011,27 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { ...@@ -2028,6 +2011,27 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.free_ctrl = nvme_pci_free_ctrl, .free_ctrl = nvme_pci_free_ctrl,
}; };
static int nvme_dev_map(struct nvme_dev *dev)
{
int bars;
struct pci_dev *pdev = to_pci_dev(dev->dev);
bars = pci_select_bars(pdev, IORESOURCE_MEM);
if (!bars)
return -ENODEV;
if (pci_request_selected_regions(pdev, bars, "nvme"))
return -ENODEV;
dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
if (!dev->bar)
goto release;
return 0;
release:
pci_release_regions(pdev);
return -ENODEV;
}
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{ {
int node, result = -ENOMEM; int node, result = -ENOMEM;
...@@ -2052,6 +2056,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -2052,6 +2056,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev->dev = get_device(&pdev->dev); dev->dev = get_device(&pdev->dev);
pci_set_drvdata(pdev, dev); pci_set_drvdata(pdev, dev);
result = nvme_dev_map(dev);
if (result)
goto free;
INIT_WORK(&dev->scan_work, nvme_dev_scan); INIT_WORK(&dev->scan_work, nvme_dev_scan);
INIT_WORK(&dev->reset_work, nvme_reset_work); INIT_WORK(&dev->reset_work, nvme_reset_work);
INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work); INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
...@@ -2077,6 +2085,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -2077,6 +2085,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
nvme_release_prp_pools(dev); nvme_release_prp_pools(dev);
put_pci: put_pci:
put_device(dev->dev); put_device(dev->dev);
nvme_dev_unmap(dev);
free: free:
kfree(dev->queues); kfree(dev->queues);
kfree(dev->entry); kfree(dev->entry);
...@@ -2116,6 +2125,7 @@ static void nvme_remove(struct pci_dev *pdev) ...@@ -2116,6 +2125,7 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_free_queues(dev, 0); nvme_free_queues(dev, 0);
nvme_release_cmb(dev); nvme_release_cmb(dev);
nvme_release_prp_pools(dev); nvme_release_prp_pools(dev);
nvme_dev_unmap(dev);
nvme_put_ctrl(&dev->ctrl); nvme_put_ctrl(&dev->ctrl);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment