Commit b55f0097 authored by Jens Axboe's avatar Jens Axboe

Merge branch 'nvme-5.4' of git://git.infradead.org/nvme into for-linus

Pull NVMe updates from Keith:

"This is a collection of bug fixes committed since the previous pull
 request that address deadlocks, double resets, memory leaks, and other
 regression."

* 'nvme-5.4' of git://git.infradead.org/nvme:
  nvme-pci: Set the prp2 correctly when using more than 4k page
  nvme-tcp: fix possible leakage during error flow
  nvmet-loop: fix possible leakage during error flow
  nvme-tcp: Initialize sk->sk_ll_usec only with NET_RX_BUSY_POLL
  nvme: Wait for reset state when required
  nvme: Prevent resets during paused controller state
  nvme: Restart request timers in resetting state
  nvme: Remove ADMIN_ONLY state
  nvme-pci: Free tagset if no IO queues
  nvme: retain split access workaround for capability reads
  nvme: fix possible deadlock when nvme_update_formats fails
parents 8b07a65a a4f40484
...@@ -116,10 +116,26 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl) ...@@ -116,10 +116,26 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl)
/* /*
* Only new queue scan work when admin and IO queues are both alive * Only new queue scan work when admin and IO queues are both alive
*/ */
if (ctrl->state == NVME_CTRL_LIVE) if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
queue_work(nvme_wq, &ctrl->scan_work); queue_work(nvme_wq, &ctrl->scan_work);
} }
/*
* Use this function to proceed with scheduling reset_work for a controller
* that had previously been set to the resetting state. This is intended for
* code paths that can't be interrupted by other reset attempts. A hot removal
* may prevent this from succeeding.
*/
int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
{
if (ctrl->state != NVME_CTRL_RESETTING)
return -EBUSY;
if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
return -EBUSY;
return 0;
}
EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl) int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{ {
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
...@@ -137,8 +153,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) ...@@ -137,8 +153,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
ret = nvme_reset_ctrl(ctrl); ret = nvme_reset_ctrl(ctrl);
if (!ret) { if (!ret) {
flush_work(&ctrl->reset_work); flush_work(&ctrl->reset_work);
if (ctrl->state != NVME_CTRL_LIVE && if (ctrl->state != NVME_CTRL_LIVE)
ctrl->state != NVME_CTRL_ADMIN_ONLY)
ret = -ENETRESET; ret = -ENETRESET;
} }
...@@ -315,15 +330,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, ...@@ -315,15 +330,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
old_state = ctrl->state; old_state = ctrl->state;
switch (new_state) { switch (new_state) {
case NVME_CTRL_ADMIN_ONLY:
switch (old_state) {
case NVME_CTRL_CONNECTING:
changed = true;
/* FALLTHRU */
default:
break;
}
break;
case NVME_CTRL_LIVE: case NVME_CTRL_LIVE:
switch (old_state) { switch (old_state) {
case NVME_CTRL_NEW: case NVME_CTRL_NEW:
...@@ -339,7 +345,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, ...@@ -339,7 +345,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (old_state) { switch (old_state) {
case NVME_CTRL_NEW: case NVME_CTRL_NEW:
case NVME_CTRL_LIVE: case NVME_CTRL_LIVE:
case NVME_CTRL_ADMIN_ONLY:
changed = true; changed = true;
/* FALLTHRU */ /* FALLTHRU */
default: default:
...@@ -359,7 +364,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, ...@@ -359,7 +364,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
case NVME_CTRL_DELETING: case NVME_CTRL_DELETING:
switch (old_state) { switch (old_state) {
case NVME_CTRL_LIVE: case NVME_CTRL_LIVE:
case NVME_CTRL_ADMIN_ONLY:
case NVME_CTRL_RESETTING: case NVME_CTRL_RESETTING:
case NVME_CTRL_CONNECTING: case NVME_CTRL_CONNECTING:
changed = true; changed = true;
...@@ -381,8 +385,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, ...@@ -381,8 +385,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
break; break;
} }
if (changed) if (changed) {
ctrl->state = new_state; ctrl->state = new_state;
wake_up_all(&ctrl->state_wq);
}
spin_unlock_irqrestore(&ctrl->lock, flags); spin_unlock_irqrestore(&ctrl->lock, flags);
if (changed && ctrl->state == NVME_CTRL_LIVE) if (changed && ctrl->state == NVME_CTRL_LIVE)
...@@ -391,6 +397,39 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, ...@@ -391,6 +397,39 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
} }
EXPORT_SYMBOL_GPL(nvme_change_ctrl_state); EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
/*
* Returns true for sink states that can't ever transition back to live.
*/
static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
{
switch (ctrl->state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
case NVME_CTRL_RESETTING:
case NVME_CTRL_CONNECTING:
return false;
case NVME_CTRL_DELETING:
case NVME_CTRL_DEAD:
return true;
default:
WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
return true;
}
}
/*
* Waits for the controller state to be resetting, or returns false if it is
* not possible to ever transition to that state.
*/
bool nvme_wait_reset(struct nvme_ctrl *ctrl)
{
wait_event(ctrl->state_wq,
nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
nvme_state_terminal(ctrl));
return ctrl->state == NVME_CTRL_RESETTING;
}
EXPORT_SYMBOL_GPL(nvme_wait_reset);
static void nvme_free_ns_head(struct kref *ref) static void nvme_free_ns_head(struct kref *ref)
{ {
struct nvme_ns_head *head = struct nvme_ns_head *head =
...@@ -1306,8 +1345,6 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl) ...@@ -1306,8 +1345,6 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl)
if (ns->disk && nvme_revalidate_disk(ns->disk)) if (ns->disk && nvme_revalidate_disk(ns->disk))
nvme_set_queue_dying(ns); nvme_set_queue_dying(ns);
up_read(&ctrl->namespaces_rwsem); up_read(&ctrl->namespaces_rwsem);
nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
} }
static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
...@@ -1323,6 +1360,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) ...@@ -1323,6 +1360,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
nvme_unfreeze(ctrl); nvme_unfreeze(ctrl);
nvme_mpath_unfreeze(ctrl->subsys); nvme_mpath_unfreeze(ctrl->subsys);
mutex_unlock(&ctrl->subsys->lock); mutex_unlock(&ctrl->subsys->lock);
nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
mutex_unlock(&ctrl->scan_lock); mutex_unlock(&ctrl->scan_lock);
} }
if (effects & NVME_CMD_EFFECTS_CCC) if (effects & NVME_CMD_EFFECTS_CCC)
...@@ -2874,7 +2912,6 @@ static int nvme_dev_open(struct inode *inode, struct file *file) ...@@ -2874,7 +2912,6 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
switch (ctrl->state) { switch (ctrl->state) {
case NVME_CTRL_LIVE: case NVME_CTRL_LIVE:
case NVME_CTRL_ADMIN_ONLY:
break; break;
default: default:
return -EWOULDBLOCK; return -EWOULDBLOCK;
...@@ -3168,7 +3205,6 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, ...@@ -3168,7 +3205,6 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
static const char *const state_name[] = { static const char *const state_name[] = {
[NVME_CTRL_NEW] = "new", [NVME_CTRL_NEW] = "new",
[NVME_CTRL_LIVE] = "live", [NVME_CTRL_LIVE] = "live",
[NVME_CTRL_ADMIN_ONLY] = "only-admin",
[NVME_CTRL_RESETTING] = "resetting", [NVME_CTRL_RESETTING] = "resetting",
[NVME_CTRL_CONNECTING] = "connecting", [NVME_CTRL_CONNECTING] = "connecting",
[NVME_CTRL_DELETING] = "deleting", [NVME_CTRL_DELETING] = "deleting",
...@@ -3679,11 +3715,10 @@ static void nvme_scan_work(struct work_struct *work) ...@@ -3679,11 +3715,10 @@ static void nvme_scan_work(struct work_struct *work)
struct nvme_id_ctrl *id; struct nvme_id_ctrl *id;
unsigned nn; unsigned nn;
if (ctrl->state != NVME_CTRL_LIVE) /* No tagset on a live ctrl means IO queues could not created */
if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
return; return;
WARN_ON_ONCE(!ctrl->tagset);
if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) { if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
dev_info(ctrl->device, "rescanning namespaces.\n"); dev_info(ctrl->device, "rescanning namespaces.\n");
nvme_clear_changed_ns_log(ctrl); nvme_clear_changed_ns_log(ctrl);
...@@ -3844,13 +3879,13 @@ static void nvme_fw_act_work(struct work_struct *work) ...@@ -3844,13 +3879,13 @@ static void nvme_fw_act_work(struct work_struct *work)
if (time_after(jiffies, fw_act_timeout)) { if (time_after(jiffies, fw_act_timeout)) {
dev_warn(ctrl->device, dev_warn(ctrl->device,
"Fw activation timeout, reset controller\n"); "Fw activation timeout, reset controller\n");
nvme_reset_ctrl(ctrl); nvme_try_sched_reset(ctrl);
break; return;
} }
msleep(100); msleep(100);
} }
if (ctrl->state != NVME_CTRL_LIVE) if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
return; return;
nvme_start_queues(ctrl); nvme_start_queues(ctrl);
...@@ -3870,7 +3905,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) ...@@ -3870,7 +3905,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
nvme_queue_scan(ctrl); nvme_queue_scan(ctrl);
break; break;
case NVME_AER_NOTICE_FW_ACT_STARTING: case NVME_AER_NOTICE_FW_ACT_STARTING:
queue_work(nvme_wq, &ctrl->fw_act_work); /*
* We are (ab)using the RESETTING state to prevent subsequent
* recovery actions from interfering with the controller's
* firmware activation.
*/
if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
queue_work(nvme_wq, &ctrl->fw_act_work);
break; break;
#ifdef CONFIG_NVME_MULTIPATH #ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA: case NVME_AER_NOTICE_ANA:
...@@ -3993,6 +4034,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ...@@ -3993,6 +4034,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work); INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work); INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work); INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
init_waitqueue_head(&ctrl->state_wq);
INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work); INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd)); memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
......
...@@ -182,8 +182,7 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, ...@@ -182,8 +182,7 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live) bool queue_live)
{ {
if (likely(ctrl->state == NVME_CTRL_LIVE || if (likely(ctrl->state == NVME_CTRL_LIVE))
ctrl->state == NVME_CTRL_ADMIN_ONLY))
return true; return true;
return __nvmf_check_ready(ctrl, rq, queue_live); return __nvmf_check_ready(ctrl, rq, queue_live);
} }
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <linux/sed-opal.h> #include <linux/sed-opal.h>
#include <linux/fault-inject.h> #include <linux/fault-inject.h>
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
#include <linux/wait.h>
#include <trace/events/block.h> #include <trace/events/block.h>
...@@ -161,7 +162,6 @@ static inline u16 nvme_req_qid(struct request *req) ...@@ -161,7 +162,6 @@ static inline u16 nvme_req_qid(struct request *req)
enum nvme_ctrl_state { enum nvme_ctrl_state {
NVME_CTRL_NEW, NVME_CTRL_NEW,
NVME_CTRL_LIVE, NVME_CTRL_LIVE,
NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */
NVME_CTRL_RESETTING, NVME_CTRL_RESETTING,
NVME_CTRL_CONNECTING, NVME_CTRL_CONNECTING,
NVME_CTRL_DELETING, NVME_CTRL_DELETING,
...@@ -199,6 +199,7 @@ struct nvme_ctrl { ...@@ -199,6 +199,7 @@ struct nvme_ctrl {
struct cdev cdev; struct cdev cdev;
struct work_struct reset_work; struct work_struct reset_work;
struct work_struct delete_work; struct work_struct delete_work;
wait_queue_head_t state_wq;
struct nvme_subsystem *subsys; struct nvme_subsystem *subsys;
struct list_head subsys_entry; struct list_head subsys_entry;
...@@ -449,6 +450,7 @@ void nvme_complete_rq(struct request *req); ...@@ -449,6 +450,7 @@ void nvme_complete_rq(struct request *req);
bool nvme_cancel_request(struct request *req, void *data, bool reserved); bool nvme_cancel_request(struct request *req, void *data, bool reserved);
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
enum nvme_ctrl_state new_state); enum nvme_ctrl_state new_state);
bool nvme_wait_reset(struct nvme_ctrl *ctrl);
int nvme_disable_ctrl(struct nvme_ctrl *ctrl); int nvme_disable_ctrl(struct nvme_ctrl *ctrl);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
...@@ -499,6 +501,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); ...@@ -499,6 +501,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
......
...@@ -773,7 +773,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, ...@@ -773,7 +773,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
struct bio_vec *bv) struct bio_vec *bv)
{ {
struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
unsigned int first_prp_len = dev->ctrl.page_size - bv->bv_offset; unsigned int offset = bv->bv_offset & (dev->ctrl.page_size - 1);
unsigned int first_prp_len = dev->ctrl.page_size - offset;
iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0); iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
if (dma_mapping_error(dev->dev, iod->first_dma)) if (dma_mapping_error(dev->dev, iod->first_dma))
...@@ -2263,10 +2264,7 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) ...@@ -2263,10 +2264,7 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
return true; return true;
} }
/* static void nvme_dev_add(struct nvme_dev *dev)
* return error value only when tagset allocation failed
*/
static int nvme_dev_add(struct nvme_dev *dev)
{ {
int ret; int ret;
...@@ -2296,7 +2294,7 @@ static int nvme_dev_add(struct nvme_dev *dev) ...@@ -2296,7 +2294,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
if (ret) { if (ret) {
dev_warn(dev->ctrl.device, dev_warn(dev->ctrl.device,
"IO queues tagset allocation failed %d\n", ret); "IO queues tagset allocation failed %d\n", ret);
return ret; return;
} }
dev->ctrl.tagset = &dev->tagset; dev->ctrl.tagset = &dev->tagset;
} else { } else {
...@@ -2307,7 +2305,6 @@ static int nvme_dev_add(struct nvme_dev *dev) ...@@ -2307,7 +2305,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
} }
nvme_dbbuf_set(dev); nvme_dbbuf_set(dev);
return 0;
} }
static int nvme_pci_enable(struct nvme_dev *dev) static int nvme_pci_enable(struct nvme_dev *dev)
...@@ -2467,6 +2464,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) ...@@ -2467,6 +2464,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
mutex_unlock(&dev->shutdown_lock); mutex_unlock(&dev->shutdown_lock);
} }
static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
{
if (!nvme_wait_reset(&dev->ctrl))
return -EBUSY;
nvme_dev_disable(dev, shutdown);
return 0;
}
static int nvme_setup_prp_pools(struct nvme_dev *dev) static int nvme_setup_prp_pools(struct nvme_dev *dev)
{ {
dev->prp_page_pool = dma_pool_create("prp list page", dev->dev, dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
...@@ -2490,14 +2495,20 @@ static void nvme_release_prp_pools(struct nvme_dev *dev) ...@@ -2490,14 +2495,20 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
dma_pool_destroy(dev->prp_small_pool); dma_pool_destroy(dev->prp_small_pool);
} }
static void nvme_free_tagset(struct nvme_dev *dev)
{
if (dev->tagset.tags)
blk_mq_free_tag_set(&dev->tagset);
dev->ctrl.tagset = NULL;
}
static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
{ {
struct nvme_dev *dev = to_nvme_dev(ctrl); struct nvme_dev *dev = to_nvme_dev(ctrl);
nvme_dbbuf_dma_free(dev); nvme_dbbuf_dma_free(dev);
put_device(dev->dev); put_device(dev->dev);
if (dev->tagset.tags) nvme_free_tagset(dev);
blk_mq_free_tag_set(&dev->tagset);
if (dev->ctrl.admin_q) if (dev->ctrl.admin_q)
blk_put_queue(dev->ctrl.admin_q); blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues); kfree(dev->queues);
...@@ -2508,6 +2519,11 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) ...@@ -2508,6 +2519,11 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
static void nvme_remove_dead_ctrl(struct nvme_dev *dev) static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
{ {
/*
* Set state to deleting now to avoid blocking nvme_wait_reset(), which
* may be holding this pci_dev's device lock.
*/
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
nvme_get_ctrl(&dev->ctrl); nvme_get_ctrl(&dev->ctrl);
nvme_dev_disable(dev, false); nvme_dev_disable(dev, false);
nvme_kill_queues(&dev->ctrl); nvme_kill_queues(&dev->ctrl);
...@@ -2521,7 +2537,6 @@ static void nvme_reset_work(struct work_struct *work) ...@@ -2521,7 +2537,6 @@ static void nvme_reset_work(struct work_struct *work)
container_of(work, struct nvme_dev, ctrl.reset_work); container_of(work, struct nvme_dev, ctrl.reset_work);
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result; int result;
enum nvme_ctrl_state new_state = NVME_CTRL_LIVE;
if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) { if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
result = -ENODEV; result = -ENODEV;
...@@ -2615,13 +2630,11 @@ static void nvme_reset_work(struct work_struct *work) ...@@ -2615,13 +2630,11 @@ static void nvme_reset_work(struct work_struct *work)
dev_warn(dev->ctrl.device, "IO queues not created\n"); dev_warn(dev->ctrl.device, "IO queues not created\n");
nvme_kill_queues(&dev->ctrl); nvme_kill_queues(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl); nvme_remove_namespaces(&dev->ctrl);
new_state = NVME_CTRL_ADMIN_ONLY; nvme_free_tagset(dev);
} else { } else {
nvme_start_queues(&dev->ctrl); nvme_start_queues(&dev->ctrl);
nvme_wait_freeze(&dev->ctrl); nvme_wait_freeze(&dev->ctrl);
/* hit this only when allocate tagset fails */ nvme_dev_add(dev);
if (nvme_dev_add(dev))
new_state = NVME_CTRL_ADMIN_ONLY;
nvme_unfreeze(&dev->ctrl); nvme_unfreeze(&dev->ctrl);
} }
...@@ -2629,9 +2642,9 @@ static void nvme_reset_work(struct work_struct *work) ...@@ -2629,9 +2642,9 @@ static void nvme_reset_work(struct work_struct *work)
* If only admin queue live, keep it to do further investigation or * If only admin queue live, keep it to do further investigation or
* recovery. * recovery.
*/ */
if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) { if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
dev_warn(dev->ctrl.device, dev_warn(dev->ctrl.device,
"failed to mark controller state %d\n", new_state); "failed to mark controller live state\n");
result = -ENODEV; result = -ENODEV;
goto out; goto out;
} }
...@@ -2672,7 +2685,7 @@ static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) ...@@ -2672,7 +2685,7 @@ static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
{ {
*val = readq(to_nvme_dev(ctrl)->bar + off); *val = lo_hi_readq(to_nvme_dev(ctrl)->bar + off);
return 0; return 0;
} }
...@@ -2836,19 +2849,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -2836,19 +2849,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
static void nvme_reset_prepare(struct pci_dev *pdev) static void nvme_reset_prepare(struct pci_dev *pdev)
{ {
struct nvme_dev *dev = pci_get_drvdata(pdev); struct nvme_dev *dev = pci_get_drvdata(pdev);
nvme_dev_disable(dev, false);
/*
* We don't need to check the return value from waiting for the reset
* state as pci_dev device lock is held, making it impossible to race
* with ->remove().
*/
nvme_disable_prepare_reset(dev, false);
nvme_sync_queues(&dev->ctrl);
} }
static void nvme_reset_done(struct pci_dev *pdev) static void nvme_reset_done(struct pci_dev *pdev)
{ {
struct nvme_dev *dev = pci_get_drvdata(pdev); struct nvme_dev *dev = pci_get_drvdata(pdev);
nvme_reset_ctrl_sync(&dev->ctrl);
if (!nvme_try_sched_reset(&dev->ctrl))
flush_work(&dev->ctrl.reset_work);
} }
static void nvme_shutdown(struct pci_dev *pdev) static void nvme_shutdown(struct pci_dev *pdev)
{ {
struct nvme_dev *dev = pci_get_drvdata(pdev); struct nvme_dev *dev = pci_get_drvdata(pdev);
nvme_dev_disable(dev, true); nvme_disable_prepare_reset(dev, true);
} }
/* /*
...@@ -2901,7 +2923,7 @@ static int nvme_resume(struct device *dev) ...@@ -2901,7 +2923,7 @@ static int nvme_resume(struct device *dev)
if (ndev->last_ps == U32_MAX || if (ndev->last_ps == U32_MAX ||
nvme_set_power_state(ctrl, ndev->last_ps) != 0) nvme_set_power_state(ctrl, ndev->last_ps) != 0)
nvme_reset_ctrl(ctrl); return nvme_try_sched_reset(&ndev->ctrl);
return 0; return 0;
} }
...@@ -2929,17 +2951,14 @@ static int nvme_suspend(struct device *dev) ...@@ -2929,17 +2951,14 @@ static int nvme_suspend(struct device *dev)
*/ */
if (pm_suspend_via_firmware() || !ctrl->npss || if (pm_suspend_via_firmware() || !ctrl->npss ||
!pcie_aspm_enabled(pdev) || !pcie_aspm_enabled(pdev) ||
(ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) { (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
nvme_dev_disable(ndev, true); return nvme_disable_prepare_reset(ndev, true);
return 0;
}
nvme_start_freeze(ctrl); nvme_start_freeze(ctrl);
nvme_wait_freeze(ctrl); nvme_wait_freeze(ctrl);
nvme_sync_queues(ctrl); nvme_sync_queues(ctrl);
if (ctrl->state != NVME_CTRL_LIVE && if (ctrl->state != NVME_CTRL_LIVE)
ctrl->state != NVME_CTRL_ADMIN_ONLY)
goto unfreeze; goto unfreeze;
ret = nvme_get_power_state(ctrl, &ndev->last_ps); ret = nvme_get_power_state(ctrl, &ndev->last_ps);
...@@ -2965,9 +2984,8 @@ static int nvme_suspend(struct device *dev) ...@@ -2965,9 +2984,8 @@ static int nvme_suspend(struct device *dev)
* Clearing npss forces a controller reset on resume. The * Clearing npss forces a controller reset on resume. The
* correct value will be resdicovered then. * correct value will be resdicovered then.
*/ */
nvme_dev_disable(ndev, true); ret = nvme_disable_prepare_reset(ndev, true);
ctrl->npss = 0; ctrl->npss = 0;
ret = 0;
} }
unfreeze: unfreeze:
nvme_unfreeze(ctrl); nvme_unfreeze(ctrl);
...@@ -2977,9 +2995,7 @@ static int nvme_suspend(struct device *dev) ...@@ -2977,9 +2995,7 @@ static int nvme_suspend(struct device *dev)
static int nvme_simple_suspend(struct device *dev) static int nvme_simple_suspend(struct device *dev)
{ {
struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev)); struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
return nvme_disable_prepare_reset(ndev, true);
nvme_dev_disable(ndev, true);
return 0;
} }
static int nvme_simple_resume(struct device *dev) static int nvme_simple_resume(struct device *dev)
...@@ -2987,8 +3003,7 @@ static int nvme_simple_resume(struct device *dev) ...@@ -2987,8 +3003,7 @@ static int nvme_simple_resume(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev); struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev); struct nvme_dev *ndev = pci_get_drvdata(pdev);
nvme_reset_ctrl(&ndev->ctrl); return nvme_try_sched_reset(&ndev->ctrl);
return 0;
} }
static const struct dev_pm_ops nvme_dev_pm_ops = { static const struct dev_pm_ops nvme_dev_pm_ops = {
......
...@@ -1701,6 +1701,14 @@ nvme_rdma_timeout(struct request *rq, bool reserved) ...@@ -1701,6 +1701,14 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n", dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
rq->tag, nvme_rdma_queue_idx(queue)); rq->tag, nvme_rdma_queue_idx(queue));
/*
* Restart the timer if a controller reset is already scheduled. Any
* timed out commands would be handled before entering the connecting
* state.
*/
if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
return BLK_EH_RESET_TIMER;
if (ctrl->ctrl.state != NVME_CTRL_LIVE) { if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
/* /*
* Teardown immediately if controller times out while starting * Teardown immediately if controller times out while starting
......
...@@ -1386,7 +1386,9 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, ...@@ -1386,7 +1386,9 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
queue->sock->sk->sk_data_ready = nvme_tcp_data_ready; queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
queue->sock->sk->sk_state_change = nvme_tcp_state_change; queue->sock->sk->sk_state_change = nvme_tcp_state_change;
queue->sock->sk->sk_write_space = nvme_tcp_write_space; queue->sock->sk->sk_write_space = nvme_tcp_write_space;
#ifdef CONFIG_NET_RX_BUSY_POLL
queue->sock->sk->sk_ll_usec = 1; queue->sock->sk->sk_ll_usec = 1;
#endif
write_unlock_bh(&queue->sock->sk->sk_callback_lock); write_unlock_bh(&queue->sock->sk->sk_callback_lock);
return 0; return 0;
...@@ -2044,6 +2046,14 @@ nvme_tcp_timeout(struct request *rq, bool reserved) ...@@ -2044,6 +2046,14 @@ nvme_tcp_timeout(struct request *rq, bool reserved)
struct nvme_tcp_ctrl *ctrl = req->queue->ctrl; struct nvme_tcp_ctrl *ctrl = req->queue->ctrl;
struct nvme_tcp_cmd_pdu *pdu = req->pdu; struct nvme_tcp_cmd_pdu *pdu = req->pdu;
/*
* Restart the timer if a controller reset is already scheduled. Any
* timed out commands would be handled before entering the connecting
* state.
*/
if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
return BLK_EH_RESET_TIMER;
dev_warn(ctrl->ctrl.device, dev_warn(ctrl->ctrl.device,
"queue %d: timeout request %#x type %d\n", "queue %d: timeout request %#x type %d\n",
nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type); nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
...@@ -2126,6 +2136,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, ...@@ -2126,6 +2136,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
ret = nvme_tcp_map_data(queue, rq); ret = nvme_tcp_map_data(queue, rq);
if (unlikely(ret)) { if (unlikely(ret)) {
nvme_cleanup_cmd(rq);
dev_err(queue->ctrl->ctrl.device, dev_err(queue->ctrl->ctrl.device,
"Failed to map data (%d)\n", ret); "Failed to map data (%d)\n", ret);
return ret; return ret;
......
...@@ -157,8 +157,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -157,8 +157,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
iod->sg_table.sgl = iod->first_sgl; iod->sg_table.sgl = iod->first_sgl;
if (sg_alloc_table_chained(&iod->sg_table, if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req), blk_rq_nr_phys_segments(req),
iod->sg_table.sgl, SG_CHUNK_SIZE)) iod->sg_table.sgl, SG_CHUNK_SIZE)) {
nvme_cleanup_cmd(req);
return BLK_STS_RESOURCE; return BLK_STS_RESOURCE;
}
iod->req.sg = iod->sg_table.sgl; iod->req.sg = iod->sg_table.sgl;
iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl); iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment