Commit 9b8d0207 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'block-5.8-2020-06-26' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe pull request from Christoph:
    - multipath deadlock fixes (Anton)
    - NUMA fixes (Max)
    - RDMA completion vector fix (Max)
    - IO deadlock fix (Sagi)
    - multipath reference fix (Sagi)
    - NS mutation fix (Sagi)

 - Use right allocator when freeing bip in error path (Chengguang)

* tag 'block-5.8-2020-06-26' of git://git.kernel.dk/linux-block:
  nvme-multipath: fix bogus request queue reference put
  nvme-multipath: fix deadlock due to head->lock
  nvme: don't protect ns mutation with ns->head->lock
  nvme-multipath: fix deadlock between ana_work and scan_work
  nvme: fix possible deadlock when I/O is blocked
  nvme-rdma: assign completion vector correctly
  nvme-loop: initialize tagset numa value to the value of the ctrl
  nvme-tcp: initialize tagset numa value to the value of the ctrl
  nvme-pci: initialize tagset numa value to the value of the ctrl
  nvme-pci: override the value of the controller's numa node
  nvme: set initial value for controller's numa node
  block: release bip in a right way in error path
parents 5e8eed27 1b52671d
...@@ -24,6 +24,18 @@ void blk_flush_integrity(void) ...@@ -24,6 +24,18 @@ void blk_flush_integrity(void)
flush_workqueue(kintegrityd_wq); flush_workqueue(kintegrityd_wq);
} }
void __bio_integrity_free(struct bio_set *bs, struct bio_integrity_payload *bip)
{
if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
if (bip->bip_vec)
bvec_free(&bs->bvec_integrity_pool, bip->bip_vec,
bip->bip_slab);
mempool_free(bip, &bs->bio_integrity_pool);
} else {
kfree(bip);
}
}
/** /**
* bio_integrity_alloc - Allocate integrity payload and attach it to bio * bio_integrity_alloc - Allocate integrity payload and attach it to bio
* @bio: bio to attach integrity metadata to * @bio: bio to attach integrity metadata to
...@@ -78,7 +90,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, ...@@ -78,7 +90,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
return bip; return bip;
err: err:
mempool_free(bip, &bs->bio_integrity_pool); __bio_integrity_free(bs, bip);
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
EXPORT_SYMBOL(bio_integrity_alloc); EXPORT_SYMBOL(bio_integrity_alloc);
...@@ -99,14 +111,7 @@ void bio_integrity_free(struct bio *bio) ...@@ -99,14 +111,7 @@ void bio_integrity_free(struct bio *bio)
kfree(page_address(bip->bip_vec->bv_page) + kfree(page_address(bip->bip_vec->bv_page) +
bip->bip_vec->bv_offset); bip->bip_vec->bv_offset);
if (bs && mempool_initialized(&bs->bio_integrity_pool)) { __bio_integrity_free(bs, bip);
bvec_free(&bs->bvec_integrity_pool, bip->bip_vec, bip->bip_slab);
mempool_free(bip, &bs->bio_integrity_pool);
} else {
kfree(bip);
}
bio->bi_integrity = NULL; bio->bi_integrity = NULL;
bio->bi_opf &= ~REQ_INTEGRITY; bio->bi_opf &= ~REQ_INTEGRITY;
} }
......
...@@ -1974,7 +1974,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ...@@ -1974,7 +1974,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->head->disk) { if (ns->head->disk) {
nvme_update_disk_info(ns->head->disk, ns, id); nvme_update_disk_info(ns->head->disk, ns, id);
blk_queue_stack_limits(ns->head->disk->queue, ns->queue); blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
revalidate_disk(ns->head->disk);
} }
#endif #endif
return 0; return 0;
...@@ -4174,6 +4173,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ...@@ -4174,6 +4173,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->dev = dev; ctrl->dev = dev;
ctrl->ops = ops; ctrl->ops = ops;
ctrl->quirks = quirks; ctrl->quirks = quirks;
ctrl->numa_node = NUMA_NO_NODE;
INIT_WORK(&ctrl->scan_work, nvme_scan_work); INIT_WORK(&ctrl->scan_work, nvme_scan_work);
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work); INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work); INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
......
...@@ -409,15 +409,14 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) ...@@ -409,15 +409,14 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
{ {
struct nvme_ns_head *head = ns->head; struct nvme_ns_head *head = ns->head;
lockdep_assert_held(&ns->head->lock);
if (!head->disk) if (!head->disk)
return; return;
if (!(head->disk->flags & GENHD_FL_UP)) if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
device_add_disk(&head->subsys->dev, head->disk, device_add_disk(&head->subsys->dev, head->disk,
nvme_ns_id_attr_groups); nvme_ns_id_attr_groups);
mutex_lock(&head->lock);
if (nvme_path_is_optimized(ns)) { if (nvme_path_is_optimized(ns)) {
int node, srcu_idx; int node, srcu_idx;
...@@ -426,9 +425,10 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) ...@@ -426,9 +425,10 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
__nvme_find_path(head, node); __nvme_find_path(head, node);
srcu_read_unlock(&head->srcu, srcu_idx); srcu_read_unlock(&head->srcu, srcu_idx);
} }
mutex_unlock(&head->lock);
synchronize_srcu(&ns->head->srcu); synchronize_srcu(&head->srcu);
kblockd_schedule_work(&ns->head->requeue_work); kblockd_schedule_work(&head->requeue_work);
} }
static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data, static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
...@@ -483,14 +483,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state) ...@@ -483,14 +483,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state)
static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
struct nvme_ns *ns) struct nvme_ns *ns)
{ {
mutex_lock(&ns->head->lock);
ns->ana_grpid = le32_to_cpu(desc->grpid); ns->ana_grpid = le32_to_cpu(desc->grpid);
ns->ana_state = desc->state; ns->ana_state = desc->state;
clear_bit(NVME_NS_ANA_PENDING, &ns->flags); clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
if (nvme_state_is_live(ns->ana_state)) if (nvme_state_is_live(ns->ana_state))
nvme_mpath_set_live(ns); nvme_mpath_set_live(ns);
mutex_unlock(&ns->head->lock);
} }
static int nvme_update_ana_state(struct nvme_ctrl *ctrl, static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
...@@ -640,31 +638,37 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, ...@@ -640,31 +638,37 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
} }
DEVICE_ATTR_RO(ana_state); DEVICE_ATTR_RO(ana_state);
static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl, static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
struct nvme_ana_group_desc *desc, void *data) struct nvme_ana_group_desc *desc, void *data)
{ {
struct nvme_ns *ns = data; struct nvme_ana_group_desc *dst = data;
if (ns->ana_grpid == le32_to_cpu(desc->grpid)) { if (desc->grpid != dst->grpid)
nvme_update_ns_ana_state(desc, ns); return 0;
return -ENXIO; /* just break out of the loop */
}
return 0; *dst = *desc;
return -ENXIO; /* just break out of the loop */
} }
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
{ {
if (nvme_ctrl_use_ana(ns->ctrl)) { if (nvme_ctrl_use_ana(ns->ctrl)) {
struct nvme_ana_group_desc desc = {
.grpid = id->anagrpid,
.state = 0,
};
mutex_lock(&ns->ctrl->ana_lock); mutex_lock(&ns->ctrl->ana_lock);
ns->ana_grpid = le32_to_cpu(id->anagrpid); ns->ana_grpid = le32_to_cpu(id->anagrpid);
nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state); nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc);
mutex_unlock(&ns->ctrl->ana_lock); mutex_unlock(&ns->ctrl->ana_lock);
if (desc.state) {
/* found the group desc: update */
nvme_update_ns_ana_state(&desc, ns);
}
} else { } else {
mutex_lock(&ns->head->lock);
ns->ana_state = NVME_ANA_OPTIMIZED; ns->ana_state = NVME_ANA_OPTIMIZED;
nvme_mpath_set_live(ns); nvme_mpath_set_live(ns);
mutex_unlock(&ns->head->lock);
} }
if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
...@@ -686,6 +690,14 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) ...@@ -686,6 +690,14 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
kblockd_schedule_work(&head->requeue_work); kblockd_schedule_work(&head->requeue_work);
flush_work(&head->requeue_work); flush_work(&head->requeue_work);
blk_cleanup_queue(head->disk->queue); blk_cleanup_queue(head->disk->queue);
if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
/*
* if device_add_disk wasn't called, prevent
* disk release to put a bogus reference on the
* request queue
*/
head->disk->queue = NULL;
}
put_disk(head->disk); put_disk(head->disk);
} }
......
...@@ -364,6 +364,8 @@ struct nvme_ns_head { ...@@ -364,6 +364,8 @@ struct nvme_ns_head {
spinlock_t requeue_lock; spinlock_t requeue_lock;
struct work_struct requeue_work; struct work_struct requeue_work;
struct mutex lock; struct mutex lock;
unsigned long flags;
#define NVME_NSHEAD_DISK_LIVE 0
struct nvme_ns __rcu *current_path[]; struct nvme_ns __rcu *current_path[];
#endif #endif
}; };
......
...@@ -1593,7 +1593,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) ...@@ -1593,7 +1593,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH; dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
dev->admin_tagset.timeout = ADMIN_TIMEOUT; dev->admin_tagset.timeout = ADMIN_TIMEOUT;
dev->admin_tagset.numa_node = dev_to_node(dev->dev); dev->admin_tagset.numa_node = dev->ctrl.numa_node;
dev->admin_tagset.cmd_size = sizeof(struct nvme_iod); dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED; dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
dev->admin_tagset.driver_data = dev; dev->admin_tagset.driver_data = dev;
...@@ -1669,6 +1669,8 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) ...@@ -1669,6 +1669,8 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
if (result) if (result)
return result; return result;
dev->ctrl.numa_node = dev_to_node(dev->dev);
nvmeq = &dev->queues[0]; nvmeq = &dev->queues[0];
aqa = nvmeq->q_depth - 1; aqa = nvmeq->q_depth - 1;
aqa |= aqa << 16; aqa |= aqa << 16;
...@@ -2257,7 +2259,7 @@ static void nvme_dev_add(struct nvme_dev *dev) ...@@ -2257,7 +2259,7 @@ static void nvme_dev_add(struct nvme_dev *dev)
if (dev->io_queues[HCTX_TYPE_POLL]) if (dev->io_queues[HCTX_TYPE_POLL])
dev->tagset.nr_maps++; dev->tagset.nr_maps++;
dev->tagset.timeout = NVME_IO_TIMEOUT; dev->tagset.timeout = NVME_IO_TIMEOUT;
dev->tagset.numa_node = dev_to_node(dev->dev); dev->tagset.numa_node = dev->ctrl.numa_node;
dev->tagset.queue_depth = dev->tagset.queue_depth =
min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
dev->tagset.cmd_size = sizeof(struct nvme_iod); dev->tagset.cmd_size = sizeof(struct nvme_iod);
......
...@@ -470,7 +470,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) ...@@ -470,7 +470,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
* Spread I/O queues completion vectors according their queue index. * Spread I/O queues completion vectors according their queue index.
* Admin queues can always go on completion vector 0. * Admin queues can always go on completion vector 0.
*/ */
comp_vector = idx == 0 ? idx : idx - 1; comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;
/* Polling queues need direct cq polling context */ /* Polling queues need direct cq polling context */
if (nvme_rdma_poll_queue(queue)) if (nvme_rdma_poll_queue(queue))
......
...@@ -1532,7 +1532,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, ...@@ -1532,7 +1532,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
set->ops = &nvme_tcp_admin_mq_ops; set->ops = &nvme_tcp_admin_mq_ops;
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
set->reserved_tags = 2; /* connect + keep-alive */ set->reserved_tags = 2; /* connect + keep-alive */
set->numa_node = NUMA_NO_NODE; set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_BLOCKING; set->flags = BLK_MQ_F_BLOCKING;
set->cmd_size = sizeof(struct nvme_tcp_request); set->cmd_size = sizeof(struct nvme_tcp_request);
set->driver_data = ctrl; set->driver_data = ctrl;
...@@ -1544,7 +1544,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, ...@@ -1544,7 +1544,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
set->ops = &nvme_tcp_mq_ops; set->ops = &nvme_tcp_mq_ops;
set->queue_depth = nctrl->sqsize + 1; set->queue_depth = nctrl->sqsize + 1;
set->reserved_tags = 1; /* fabric connect */ set->reserved_tags = 1; /* fabric connect */
set->numa_node = NUMA_NO_NODE; set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
set->cmd_size = sizeof(struct nvme_tcp_request); set->cmd_size = sizeof(struct nvme_tcp_request);
set->driver_data = ctrl; set->driver_data = ctrl;
......
...@@ -340,7 +340,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) ...@@ -340,7 +340,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops; ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */ ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) + ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
NVME_INLINE_SG_CNT * sizeof(struct scatterlist); NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
ctrl->admin_tag_set.driver_data = ctrl; ctrl->admin_tag_set.driver_data = ctrl;
...@@ -512,7 +512,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) ...@@ -512,7 +512,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
ctrl->tag_set.ops = &nvme_loop_mq_ops; ctrl->tag_set.ops = &nvme_loop_mq_ops;
ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
ctrl->tag_set.reserved_tags = 1; /* fabric connect */ ctrl->tag_set.reserved_tags = 1; /* fabric connect */
ctrl->tag_set.numa_node = NUMA_NO_NODE; ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) + ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
NVME_INLINE_SG_CNT * sizeof(struct scatterlist); NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment