Commit f8eacd8a authored by Linus Torvalds

Merge tag 'block-6.12-20241018' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
     - Fix target passthrough identifier (Nilay)
     - Fix tcp locking (Hannes)
     - Replace list with sbitmap for tracking RDMA rsp tags (Guixin)
     - Remove unnecessary fallthrough statements (Tokunori)
     - Remove ready-without-media support (Greg)
     - Fix multipath partition scan deadlock (Keith)
     - Fix concurrent PCI reset and remove queue mapping (Maurizio)
     - Fabrics shutdown fixes (Nilay)

 - Fix for a kerneldoc warning (Keith)

 - Fix a race with blk-rq-qos and wakeups (Omar)

 - Cleanup of checking for always-set tag_set (SurajSonawane2415)

 - Fix for a crash with CPU hotplug notifiers (Ming)

 - Don't allow zero-copy ublk on unprivileged device (Ming)

 - Use array_index_nospec() for CDROM (Josh)

 - Remove dead code in drbd (David)

 - Tweaks to elevator loading (Breno)

* tag 'block-6.12-20241018' of git://git.kernel.dk/linux:
  cdrom: Avoid barrier_nospec() in cdrom_ioctl_media_changed()
  nvme: use helper nvme_ctrl_state in nvme_keep_alive_finish function
  nvme: make keep-alive synchronous operation
  nvme-loop: flush off pending I/O while shutting down loop controller
  nvme-pci: fix race condition between reset and nvme_dev_disable()
  ublk: don't allow user copy for unprivileged device
  blk-rq-qos: fix crash on rq_qos_wait vs. rq_qos_wake_function race
  nvme-multipath: defer partition scanning
  blk-mq: setup queue ->tag_set before initializing hctx
  elevator: Remove argument from elevator_find_get
  elevator: do not request_module if elevator exists
  drbd: Remove unused conn_lowest_minor
  nvme: disable CC.CRIME (NVME_CC_CRIME)
  nvme: delete unnecessary fallthru comment
  nvmet-rdma: use sbitmap to replace rsp free list
  block: Fix elevator_get_default() checking for NULL q->tag_set
  nvme: tcp: avoid race between queue_lock lock and destroy
  nvmet-passthru: clear EUID/NGUID/UUID while using loop target
  block: fix blk_rq_map_integrity_sg kernel-doc
parents a041f478 b0bf1afd
...@@ -4310,6 +4310,12 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, ...@@ -4310,6 +4310,12 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
/* mark the queue as mq asap */ /* mark the queue as mq asap */
q->mq_ops = set->ops; q->mq_ops = set->ops;
/*
* ->tag_set has to be set up before initializing hctx, because the cpuhp
* handler needs it for checking queue mapping
*/
q->tag_set = set;
if (blk_mq_alloc_ctxs(q)) if (blk_mq_alloc_ctxs(q))
goto err_exit; goto err_exit;
...@@ -4328,8 +4334,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, ...@@ -4328,8 +4334,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
INIT_WORK(&q->timeout_work, blk_mq_timeout_work); INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
q->tag_set = set;
q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work); INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
......
...@@ -219,8 +219,8 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr, ...@@ -219,8 +219,8 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr,
data->got_token = true; data->got_token = true;
smp_wmb(); smp_wmb();
list_del_init(&curr->entry);
wake_up_process(data->task); wake_up_process(data->task);
list_del_init_careful(&curr->entry);
return 1; return 1;
} }
......
...@@ -106,8 +106,7 @@ static struct elevator_type *__elevator_find(const char *name) ...@@ -106,8 +106,7 @@ static struct elevator_type *__elevator_find(const char *name)
return NULL; return NULL;
} }
static struct elevator_type *elevator_find_get(struct request_queue *q, static struct elevator_type *elevator_find_get(const char *name)
const char *name)
{ {
struct elevator_type *e; struct elevator_type *e;
...@@ -551,7 +550,7 @@ EXPORT_SYMBOL_GPL(elv_unregister); ...@@ -551,7 +550,7 @@ EXPORT_SYMBOL_GPL(elv_unregister);
static inline bool elv_support_iosched(struct request_queue *q) static inline bool elv_support_iosched(struct request_queue *q)
{ {
if (!queue_is_mq(q) || if (!queue_is_mq(q) ||
(q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))) (q->tag_set->flags & BLK_MQ_F_NO_SCHED))
return false; return false;
return true; return true;
} }
...@@ -562,14 +561,14 @@ static inline bool elv_support_iosched(struct request_queue *q) ...@@ -562,14 +561,14 @@ static inline bool elv_support_iosched(struct request_queue *q)
*/ */
static struct elevator_type *elevator_get_default(struct request_queue *q) static struct elevator_type *elevator_get_default(struct request_queue *q)
{ {
if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) if (q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT)
return NULL; return NULL;
if (q->nr_hw_queues != 1 && if (q->nr_hw_queues != 1 &&
!blk_mq_is_shared_tags(q->tag_set->flags)) !blk_mq_is_shared_tags(q->tag_set->flags))
return NULL; return NULL;
return elevator_find_get(q, "mq-deadline"); return elevator_find_get("mq-deadline");
} }
/* /*
...@@ -697,7 +696,7 @@ static int elevator_change(struct request_queue *q, const char *elevator_name) ...@@ -697,7 +696,7 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
if (q->elevator && elevator_match(q->elevator->type, elevator_name)) if (q->elevator && elevator_match(q->elevator->type, elevator_name))
return 0; return 0;
e = elevator_find_get(q, elevator_name); e = elevator_find_get(elevator_name);
if (!e) if (!e)
return -EINVAL; return -EINVAL;
ret = elevator_switch(q, e); ret = elevator_switch(q, e);
...@@ -709,13 +708,21 @@ int elv_iosched_load_module(struct gendisk *disk, const char *buf, ...@@ -709,13 +708,21 @@ int elv_iosched_load_module(struct gendisk *disk, const char *buf,
size_t count) size_t count)
{ {
char elevator_name[ELV_NAME_MAX]; char elevator_name[ELV_NAME_MAX];
struct elevator_type *found;
const char *name;
if (!elv_support_iosched(disk->queue)) if (!elv_support_iosched(disk->queue))
return -EOPNOTSUPP; return -EOPNOTSUPP;
strscpy(elevator_name, buf, sizeof(elevator_name)); strscpy(elevator_name, buf, sizeof(elevator_name));
name = strstrip(elevator_name);
request_module("%s-iosched", strstrip(elevator_name)); spin_lock(&elv_list_lock);
found = __elevator_find(name);
spin_unlock(&elv_list_lock);
if (!found)
request_module("%s-iosched", name);
return 0; return 0;
} }
......
...@@ -1364,7 +1364,6 @@ extern struct bio_set drbd_io_bio_set; ...@@ -1364,7 +1364,6 @@ extern struct bio_set drbd_io_bio_set;
extern struct mutex resources_mutex; extern struct mutex resources_mutex;
extern int conn_lowest_minor(struct drbd_connection *connection);
extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor); extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor);
extern void drbd_destroy_device(struct kref *kref); extern void drbd_destroy_device(struct kref *kref);
extern void drbd_delete_device(struct drbd_device *device); extern void drbd_delete_device(struct drbd_device *device);
......
...@@ -471,20 +471,6 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) ...@@ -471,20 +471,6 @@ void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
wait_for_completion(&thi->stop); wait_for_completion(&thi->stop);
} }
int conn_lowest_minor(struct drbd_connection *connection)
{
struct drbd_peer_device *peer_device;
int vnr = 0, minor = -1;
rcu_read_lock();
peer_device = idr_get_next(&connection->peer_devices, &vnr);
if (peer_device)
minor = device_to_minor(peer_device->device);
rcu_read_unlock();
return minor;
}
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* /*
* drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
......
...@@ -2380,10 +2380,19 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) ...@@ -2380,10 +2380,19 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
* TODO: provide forward progress for RECOVERY handler, so that * TODO: provide forward progress for RECOVERY handler, so that
* unprivileged device can benefit from it * unprivileged device can benefit from it
*/ */
if (info.flags & UBLK_F_UNPRIVILEGED_DEV) if (info.flags & UBLK_F_UNPRIVILEGED_DEV) {
info.flags &= ~(UBLK_F_USER_RECOVERY_REISSUE | info.flags &= ~(UBLK_F_USER_RECOVERY_REISSUE |
UBLK_F_USER_RECOVERY); UBLK_F_USER_RECOVERY);
/*
* For USER_COPY, we depend on userspace to fill request
* buffer by pwrite() to ublk char device, which can't be
* used for unprivileged device
*/
if (info.flags & UBLK_F_USER_COPY)
return -EINVAL;
}
/* the created device is always owned by current user */ /* the created device is always owned by current user */
ublk_store_owner_uid_gid(&info.owner_uid, &info.owner_gid); ublk_store_owner_uid_gid(&info.owner_uid, &info.owner_gid);
......
...@@ -2313,7 +2313,7 @@ static int cdrom_ioctl_media_changed(struct cdrom_device_info *cdi, ...@@ -2313,7 +2313,7 @@ static int cdrom_ioctl_media_changed(struct cdrom_device_info *cdi,
return -EINVAL; return -EINVAL;
/* Prevent arg from speculatively bypassing the length check */ /* Prevent arg from speculatively bypassing the length check */
barrier_nospec(); arg = array_index_nospec(arg, cdi->capacity);
info = kmalloc(sizeof(*info), GFP_KERNEL); info = kmalloc(sizeof(*info), GFP_KERNEL);
if (!info) if (!info)
......
...@@ -1292,14 +1292,12 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl) ...@@ -1292,14 +1292,12 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
queue_delayed_work(nvme_wq, &ctrl->ka_work, delay); queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
} }
static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, static void nvme_keep_alive_finish(struct request *rq,
blk_status_t status) blk_status_t status, struct nvme_ctrl *ctrl)
{ {
struct nvme_ctrl *ctrl = rq->end_io_data;
unsigned long flags;
bool startka = false;
unsigned long rtt = jiffies - (rq->deadline - rq->timeout); unsigned long rtt = jiffies - (rq->deadline - rq->timeout);
unsigned long delay = nvme_keep_alive_work_period(ctrl); unsigned long delay = nvme_keep_alive_work_period(ctrl);
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
/* /*
* Subtract off the keepalive RTT so nvme_keep_alive_work runs * Subtract off the keepalive RTT so nvme_keep_alive_work runs
...@@ -1313,25 +1311,17 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq, ...@@ -1313,25 +1311,17 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
delay = 0; delay = 0;
} }
blk_mq_free_request(rq);
if (status) { if (status) {
dev_err(ctrl->device, dev_err(ctrl->device,
"failed nvme_keep_alive_end_io error=%d\n", "failed nvme_keep_alive_end_io error=%d\n",
status); status);
return RQ_END_IO_NONE; return;
} }
ctrl->ka_last_check_time = jiffies; ctrl->ka_last_check_time = jiffies;
ctrl->comp_seen = false; ctrl->comp_seen = false;
spin_lock_irqsave(&ctrl->lock, flags); if (state == NVME_CTRL_LIVE || state == NVME_CTRL_CONNECTING)
if (ctrl->state == NVME_CTRL_LIVE ||
ctrl->state == NVME_CTRL_CONNECTING)
startka = true;
spin_unlock_irqrestore(&ctrl->lock, flags);
if (startka)
queue_delayed_work(nvme_wq, &ctrl->ka_work, delay); queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
return RQ_END_IO_NONE;
} }
static void nvme_keep_alive_work(struct work_struct *work) static void nvme_keep_alive_work(struct work_struct *work)
...@@ -1340,6 +1330,7 @@ static void nvme_keep_alive_work(struct work_struct *work) ...@@ -1340,6 +1330,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
struct nvme_ctrl, ka_work); struct nvme_ctrl, ka_work);
bool comp_seen = ctrl->comp_seen; bool comp_seen = ctrl->comp_seen;
struct request *rq; struct request *rq;
blk_status_t status;
ctrl->ka_last_check_time = jiffies; ctrl->ka_last_check_time = jiffies;
...@@ -1362,9 +1353,9 @@ static void nvme_keep_alive_work(struct work_struct *work) ...@@ -1362,9 +1353,9 @@ static void nvme_keep_alive_work(struct work_struct *work)
nvme_init_request(rq, &ctrl->ka_cmd); nvme_init_request(rq, &ctrl->ka_cmd);
rq->timeout = ctrl->kato * HZ; rq->timeout = ctrl->kato * HZ;
rq->end_io = nvme_keep_alive_end_io; status = blk_execute_rq(rq, false);
rq->end_io_data = ctrl; nvme_keep_alive_finish(rq, status, ctrl);
blk_execute_rq_nowait(rq, false); blk_mq_free_request(rq);
} }
static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
...@@ -2458,8 +2449,13 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) ...@@ -2458,8 +2449,13 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
else else
ctrl->ctrl_config = NVME_CC_CSS_NVM; ctrl->ctrl_config = NVME_CC_CSS_NVM;
if (ctrl->cap & NVME_CAP_CRMS_CRWMS && ctrl->cap & NVME_CAP_CRMS_CRIMS) /*
ctrl->ctrl_config |= NVME_CC_CRIME; * Setting CRIME results in CSTS.RDY before the media is ready. This
* makes it possible for media related commands to return the error
* NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY. Until the driver is
* restructured to handle retries, disable CC.CRIME.
*/
ctrl->ctrl_config &= ~NVME_CC_CRIME;
ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT; ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
...@@ -2489,9 +2485,6 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) ...@@ -2489,9 +2485,6 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
* devices are known to get this wrong. Use the larger of the * devices are known to get this wrong. Use the larger of the
* two values. * two values.
*/ */
if (ctrl->ctrl_config & NVME_CC_CRIME)
ready_timeout = NVME_CRTO_CRIMT(crto);
else
ready_timeout = NVME_CRTO_CRWMT(crto); ready_timeout = NVME_CRTO_CRWMT(crto);
if (ready_timeout < timeout) if (ready_timeout < timeout)
......
...@@ -431,7 +431,6 @@ static bool nvme_available_path(struct nvme_ns_head *head) ...@@ -431,7 +431,6 @@ static bool nvme_available_path(struct nvme_ns_head *head)
case NVME_CTRL_LIVE: case NVME_CTRL_LIVE:
case NVME_CTRL_RESETTING: case NVME_CTRL_RESETTING:
case NVME_CTRL_CONNECTING: case NVME_CTRL_CONNECTING:
/* fallthru */
return true; return true;
default: default:
break; break;
...@@ -580,6 +579,20 @@ static int nvme_add_ns_head_cdev(struct nvme_ns_head *head) ...@@ -580,6 +579,20 @@ static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
return ret; return ret;
} }
static void nvme_partition_scan_work(struct work_struct *work)
{
struct nvme_ns_head *head =
container_of(work, struct nvme_ns_head, partition_scan_work);
if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN,
&head->disk->state)))
return;
mutex_lock(&head->disk->open_mutex);
bdev_disk_changed(head->disk, false);
mutex_unlock(&head->disk->open_mutex);
}
static void nvme_requeue_work(struct work_struct *work) static void nvme_requeue_work(struct work_struct *work)
{ {
struct nvme_ns_head *head = struct nvme_ns_head *head =
...@@ -606,6 +619,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) ...@@ -606,6 +619,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
bio_list_init(&head->requeue_list); bio_list_init(&head->requeue_list);
spin_lock_init(&head->requeue_lock); spin_lock_init(&head->requeue_lock);
INIT_WORK(&head->requeue_work, nvme_requeue_work); INIT_WORK(&head->requeue_work, nvme_requeue_work);
INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work);
/* /*
* Add a multipath node if the subsystems supports multiple controllers. * Add a multipath node if the subsystems supports multiple controllers.
...@@ -629,6 +643,16 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) ...@@ -629,6 +643,16 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
return PTR_ERR(head->disk); return PTR_ERR(head->disk);
head->disk->fops = &nvme_ns_head_ops; head->disk->fops = &nvme_ns_head_ops;
head->disk->private_data = head; head->disk->private_data = head;
/*
* We need to suppress the partition scan from occurring within the
* controller's scan_work context. If a path error occurs here, the IO
* will wait until a path becomes available or all paths are torn down,
* but that action also occurs within scan_work, so it would deadlock.
* Defer the partition scan to a different context that does not block
* scan_work.
*/
set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state);
sprintf(head->disk->disk_name, "nvme%dn%d", sprintf(head->disk->disk_name, "nvme%dn%d",
ctrl->subsys->instance, head->instance); ctrl->subsys->instance, head->instance);
return 0; return 0;
...@@ -655,6 +679,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) ...@@ -655,6 +679,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
return; return;
} }
nvme_add_ns_head_cdev(head); nvme_add_ns_head_cdev(head);
kblockd_schedule_work(&head->partition_scan_work);
} }
mutex_lock(&head->lock); mutex_lock(&head->lock);
...@@ -974,14 +999,14 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) ...@@ -974,14 +999,14 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
return; return;
if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
nvme_cdev_del(&head->cdev, &head->cdev_device); nvme_cdev_del(&head->cdev, &head->cdev_device);
del_gendisk(head->disk);
}
/* /*
* requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
* to allow multipath to fail all I/O. * to allow multipath to fail all I/O.
*/ */
synchronize_srcu(&head->srcu); synchronize_srcu(&head->srcu);
kblockd_schedule_work(&head->requeue_work); kblockd_schedule_work(&head->requeue_work);
del_gendisk(head->disk);
}
} }
void nvme_mpath_remove_disk(struct nvme_ns_head *head) void nvme_mpath_remove_disk(struct nvme_ns_head *head)
...@@ -991,6 +1016,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) ...@@ -991,6 +1016,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
/* make sure all pending bios are cleaned up */ /* make sure all pending bios are cleaned up */
kblockd_schedule_work(&head->requeue_work); kblockd_schedule_work(&head->requeue_work);
flush_work(&head->requeue_work); flush_work(&head->requeue_work);
flush_work(&head->partition_scan_work);
put_disk(head->disk); put_disk(head->disk);
} }
......
...@@ -494,6 +494,7 @@ struct nvme_ns_head { ...@@ -494,6 +494,7 @@ struct nvme_ns_head {
struct bio_list requeue_list; struct bio_list requeue_list;
spinlock_t requeue_lock; spinlock_t requeue_lock;
struct work_struct requeue_work; struct work_struct requeue_work;
struct work_struct partition_scan_work;
struct mutex lock; struct mutex lock;
unsigned long flags; unsigned long flags;
#define NVME_NSHEAD_DISK_LIVE 0 #define NVME_NSHEAD_DISK_LIVE 0
......
...@@ -2506,17 +2506,29 @@ static unsigned int nvme_pci_nr_maps(struct nvme_dev *dev) ...@@ -2506,17 +2506,29 @@ static unsigned int nvme_pci_nr_maps(struct nvme_dev *dev)
return 1; return 1;
} }
static void nvme_pci_update_nr_queues(struct nvme_dev *dev) static bool nvme_pci_update_nr_queues(struct nvme_dev *dev)
{ {
if (!dev->ctrl.tagset) { if (!dev->ctrl.tagset) {
nvme_alloc_io_tag_set(&dev->ctrl, &dev->tagset, &nvme_mq_ops, nvme_alloc_io_tag_set(&dev->ctrl, &dev->tagset, &nvme_mq_ops,
nvme_pci_nr_maps(dev), sizeof(struct nvme_iod)); nvme_pci_nr_maps(dev), sizeof(struct nvme_iod));
return; return true;
}
/* Give up if we are racing with nvme_dev_disable() */
if (!mutex_trylock(&dev->shutdown_lock))
return false;
/* Check if nvme_dev_disable() has been executed already */
if (!dev->online_queues) {
mutex_unlock(&dev->shutdown_lock);
return false;
} }
blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
/* free previously allocated queues that are no longer usable */ /* free previously allocated queues that are no longer usable */
nvme_free_queues(dev, dev->online_queues); nvme_free_queues(dev, dev->online_queues);
mutex_unlock(&dev->shutdown_lock);
return true;
} }
static int nvme_pci_enable(struct nvme_dev *dev) static int nvme_pci_enable(struct nvme_dev *dev)
...@@ -2797,7 +2809,8 @@ static void nvme_reset_work(struct work_struct *work) ...@@ -2797,7 +2809,8 @@ static void nvme_reset_work(struct work_struct *work)
nvme_dbbuf_set(dev); nvme_dbbuf_set(dev);
nvme_unquiesce_io_queues(&dev->ctrl); nvme_unquiesce_io_queues(&dev->ctrl);
nvme_wait_freeze(&dev->ctrl); nvme_wait_freeze(&dev->ctrl);
nvme_pci_update_nr_queues(dev); if (!nvme_pci_update_nr_queues(dev))
goto out;
nvme_unfreeze(&dev->ctrl); nvme_unfreeze(&dev->ctrl);
} else { } else {
dev_warn(dev->ctrl.device, "IO queues lost\n"); dev_warn(dev->ctrl.device, "IO queues lost\n");
......
...@@ -2644,10 +2644,11 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) ...@@ -2644,10 +2644,11 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
len = nvmf_get_address(ctrl, buf, size); len = nvmf_get_address(ctrl, buf, size);
if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
return len;
mutex_lock(&queue->queue_lock); mutex_lock(&queue->queue_lock);
if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
goto done;
ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr); ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr);
if (ret > 0) { if (ret > 0) {
if (len > 0) if (len > 0)
...@@ -2655,7 +2656,7 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) ...@@ -2655,7 +2656,7 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n", len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n",
(len) ? "," : "", &src_addr); (len) ? "," : "", &src_addr);
} }
done:
mutex_unlock(&queue->queue_lock); mutex_unlock(&queue->queue_lock);
return len; return len;
......
...@@ -265,6 +265,13 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) ...@@ -265,6 +265,13 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
{ {
if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags)) if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags))
return; return;
/*
* It's possible that some requests might have been added
* after admin queue is stopped/quiesced. So now start the
* queue to flush these requests to the completion.
*/
nvme_unquiesce_admin_queue(&ctrl->ctrl);
nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
nvme_remove_admin_tag_set(&ctrl->ctrl); nvme_remove_admin_tag_set(&ctrl->ctrl);
} }
...@@ -297,6 +304,12 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) ...@@ -297,6 +304,12 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
} }
ctrl->ctrl.queue_count = 1; ctrl->ctrl.queue_count = 1;
/*
* It's possible that some requests might have been added
* after io queue is stopped/quiesced. So now start the
* queue to flush these requests to the completion.
*/
nvme_unquiesce_io_queues(&ctrl->ctrl);
} }
static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
......
...@@ -535,10 +535,6 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req) ...@@ -535,10 +535,6 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
break; break;
case nvme_admin_identify: case nvme_admin_identify:
switch (req->cmd->identify.cns) { switch (req->cmd->identify.cns) {
case NVME_ID_CNS_CTRL:
req->execute = nvmet_passthru_execute_cmd;
req->p.use_workqueue = true;
return NVME_SC_SUCCESS;
case NVME_ID_CNS_CS_CTRL: case NVME_ID_CNS_CS_CTRL:
switch (req->cmd->identify.csi) { switch (req->cmd->identify.csi) {
case NVME_CSI_ZNS: case NVME_CSI_ZNS:
...@@ -547,7 +543,9 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req) ...@@ -547,7 +543,9 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
return NVME_SC_SUCCESS; return NVME_SC_SUCCESS;
} }
return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
case NVME_ID_CNS_CTRL:
case NVME_ID_CNS_NS: case NVME_ID_CNS_NS:
case NVME_ID_CNS_NS_DESC_LIST:
req->execute = nvmet_passthru_execute_cmd; req->execute = nvmet_passthru_execute_cmd;
req->p.use_workqueue = true; req->p.use_workqueue = true;
return NVME_SC_SUCCESS; return NVME_SC_SUCCESS;
......
...@@ -39,6 +39,8 @@ ...@@ -39,6 +39,8 @@
#define NVMET_RDMA_BACKLOG 128 #define NVMET_RDMA_BACKLOG 128
#define NVMET_RDMA_DISCRETE_RSP_TAG -1
struct nvmet_rdma_srq; struct nvmet_rdma_srq;
struct nvmet_rdma_cmd { struct nvmet_rdma_cmd {
...@@ -75,7 +77,7 @@ struct nvmet_rdma_rsp { ...@@ -75,7 +77,7 @@ struct nvmet_rdma_rsp {
u32 invalidate_rkey; u32 invalidate_rkey;
struct list_head wait_list; struct list_head wait_list;
struct list_head free_list; int tag;
}; };
enum nvmet_rdma_queue_state { enum nvmet_rdma_queue_state {
...@@ -98,8 +100,7 @@ struct nvmet_rdma_queue { ...@@ -98,8 +100,7 @@ struct nvmet_rdma_queue {
struct nvmet_sq nvme_sq; struct nvmet_sq nvme_sq;
struct nvmet_rdma_rsp *rsps; struct nvmet_rdma_rsp *rsps;
struct list_head free_rsps; struct sbitmap rsp_tags;
spinlock_t rsps_lock;
struct nvmet_rdma_cmd *cmds; struct nvmet_rdma_cmd *cmds;
struct work_struct release_work; struct work_struct release_work;
...@@ -172,7 +173,8 @@ static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue); ...@@ -172,7 +173,8 @@ static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev, static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_rsp *r); struct nvmet_rdma_rsp *r);
static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_rsp *r); struct nvmet_rdma_rsp *r,
int tag);
static const struct nvmet_fabrics_ops nvmet_rdma_ops; static const struct nvmet_fabrics_ops nvmet_rdma_ops;
...@@ -210,15 +212,12 @@ static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp) ...@@ -210,15 +212,12 @@ static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
static inline struct nvmet_rdma_rsp * static inline struct nvmet_rdma_rsp *
nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue) nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
{ {
struct nvmet_rdma_rsp *rsp; struct nvmet_rdma_rsp *rsp = NULL;
unsigned long flags; int tag;
spin_lock_irqsave(&queue->rsps_lock, flags); tag = sbitmap_get(&queue->rsp_tags);
rsp = list_first_entry_or_null(&queue->free_rsps, if (tag >= 0)
struct nvmet_rdma_rsp, free_list); rsp = &queue->rsps[tag];
if (likely(rsp))
list_del(&rsp->free_list);
spin_unlock_irqrestore(&queue->rsps_lock, flags);
if (unlikely(!rsp)) { if (unlikely(!rsp)) {
int ret; int ret;
...@@ -226,13 +225,12 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue) ...@@ -226,13 +225,12 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
rsp = kzalloc(sizeof(*rsp), GFP_KERNEL); rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
if (unlikely(!rsp)) if (unlikely(!rsp))
return NULL; return NULL;
ret = nvmet_rdma_alloc_rsp(queue->dev, rsp); ret = nvmet_rdma_alloc_rsp(queue->dev, rsp,
NVMET_RDMA_DISCRETE_RSP_TAG);
if (unlikely(ret)) { if (unlikely(ret)) {
kfree(rsp); kfree(rsp);
return NULL; return NULL;
} }
rsp->allocated = true;
} }
return rsp; return rsp;
...@@ -241,17 +239,13 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue) ...@@ -241,17 +239,13 @@ nvmet_rdma_get_rsp(struct nvmet_rdma_queue *queue)
static inline void static inline void
nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp) nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp)
{ {
unsigned long flags; if (unlikely(rsp->tag == NVMET_RDMA_DISCRETE_RSP_TAG)) {
if (unlikely(rsp->allocated)) {
nvmet_rdma_free_rsp(rsp->queue->dev, rsp); nvmet_rdma_free_rsp(rsp->queue->dev, rsp);
kfree(rsp); kfree(rsp);
return; return;
} }
spin_lock_irqsave(&rsp->queue->rsps_lock, flags); sbitmap_clear_bit(&rsp->queue->rsp_tags, rsp->tag);
list_add_tail(&rsp->free_list, &rsp->queue->free_rsps);
spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags);
} }
static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev, static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev,
...@@ -404,7 +398,7 @@ static void nvmet_rdma_free_cmds(struct nvmet_rdma_device *ndev, ...@@ -404,7 +398,7 @@ static void nvmet_rdma_free_cmds(struct nvmet_rdma_device *ndev,
} }
static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
struct nvmet_rdma_rsp *r) struct nvmet_rdma_rsp *r, int tag)
{ {
/* NVMe CQE / RDMA SEND */ /* NVMe CQE / RDMA SEND */
r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL); r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL);
...@@ -432,6 +426,7 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, ...@@ -432,6 +426,7 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
r->read_cqe.done = nvmet_rdma_read_data_done; r->read_cqe.done = nvmet_rdma_read_data_done;
/* Data Out / RDMA WRITE */ /* Data Out / RDMA WRITE */
r->write_cqe.done = nvmet_rdma_write_data_done; r->write_cqe.done = nvmet_rdma_write_data_done;
r->tag = tag;
return 0; return 0;
...@@ -454,21 +449,23 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue) ...@@ -454,21 +449,23 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
{ {
struct nvmet_rdma_device *ndev = queue->dev; struct nvmet_rdma_device *ndev = queue->dev;
int nr_rsps = queue->recv_queue_size * 2; int nr_rsps = queue->recv_queue_size * 2;
int ret = -EINVAL, i; int ret = -ENOMEM, i;
if (sbitmap_init_node(&queue->rsp_tags, nr_rsps, -1, GFP_KERNEL,
NUMA_NO_NODE, false, true))
goto out;
queue->rsps = kcalloc(nr_rsps, sizeof(struct nvmet_rdma_rsp), queue->rsps = kcalloc(nr_rsps, sizeof(struct nvmet_rdma_rsp),
GFP_KERNEL); GFP_KERNEL);
if (!queue->rsps) if (!queue->rsps)
goto out; goto out_free_sbitmap;
for (i = 0; i < nr_rsps; i++) { for (i = 0; i < nr_rsps; i++) {
struct nvmet_rdma_rsp *rsp = &queue->rsps[i]; struct nvmet_rdma_rsp *rsp = &queue->rsps[i];
ret = nvmet_rdma_alloc_rsp(ndev, rsp); ret = nvmet_rdma_alloc_rsp(ndev, rsp, i);
if (ret) if (ret)
goto out_free; goto out_free;
list_add_tail(&rsp->free_list, &queue->free_rsps);
} }
return 0; return 0;
...@@ -477,6 +474,8 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue) ...@@ -477,6 +474,8 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
while (--i >= 0) while (--i >= 0)
nvmet_rdma_free_rsp(ndev, &queue->rsps[i]); nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
kfree(queue->rsps); kfree(queue->rsps);
out_free_sbitmap:
sbitmap_free(&queue->rsp_tags);
out: out:
return ret; return ret;
} }
...@@ -489,6 +488,7 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue) ...@@ -489,6 +488,7 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue)
for (i = 0; i < nr_rsps; i++) for (i = 0; i < nr_rsps; i++)
nvmet_rdma_free_rsp(ndev, &queue->rsps[i]); nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
kfree(queue->rsps); kfree(queue->rsps);
sbitmap_free(&queue->rsp_tags);
} }
static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
...@@ -1447,8 +1447,6 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, ...@@ -1447,8 +1447,6 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
INIT_LIST_HEAD(&queue->rsp_wait_list); INIT_LIST_HEAD(&queue->rsp_wait_list);
INIT_LIST_HEAD(&queue->rsp_wr_wait_list); INIT_LIST_HEAD(&queue->rsp_wr_wait_list);
spin_lock_init(&queue->rsp_wr_wait_lock); spin_lock_init(&queue->rsp_wr_wait_lock);
INIT_LIST_HEAD(&queue->free_rsps);
spin_lock_init(&queue->rsps_lock);
INIT_LIST_HEAD(&queue->queue_list); INIT_LIST_HEAD(&queue->queue_list);
queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL); queue->idx = ida_alloc(&nvmet_rdma_queue_ida, GFP_KERNEL);
......
...@@ -175,7 +175,13 @@ ...@@ -175,7 +175,13 @@
/* use ioctl encoding for uring command */ /* use ioctl encoding for uring command */
#define UBLK_F_CMD_IOCTL_ENCODE (1UL << 6) #define UBLK_F_CMD_IOCTL_ENCODE (1UL << 6)
/* Copy between request and user buffer by pread()/pwrite() */ /*
* Copy between request and user buffer by pread()/pwrite()
*
* Not available for UBLK_F_UNPRIVILEGED_DEV, otherwise userspace may
* deceive us by not filling request buffer, then kernel uninitialized
* data may be leaked.
*/
#define UBLK_F_USER_COPY (1UL << 7) #define UBLK_F_USER_COPY (1UL << 7)
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment