Commit 360e6942 authored by Linus Torvalds

Merge tag 'block-6.5-2023-08-11' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
      - Fixes for request_queue state (Ming)
      - Another uuid quirk (August)

 - RCU poll fix for NVMe (Ming)

 - Fix for an IO stall with polled IO (me)

 - Fix for blk-iocost stats enable/disable accounting (Chengming)

 - Regression fix for large pages for zram (Christoph)

* tag 'block-6.5-2023-08-11' of git://git.kernel.dk/linux:
  nvme: core: don't hold rcu read lock in nvme_ns_chr_uring_cmd_iopoll
  blk-iocost: fix queue stats accounting
  block: don't make REQ_POLLED imply REQ_NOWAIT
  block: get rid of unused plug->nowait flag
  zram: take device and not only bvec offset into account
  nvme-pci: add NVME_QUIRK_BOGUS_NID for Samsung PM9B1 256G and 512G
  nvme-rdma: fix potential unbalanced freeze & unfreeze
  nvme-tcp: fix potential unbalanced freeze & unfreeze
  nvme: fix possible hang when removing a controller during error recovery
parents 2e40ed24 a7a7dabb
@@ -722,14 +722,9 @@ void submit_bio_noacct(struct bio *bio)
 	struct block_device *bdev = bio->bi_bdev;
 	struct request_queue *q = bdev_get_queue(bdev);
 	blk_status_t status = BLK_STS_IOERR;
-	struct blk_plug *plug;
 
 	might_sleep();
 
-	plug = blk_mq_plug(bio);
-	if (plug && plug->nowait)
-		bio->bi_opf |= REQ_NOWAIT;
-
 	/*
 	 * For a REQ_NOWAIT based request, return -EOPNOTSUPP
 	 * if queue does not support NOWAIT.
@@ -1059,7 +1054,6 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
 	plug->rq_count = 0;
 	plug->multiple_queues = false;
 	plug->has_elevator = false;
-	plug->nowait = false;
 	INIT_LIST_HEAD(&plug->cb_list);
 
 	/*
......
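The two hunks above delete the plug-wide `nowait` flag, which nothing sets anymore now that REQ_NOWAIT is decided per bio by each submitter. As a rough illustration of why the per-bio model is safer, here is a small standalone C sketch (all types and flag values are made-up stand-ins, not the kernel's): a plug-wide override taints every bio that passes through the plug, including bios whose issuer cannot handle -EAGAIN.

#include <stdbool.h>
#include <stdio.h>

#define REQ_NOWAIT (1u << 0)

struct bio  { unsigned int bi_opf; const char *origin; };
struct plug { bool nowait; };

/* models the old behaviour: the plug taints every bio passing through */
static void submit_via_plug(struct plug *plug, struct bio *bio)
{
	if (plug && plug->nowait)
		bio->bi_opf |= REQ_NOWAIT;
}

int main(void)
{
	struct plug plug = { .nowait = true };
	struct bio app_io  = { 0, "app data (asked for nowait)" };
	struct bio meta_io = { 0, "fs metadata (must not fail with EAGAIN)" };

	submit_via_plug(&plug, &app_io);
	submit_via_plug(&plug, &meta_io);   /* unintended nowait */

	printf("%s: nowait=%u\n", app_io.origin,  app_io.bi_opf & REQ_NOWAIT);
	printf("%s: nowait=%u\n", meta_io.origin, meta_io.bi_opf & REQ_NOWAIT);
	return 0;
}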
@@ -3301,11 +3301,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 	if (qos[QOS_MIN] > qos[QOS_MAX])
 		goto einval;
 
-	if (enable) {
+	if (enable && !ioc->enabled) {
 		blk_stat_enable_accounting(disk->queue);
 		blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
 		ioc->enabled = true;
-	} else {
+	} else if (!enable && ioc->enabled) {
+		blk_stat_disable_accounting(disk->queue);
 		blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
 		ioc->enabled = false;
 	}
......
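The guard added above makes the qos write idempotent: the stats-accounting enable/disable calls nest, so issuing an enable on every write of the same value (and never issuing the missing disable) leaves the accounting counter unbalanced. A minimal standalone sketch of the guarded-transition pattern, with illustrative stand-in names for the refcounted calls:

#include <stdbool.h>
#include <stdio.h>

static int accounting_refcount;     /* models the nested enable/disable pairing */
static bool enabled;                /* models ioc->enabled */

static void accounting_enable(void)  { accounting_refcount++; }
static void accounting_disable(void) { accounting_refcount--; }

static void qos_write(bool enable)
{
	if (enable && !enabled) {           /* only on false -> true */
		accounting_enable();
		enabled = true;
	} else if (!enable && enabled) {    /* only on true -> false */
		accounting_disable();
		enabled = false;
	}
	/* writing the same state twice is now a no-op */
}

int main(void)
{
	qos_write(true);
	qos_write(true);   /* would have leaked a reference before the fix */
	qos_write(false);
	printf("refcount = %d\n", accounting_refcount);  /* 0: balanced */
	return 0;
}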
@@ -358,13 +358,14 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 		task_io_account_write(bio->bi_iter.bi_size);
 	}
 
+	if (iocb->ki_flags & IOCB_NOWAIT)
+		bio->bi_opf |= REQ_NOWAIT;
+
 	if (iocb->ki_flags & IOCB_HIPRI) {
-		bio->bi_opf |= REQ_POLLED | REQ_NOWAIT;
+		bio->bi_opf |= REQ_POLLED;
 		submit_bio(bio);
 		WRITE_ONCE(iocb->private, bio);
 	} else {
-		if (iocb->ki_flags & IOCB_NOWAIT)
-			bio->bi_opf |= REQ_NOWAIT;
 		submit_bio(bio);
 	}
 	return -EIOCBQUEUED;
......
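This hunk decouples REQ_POLLED from REQ_NOWAIT for async direct IO: polled no longer implies nonblocking, which is the IO-stall fix called out in the merge summary. From userspace the two knobs are visible as separate preadv2() flags. A hedged sketch, assuming an O_DIRECT-capable block device whose queue supports polling and a glibc recent enough to expose the RWF_* flags; error handling kept minimal:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/uio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	if (argc < 2) {
		fprintf(stderr, "usage: %s <blockdev>\n", argv[0]);
		return 1;
	}

	int fd = open(argv[1], O_RDONLY | O_DIRECT);
	if (fd < 0) { perror("open"); return 1; }

	void *buf;
	if (posix_memalign(&buf, 4096, 4096))
		return 1;
	struct iovec iov = { .iov_base = buf, .iov_len = 4096 };

	/* polled only: after the fix this may block waiting for a tag
	 * instead of failing with -EAGAIN */
	ssize_t n = preadv2(fd, &iov, 1, 0, RWF_HIPRI);
	printf("polled read: %zd\n", n);

	/* polled and explicitly nonblocking: the caller opts in */
	n = preadv2(fd, &iov, 1, 0, RWF_HIPRI | RWF_NOWAIT);
	printf("polled nowait read: %zd\n", n);

	free(buf);
	close(fd);
	return 0;
}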
@@ -1870,15 +1870,16 @@ static void zram_bio_discard(struct zram *zram, struct bio *bio)
 
 static void zram_bio_read(struct zram *zram, struct bio *bio)
 {
-	struct bvec_iter iter;
-	struct bio_vec bv;
-	unsigned long start_time;
+	unsigned long start_time = bio_start_io_acct(bio);
+	struct bvec_iter iter = bio->bi_iter;
 
-	start_time = bio_start_io_acct(bio);
-	bio_for_each_segment(bv, bio, iter) {
+	do {
 		u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
 		u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
 				SECTOR_SHIFT;
+		struct bio_vec bv = bio_iter_iovec(bio, iter);
+
+		bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
 
 		if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
 			atomic64_inc(&zram->stats.failed_reads);
@@ -1890,22 +1891,26 @@ static void zram_bio_read(struct zram *zram, struct bio *bio)
 		zram_slot_lock(zram, index);
 		zram_accessed(zram, index);
 		zram_slot_unlock(zram, index);
-	}
+
+		bio_advance_iter_single(bio, &iter, bv.bv_len);
+	} while (iter.bi_size);
 
 	bio_end_io_acct(bio, start_time);
 	bio_endio(bio);
 }
 
 static void zram_bio_write(struct zram *zram, struct bio *bio)
 {
-	struct bvec_iter iter;
-	struct bio_vec bv;
-	unsigned long start_time;
+	unsigned long start_time = bio_start_io_acct(bio);
+	struct bvec_iter iter = bio->bi_iter;
 
-	start_time = bio_start_io_acct(bio);
-	bio_for_each_segment(bv, bio, iter) {
+	do {
 		u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
 		u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
 				SECTOR_SHIFT;
+		struct bio_vec bv = bio_iter_iovec(bio, iter);
+
+		bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
 
 		if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
 			atomic64_inc(&zram->stats.failed_writes);
@@ -1916,7 +1921,10 @@ static void zram_bio_write(struct zram *zram, struct bio *bio)
 		zram_slot_lock(zram, index);
 		zram_accessed(zram, index);
 		zram_slot_unlock(zram, index);
-	}
+
+		bio_advance_iter_single(bio, &iter, bv.bv_len);
+	} while (iter.bi_size);
 
 	bio_end_io_acct(bio, start_time);
 	bio_endio(bio);
 }
......
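The zram hunks replace per-segment iteration with a loop that clamps each chunk so it never crosses a PAGE_SIZE boundary of the device (bv.bv_len = min(bv_len, PAGE_SIZE - offset)) and then advances the iterator by exactly that amount via bio_advance_iter_single(); previously only the bvec offset was taken into account, which broke with large pages. The split arithmetic can be checked standalone; this sketch uses plain integers in place of the bio/bvec machinery:

#include <stdio.h>

#define SECTOR_SHIFT 9
#define PAGE_SIZE 4096u
#define SECTORS_PER_PAGE (PAGE_SIZE >> SECTOR_SHIFT)

int main(void)
{
	unsigned long long sector = 7;   /* deliberately unaligned start */
	unsigned int remaining = 5120;   /* bytes left in the "bio" */

	while (remaining) {
		unsigned int index  = sector / SECTORS_PER_PAGE;  /* device page */
		unsigned int offset = (sector % SECTORS_PER_PAGE) << SECTOR_SHIFT;
		unsigned int len    = remaining < PAGE_SIZE - offset ?
				      remaining : PAGE_SIZE - offset; /* clamp to page end */

		/* expected: page 0/off 3584/len 512, page 1/off 0/len 4096,
		 * page 2/off 0/len 512 -- no chunk crosses a device page */
		printf("page %u, offset %u, len %u\n", index, offset, len);
		sector += len >> SECTOR_SHIFT;
		remaining -= len;
	}
	return 0;
}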
@@ -3933,6 +3933,12 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	 */
 	nvme_mpath_clear_ctrl_paths(ctrl);
 
+	/*
+	 * Unquiesce io queues so any pending IO won't hang, especially
+	 * those submitted from scan work
+	 */
+	nvme_unquiesce_io_queues(ctrl);
+
 	/* prevent racing with ns scanning */
 	flush_work(&ctrl->scan_work);
@@ -3942,10 +3948,8 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	 * removing the namespaces' disks; fail all the queues now to avoid
 	 * potentially having to clean up the failed sync later.
 	 */
-	if (ctrl->state == NVME_CTRL_DEAD) {
+	if (ctrl->state == NVME_CTRL_DEAD)
 		nvme_mark_namespaces_dead(ctrl);
-		nvme_unquiesce_io_queues(ctrl);
-	}
 
 	/* this is a no-op when called from the controller reset handler */
 	nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
......
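The hunks above fix a possible hang when removing a controller during error recovery: scan work may be blocked inside IO submission, so the IO queues must be unquiesced before flush_work(&ctrl->scan_work), not only in the NVME_CTRL_DEAD branch afterwards. The ordering rule can be modeled with a pthread gate standing in for the quiesced queue (all names here are illustrative stand-ins):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static bool quiesced = true;

/* models scan work: blocked until the queue is unquiesced */
static void *scan_work(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (quiesced)                 /* pending IO waiting on the queue */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
	return NULL;
}

static void unquiesce(void)
{
	pthread_mutex_lock(&lock);
	quiesced = false;
	pthread_cond_broadcast(&cond);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, scan_work, NULL);

	/* the fix's ordering: reopen the queue *before* waiting on the
	 * worker; joining first with quiesced == true would hang */
	unquiesce();
	pthread_join(t, NULL);   /* models flush_work(&ctrl->scan_work) */
	puts("scan work flushed without hanging");
	return 0;
}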
@@ -786,11 +786,9 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
 	if (!(ioucmd->flags & IORING_URING_CMD_POLLED))
 		return 0;
 
-	rcu_read_lock();
 	req = READ_ONCE(ioucmd->cookie);
 	if (req && blk_rq_is_poll(req))
 		ret = blk_rq_poll(req, iob, poll_flags);
-	rcu_read_unlock();
 	return ret;
 }
 
 #ifdef CONFIG_NVME_MULTIPATH
......
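blk_rq_poll() may sleep, so it must not run inside an RCU read-side critical section; the hunk simply drops the rcu_read_lock()/rcu_read_unlock() pair around it. A userspace toy model of the invariant (a "no sleeping allowed" depth of the sort CONFIG_DEBUG_ATOMIC_SLEEP checks in the kernel), with made-up stand-in names throughout:

#include <assert.h>
#include <stdio.h>

static int rcu_depth;   /* models rcu_read_lock() nesting */

static void rcu_read_lock(void)   { rcu_depth++; }
static void rcu_read_unlock(void) { rcu_depth--; }

static int blk_rq_poll_model(void)
{
	/* a sleeping function asserts it is not under an RCU reader */
	assert(rcu_depth == 0 && "may sleep: not allowed under rcu_read_lock");
	return 1;   /* pretend one completion was reaped */
}

int main(void)
{
	/* fixed pattern: poll outside any RCU read-side section */
	int ret = blk_rq_poll_model();
	printf("polled ok, ret=%d\n", ret);

	/* buggy pattern (would trip the assertion):
	 * rcu_read_lock(); blk_rq_poll_model(); rcu_read_unlock();
	 */
	(void)rcu_read_lock;
	(void)rcu_read_unlock;
	return 0;
}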
@@ -3402,7 +3402,8 @@ static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_DEVICE(0x1d97, 0x2263),	/* SPCC */
 		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
 	{ PCI_DEVICE(0x144d, 0xa80b),	/* Samsung PM9B1 256G and 512G */
-		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES |
+				NVME_QUIRK_BOGUS_NID, },
 	{ PCI_DEVICE(0x144d, 0xa809),	/* Samsung MZALQ256HBJD 256G */
 		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
 	{ PCI_DEVICE(0x144d, 0xa802),	/* Samsung SM953 */
......
@@ -883,6 +883,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 		goto out_cleanup_tagset;
 
 	if (!new) {
+		nvme_start_freeze(&ctrl->ctrl);
 		nvme_unquiesce_io_queues(&ctrl->ctrl);
 		if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
 			/*
@@ -891,6 +892,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 			 * to be safe.
 			 */
 			ret = -ENODEV;
+			nvme_unfreeze(&ctrl->ctrl);
 			goto out_wait_freeze_timed_out;
 		}
 		blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset,
@@ -940,7 +942,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
 		bool remove)
 {
 	if (ctrl->ctrl.queue_count > 1) {
-		nvme_start_freeze(&ctrl->ctrl);
 		nvme_quiesce_io_queues(&ctrl->ctrl);
 		nvme_sync_io_queues(&ctrl->ctrl);
 		nvme_rdma_stop_io_queues(ctrl);
......
@@ -1868,6 +1868,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
 		goto out_cleanup_connect_q;
 
 	if (!new) {
+		nvme_start_freeze(ctrl);
 		nvme_unquiesce_io_queues(ctrl);
 		if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
 			/*
@@ -1876,6 +1877,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
 			 * to be safe.
 			 */
 			ret = -ENODEV;
+			nvme_unfreeze(ctrl);
 			goto out_wait_freeze_timed_out;
 		}
 		blk_mq_update_nr_hw_queues(ctrl->tagset,
@@ -1980,7 +1982,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
 	if (ctrl->queue_count <= 1)
 		return;
 	nvme_quiesce_admin_queue(ctrl);
-	nvme_start_freeze(ctrl);
 	nvme_quiesce_io_queues(ctrl);
 	nvme_sync_io_queues(ctrl);
 	nvme_tcp_stop_io_queues(ctrl);
......
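The rdma and tcp hunks fix the same unbalanced freeze/unfreeze pattern: nvme_start_freeze() moves out of teardown and into the (!new) reconnect path, and an nvme_unfreeze() is added on the wait-timeout error path, so repeated failed recoveries can no longer bump the queue freeze depth without a matching unfreeze. A counter model of the balancing (stand-in names only; in the drivers the success-path unfreeze happens further along in the reconnect sequence):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

static int freeze_depth;   /* models the request_queue freeze depth */

static void start_freeze(void) { freeze_depth++; }
static void unfreeze(void)     { freeze_depth--; }

/* loosely models nvme_tcp/rdma_configure_io_queues() after the fix */
static int configure_io_queues(bool new, bool freeze_times_out)
{
	if (!new) {
		start_freeze();            /* moved here from teardown */
		if (freeze_times_out) {
			unfreeze();        /* added: balance on error */
			return -1;         /* -ENODEV in the drivers */
		}
		unfreeze();                /* success path balances too */
	}
	return 0;
}

int main(void)
{
	configure_io_queues(false, true);    /* a failed recovery */
	configure_io_queues(false, false);   /* a successful recovery */
	assert(freeze_depth == 0);           /* queues never left frozen */
	puts("freeze/unfreeze balanced");
	return 0;
}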
@@ -791,7 +791,7 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
 static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
 {
 	bio->bi_opf |= REQ_POLLED;
-	if (!is_sync_kiocb(kiocb))
+	if (kiocb->ki_flags & IOCB_NOWAIT)
 		bio->bi_opf |= REQ_NOWAIT;
 }
......
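bio_set_polled() now keys REQ_NOWAIT off the caller's IOCB_NOWAIT flag instead of is_sync_kiocb(), so async polled IO may block waiting for a tag unless the caller explicitly opted into nonblocking behavior. A compilable standalone mirror of the helper, with stub types and illustrative flag values rather than the kernel's:

#include <stdio.h>

#define IOCB_NOWAIT (1 << 0)
#define IOCB_HIPRI  (1 << 1)
#define REQ_POLLED  (1u << 0)
#define REQ_NOWAIT  (1u << 1)

struct kiocb { int ki_flags; };
struct bio   { unsigned int bi_opf; };

/* mirrors the fixed helper: NOWAIT tracks the caller's flag, not
 * whether the kiocb is asynchronous */
static void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
{
	bio->bi_opf |= REQ_POLLED;
	if (kiocb->ki_flags & IOCB_NOWAIT)
		bio->bi_opf |= REQ_NOWAIT;
}

int main(void)
{
	struct kiocb async_polled = { .ki_flags = IOCB_HIPRI };
	struct bio bio = { 0 };

	bio_set_polled(&bio, &async_polled);
	/* before the fix this bio would also carry REQ_NOWAIT and could
	 * fail with EAGAIN instead of waiting for a free tag */
	printf("REQ_NOWAIT set: %s\n",
	       bio.bi_opf & REQ_NOWAIT ? "yes" : "no");
	return 0;
}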
@@ -969,7 +969,6 @@ struct blk_plug {
 	bool multiple_queues;
 	bool has_elevator;
-	bool nowait;
 
 	struct list_head cb_list;	/* md requires an unplug callback */
 };
......