Commit 9d0281b5 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'block-6.3-2023-03-03' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - NVMe pull request via Christoph:
      - Don't access released socket during error recovery (Akinobu
        Mita)
      - Bring back auto-removal of deleted namespaces during sequential
        scan (Christoph Hellwig)
      - Fix an error code in nvme_auth_process_dhchap_challenge (Dan
        Carpenter)
      - Show well known discovery name (Daniel Wagner)
      - Add a missing endianess conversion in effects masking (Keith
        Busch)

 - Fix for a regression introduced in blk-rq-qos during init in this
   merge window (Breno)

 - Reorder a few fields in struct blk_mq_tag_set, eliminating a few
   holes and shrinking it (Christophe)

 - Remove redundant bdev_get_queue() NULL checks (Juhyung)

 - Add sed-opal single user mode support flag (Luca)

 - Remove SQE128 check in ublk as it isn't needed, saving some memory
   (Ming)

 - Op specific segment checking for cloned requests (Uday)

 - Exclusive open partition scan fixes (Yu)

 - Loop offset/size checking before assigning them in the device (Zhong)

 - Bio polling fixes (me)

* tag 'block-6.3-2023-03-03' of git://git.kernel.dk/linux:
  blk-mq: enforce op-specific segment limits in blk_insert_cloned_request
  nvme-fabrics: show well known discovery name
  nvme-tcp: don't access released socket during error recovery
  nvme-auth: fix an error code in nvme_auth_process_dhchap_challenge()
  nvme: bring back auto-removal of deleted namespaces during sequential scan
  blk-iocost: Pass gendisk to ioc_refresh_params
  nvme: fix sparse warning on effects masking
  block: be a bit more careful in checking for NULL bdev while polling
  block: clear bio->bi_bdev when putting a bio back in the cache
  loop: loop_set_status_from_info() check before assignment
  ublk: remove check IO_URING_F_SQE128 in ublk_ch_uring_cmd
  block: remove more NULL checks after bdev_get_queue()
  blk-mq: Reorder fields in 'struct blk_mq_tag_set'
  block: fix scan partition for exclusively open device again
  block: Revert "block: Do not reread partition table on exclusively open device"
  sed-opal: add support flag for SUM in status ioctl
parents 1bd1aee6 49d24398
......@@ -772,6 +772,7 @@ static inline void bio_put_percpu_cache(struct bio *bio)
if ((bio->bi_opf & REQ_POLLED) && !WARN_ON_ONCE(in_interrupt())) {
bio->bi_next = cache->free_list;
bio->bi_bdev = NULL;
cache->free_list = bio;
cache->nr++;
} else {
......
......@@ -858,10 +858,16 @@ EXPORT_SYMBOL(submit_bio);
*/
int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
blk_qc_t cookie = READ_ONCE(bio->bi_cookie);
struct block_device *bdev;
struct request_queue *q;
int ret = 0;
bdev = READ_ONCE(bio->bi_bdev);
if (!bdev)
return 0;
q = bdev_get_queue(bdev);
if (cookie == BLK_QC_T_NONE ||
!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
return 0;
......@@ -930,7 +936,7 @@ int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob,
*/
rcu_read_lock();
bio = READ_ONCE(kiocb->private);
if (bio && bio->bi_bdev)
if (bio)
ret = bio_poll(bio, iob, flags);
rcu_read_unlock();
......
......@@ -800,7 +800,11 @@ static void ioc_refresh_period_us(struct ioc *ioc)
ioc_refresh_margins(ioc);
}
static int ioc_autop_idx(struct ioc *ioc)
/*
* ioc->rqos.disk isn't initialized when this function is called from
* the init path.
*/
static int ioc_autop_idx(struct ioc *ioc, struct gendisk *disk)
{
int idx = ioc->autop_idx;
const struct ioc_params *p = &autop[idx];
......@@ -808,11 +812,11 @@ static int ioc_autop_idx(struct ioc *ioc)
u64 now_ns;
/* rotational? */
if (!blk_queue_nonrot(ioc->rqos.disk->queue))
if (!blk_queue_nonrot(disk->queue))
return AUTOP_HDD;
/* handle SATA SSDs w/ broken NCQ */
if (blk_queue_depth(ioc->rqos.disk->queue) == 1)
if (blk_queue_depth(disk->queue) == 1)
return AUTOP_SSD_QD1;
/* use one of the normal ssd sets */
......@@ -901,14 +905,19 @@ static void ioc_refresh_lcoefs(struct ioc *ioc)
&c[LCOEF_WPAGE], &c[LCOEF_WSEQIO], &c[LCOEF_WRANDIO]);
}
static bool ioc_refresh_params(struct ioc *ioc, bool force)
/*
* struct gendisk is required as an argument because ioc->rqos.disk
* is not properly initialized when called from the init path.
*/
static bool ioc_refresh_params_disk(struct ioc *ioc, bool force,
struct gendisk *disk)
{
const struct ioc_params *p;
int idx;
lockdep_assert_held(&ioc->lock);
idx = ioc_autop_idx(ioc);
idx = ioc_autop_idx(ioc, disk);
p = &autop[idx];
if (idx == ioc->autop_idx && !force)
......@@ -939,6 +948,11 @@ static bool ioc_refresh_params(struct ioc *ioc, bool force)
return true;
}
static bool ioc_refresh_params(struct ioc *ioc, bool force)
{
return ioc_refresh_params_disk(ioc, force, ioc->rqos.disk);
}
/*
* When an iocg accumulates too much vtime or gets deactivated, we throw away
* some vtime, which lowers the overall device utilization. As the exact amount
......@@ -2880,7 +2894,7 @@ static int blk_iocost_init(struct gendisk *disk)
spin_lock_irq(&ioc->lock);
ioc->autop_idx = AUTOP_INVALID;
ioc_refresh_params(ioc, true);
ioc_refresh_params_disk(ioc, true, disk);
spin_unlock_irq(&ioc->lock);
/*
......
......@@ -587,13 +587,6 @@ int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
}
EXPORT_SYMBOL(__blk_rq_map_sg);
static inline unsigned int blk_rq_get_max_segments(struct request *rq)
{
if (req_op(rq) == REQ_OP_DISCARD)
return queue_max_discard_segments(rq->q);
return queue_max_segments(rq->q);
}
static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
sector_t offset)
{
......
......@@ -3000,6 +3000,7 @@ blk_status_t blk_insert_cloned_request(struct request *rq)
{
struct request_queue *q = rq->q;
unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
unsigned int max_segments = blk_rq_get_max_segments(rq);
blk_status_t ret;
if (blk_rq_sectors(rq) > max_sectors) {
......@@ -3026,9 +3027,9 @@ blk_status_t blk_insert_cloned_request(struct request *rq)
* original queue.
*/
rq->nr_phys_segments = blk_recalc_rq_segments(rq);
if (rq->nr_phys_segments > queue_max_segments(q)) {
printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)\n",
__func__, rq->nr_phys_segments, queue_max_segments(q));
if (rq->nr_phys_segments > max_segments) {
printk(KERN_ERR "%s: over max segments limit. (%u > %u)\n",
__func__, rq->nr_phys_segments, max_segments);
return BLK_STS_IOERR;
}
......
......@@ -334,17 +334,12 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
{
void __user *argp = (void __user *)arg;
struct zone_report_args args;
struct request_queue *q;
struct blk_zone_report rep;
int ret;
if (!argp)
return -EINVAL;
q = bdev_get_queue(bdev);
if (!q)
return -ENXIO;
if (!bdev_is_zoned(bdev))
return -ENOTTY;
......@@ -391,7 +386,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
struct request_queue *q;
struct blk_zone_range zrange;
enum req_op op;
int ret;
......@@ -399,10 +393,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
if (!argp)
return -EINVAL;
q = bdev_get_queue(bdev);
if (!q)
return -ENXIO;
if (!bdev_is_zoned(bdev))
return -ENOTTY;
......
......@@ -156,6 +156,13 @@ static inline bool blk_discard_mergable(struct request *req)
return false;
}
static inline unsigned int blk_rq_get_max_segments(struct request *rq)
{
if (req_op(rq) == REQ_OP_DISCARD)
return queue_max_discard_segments(rq->q);
return queue_max_segments(rq->q);
}
static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
enum req_op op)
{
......@@ -427,7 +434,7 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct request_queue *blk_alloc_queue(int node_id);
int disk_scan_partitions(struct gendisk *disk, fmode_t mode, void *owner);
int disk_scan_partitions(struct gendisk *disk, fmode_t mode);
int disk_alloc_events(struct gendisk *disk);
void disk_add_events(struct gendisk *disk);
......
......@@ -356,9 +356,10 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action)
}
EXPORT_SYMBOL_GPL(disk_uevent);
int disk_scan_partitions(struct gendisk *disk, fmode_t mode, void *owner)
int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
{
struct block_device *bdev;
int ret = 0;
if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN))
return -EINVAL;
......@@ -366,16 +367,29 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode, void *owner)
return -EINVAL;
if (disk->open_partitions)
return -EBUSY;
/* Someone else has bdev exclusively open? */
if (disk->part0->bd_holder && disk->part0->bd_holder != owner)
return -EBUSY;
set_bit(GD_NEED_PART_SCAN, &disk->state);
bdev = blkdev_get_by_dev(disk_devt(disk), mode, NULL);
/*
* If the device is opened exclusively by current thread already, it's
* safe to scan partitons, otherwise, use bd_prepare_to_claim() to
* synchronize with other exclusive openers and other partition
* scanners.
*/
if (!(mode & FMODE_EXCL)) {
ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions);
if (ret)
return ret;
}
bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
ret = PTR_ERR(bdev);
else
blkdev_put(bdev, mode);
return 0;
if (!(mode & FMODE_EXCL))
bd_abort_claiming(disk->part0, disk_scan_partitions);
return ret;
}
/**
......@@ -497,9 +511,14 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
if (ret)
goto out_unregister_bdi;
/* Make sure the first partition scan will be proceed */
if (get_capacity(disk) && !(disk->flags & GENHD_FL_NO_PART) &&
!test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
set_bit(GD_NEED_PART_SCAN, &disk->state);
bdev_add(disk->part0, ddev->devt);
if (get_capacity(disk))
disk_scan_partitions(disk, FMODE_READ, NULL);
disk_scan_partitions(disk, FMODE_READ);
/*
* Announce the disk and partitions after all partitions are
......
......@@ -467,10 +467,10 @@ static int blkdev_bszset(struct block_device *bdev, fmode_t mode,
* user space. Note the separate arg/argp parameters that are needed
* to deal with the compat_ptr() conversion.
*/
static int blkdev_common_ioctl(struct file *file, fmode_t mode, unsigned cmd,
unsigned long arg, void __user *argp)
static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg,
void __user *argp)
{
struct block_device *bdev = I_BDEV(file->f_mapping->host);
unsigned int max_sectors;
switch (cmd) {
......@@ -528,8 +528,7 @@ static int blkdev_common_ioctl(struct file *file, fmode_t mode, unsigned cmd,
return -EACCES;
if (bdev_is_partition(bdev))
return -EINVAL;
return disk_scan_partitions(bdev->bd_disk, mode & ~FMODE_EXCL,
file);
return disk_scan_partitions(bdev->bd_disk, mode);
case BLKTRACESTART:
case BLKTRACESTOP:
case BLKTRACETEARDOWN:
......@@ -607,7 +606,7 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
break;
}
ret = blkdev_common_ioctl(file, mode, cmd, arg, argp);
ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp);
if (ret != -ENOIOCTLCMD)
return ret;
......@@ -676,7 +675,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
break;
}
ret = blkdev_common_ioctl(file, mode, cmd, arg, argp);
ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp);
if (ret == -ENOIOCTLCMD && disk->fops->compat_ioctl)
ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg);
......
......@@ -487,6 +487,8 @@ static int opal_discovery0_end(struct opal_dev *dev)
break;
case FC_SINGLEUSER:
single_user = check_sum(body->features);
if (single_user)
dev->flags |= OPAL_FL_SUM_SUPPORTED;
break;
case FC_GEOMETRY:
check_geometry(dev, body);
......
......@@ -977,13 +977,13 @@ loop_set_status_from_info(struct loop_device *lo,
return -EINVAL;
}
/* Avoid assigning overflow values */
if (info->lo_offset > LLONG_MAX || info->lo_sizelimit > LLONG_MAX)
return -EOVERFLOW;
lo->lo_offset = info->lo_offset;
lo->lo_sizelimit = info->lo_sizelimit;
/* loff_t vars have been assigned __u64 */
if (lo->lo_offset < 0 || lo->lo_sizelimit < 0)
return -EOVERFLOW;
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
lo->lo_flags = info->lo_flags;
......
......@@ -1271,9 +1271,6 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
__func__, cmd->cmd_op, ub_cmd->q_id, tag,
ub_cmd->result);
if (!(issue_flags & IO_URING_F_SQE128))
goto out;
if (ub_cmd->q_id >= ub->dev_info.nr_hw_queues)
goto out;
......
......@@ -256,7 +256,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl,
chap->qid, ret, gid_name);
chap->status = NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE;
chap->dh_tfm = NULL;
return -ret;
return ret;
}
dev_dbg(ctrl->device, "qid %d: selected DH group %s\n",
chap->qid, gid_name);
......
......@@ -38,6 +38,7 @@ struct nvme_ns_info {
bool is_shared;
bool is_readonly;
bool is_ready;
bool is_removed;
};
unsigned int admin_timeout = 60;
......@@ -1402,16 +1403,8 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
error = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
if (error) {
dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error);
goto out_free_id;
}
error = NVME_SC_INVALID_NS | NVME_SC_DNR;
if ((*id)->ncap == 0) /* namespace not allocated or attached */
goto out_free_id;
return 0;
out_free_id:
kfree(*id);
}
return error;
}
......@@ -1425,6 +1418,13 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
ret = nvme_identify_ns(ctrl, info->nsid, &id);
if (ret)
return ret;
if (id->ncap == 0) {
/* namespace not allocated or attached */
info->is_removed = true;
return -ENODEV;
}
info->anagrpid = id->anagrpid;
info->is_shared = id->nmic & NVME_NS_NMIC_SHARED;
info->is_readonly = id->nsattr & NVME_NS_ATTR_RO;
......@@ -3104,7 +3104,7 @@ static void nvme_init_known_nvm_effects(struct nvme_ctrl *ctrl)
* Rather than blindly freezing the IO queues for this effect that
* doesn't even apply to IO, mask it off.
*/
log->acs[nvme_admin_security_recv] &= ~NVME_CMD_EFFECTS_CSE_MASK;
log->acs[nvme_admin_security_recv] &= cpu_to_le32(~NVME_CMD_EFFECTS_CSE_MASK);
log->iocs[nvme_cmd_write] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC);
log->iocs[nvme_cmd_write_zeroes] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC);
......@@ -4429,6 +4429,7 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns_info info = { .nsid = nsid };
struct nvme_ns *ns;
int ret;
if (nvme_identify_ns_descs(ctrl, &info))
return;
......@@ -4445,19 +4446,19 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
* set up a namespace. If not fall back to the legacy version.
*/
if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) ||
(info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS)) {
if (nvme_ns_info_from_id_cs_indep(ctrl, &info))
return;
} else {
if (nvme_ns_info_from_identify(ctrl, &info))
return;
}
(info.ids.csi != NVME_CSI_NVM && info.ids.csi != NVME_CSI_ZNS))
ret = nvme_ns_info_from_id_cs_indep(ctrl, &info);
else
ret = nvme_ns_info_from_identify(ctrl, &info);
if (info.is_removed)
nvme_ns_remove_by_nsid(ctrl, nsid);
/*
* Ignore the namespace if it is not ready. We will get an AEN once it
* becomes ready and restart the scan.
*/
if (!info.is_ready)
if (ret || !info.is_ready)
return;
ns = nvme_find_get_ns(ctrl, nsid);
......
......@@ -189,7 +189,8 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl)
{
if (!ctrl->subsys)
if (!ctrl->subsys ||
!strcmp(ctrl->opts->subsysnqn, NVME_DISC_SUBSYS_NAME))
return ctrl->opts->subsysnqn;
return ctrl->subsys->subnqn;
}
......
......@@ -2492,6 +2492,10 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
len = nvmf_get_address(ctrl, buf, size);
mutex_lock(&queue->queue_lock);
if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
goto done;
ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr);
if (ret > 0) {
if (len > 0)
......@@ -2499,6 +2503,8 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n",
(len) ? "," : "", &src_addr);
}
done:
mutex_unlock(&queue->queue_lock);
return len;
}
......
......@@ -473,6 +473,7 @@ enum hctx_type {
/**
* struct blk_mq_tag_set - tag set that can be shared between request queues
* @ops: Pointers to functions that implement block driver behavior.
* @map: One or more ctx -> hctx mappings. One map exists for each
* hardware queue type (enum hctx_type) that the driver wishes
* to support. There are no restrictions on maps being of the
......@@ -480,7 +481,6 @@ enum hctx_type {
* types.
* @nr_maps: Number of elements in the @map array. A number in the range
* [1, HCTX_MAX_TYPES].
* @ops: Pointers to functions that implement block driver behavior.
* @nr_hw_queues: Number of hardware queues supported by the block driver that
* owns this data structure.
* @queue_depth: Number of tags per hardware queue, reserved tags included.
......@@ -505,9 +505,9 @@ enum hctx_type {
* (BLK_MQ_F_BLOCKING).
*/
struct blk_mq_tag_set {
const struct blk_mq_ops *ops;
struct blk_mq_queue_map map[HCTX_MAX_TYPES];
unsigned int nr_maps;
const struct blk_mq_ops *ops;
unsigned int nr_hw_queues;
unsigned int queue_depth;
unsigned int reserved_tags;
......
......@@ -1283,12 +1283,7 @@ static inline bool bdev_nowait(struct block_device *bdev)
static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
if (q)
return blk_queue_zoned_model(q);
return BLK_ZONED_NONE;
return blk_queue_zoned_model(bdev_get_queue(bdev));
}
static inline bool bdev_is_zoned(struct block_device *bdev)
......
......@@ -144,6 +144,7 @@ struct opal_read_write_table {
#define OPAL_FL_LOCKED 0x00000008
#define OPAL_FL_MBR_ENABLED 0x00000010
#define OPAL_FL_MBR_DONE 0x00000020
#define OPAL_FL_SUM_SUPPORTED 0x00000040
struct opal_status {
__u32 flags;
......
......@@ -729,14 +729,10 @@ EXPORT_SYMBOL_GPL(blk_trace_startstop);
**/
int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
{
struct request_queue *q;
struct request_queue *q = bdev_get_queue(bdev);
int ret, start = 0;
char b[BDEVNAME_SIZE];
q = bdev_get_queue(bdev);
if (!q)
return -ENXIO;
mutex_lock(&q->debugfs_mutex);
switch (cmd) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment