Commit 0f7223a3 authored by Jens Axboe

Merge tag 'nvme-6.9-2024-03-07' of git://git.infradead.org/nvme into for-6.9/block

Pull NVMe updates from Keith:

"nvme updates for Linux 6.9

 - RDMA target enhancements (Max)
 - Fabrics fixes (Max, Guixin, Hannes)
 - Atomic queue_limits usage (Christoph)
 - Const use for class_register (Ricardo)
 - Identification error handling fixes (Shin'ichiro, Keith)"

* tag 'nvme-6.9-2024-03-07' of git://git.infradead.org/nvme: (31 commits)
  nvme: clear caller pointer on identify failure
  nvme: host: fix double-free of struct nvme_id_ns in ns_update_nuse()
  nvme: fcloop: make fcloop_class constant
  nvme: fabrics: make nvmf_class constant
  nvme: core: constify struct class usage
  nvme-fabrics: typo in nvmf_parse_key()
  nvme-multipath: use atomic queue limits API for stacking limits
  nvme-multipath: pass queue_limits to blk_alloc_disk
  nvme: use the atomic queue limits update API
  nvme: cleanup nvme_configure_metadata
  nvme: don't query identify data in configure_metadata
  nvme: split out a nvme_identify_ns_nvm helper
  nvme: move common logic into nvme_update_ns_info
  nvme: move setting the write cache flags out of nvme_set_queue_limits
  nvme: move a few things out of nvme_update_disk_info
  nvme: don't use nvme_update_disk_info for the multipath disk
  nvme: move blk_integrity_unregister into nvme_init_integrity
  nvme: cleanup the nvme_init_integrity calling conventions
  nvme: move max_integrity_segments handling out of nvme_init_integrity
  nvme: remove nvme_revalidate_zones
  ...
parents d37977f0 7e80eb79
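
For reference, the queue_limits rework that dominates the core.c diff below follows one pattern throughout: take a snapshot of the queue's limits, fill in every field, then commit them as a single validated update. The sketch below only illustrates that pattern; the function and parameter names (example_update_limits, lbs) are invented here and are not part of the patch.

/* Minimal sketch of the atomic queue_limits update pattern (assumed helper). */
#include <linux/blkdev.h>

static int example_update_limits(struct gendisk *disk, unsigned int lbs)
{
	struct queue_limits lim;

	/*
	 * Snapshot the current limits; the queue's limits lock stays held
	 * until the update is committed (or cancelled).
	 */
	lim = queue_limits_start_update(disk->queue);
	lim.logical_block_size = lbs;
	lim.physical_block_size = lbs;

	/* Validate and apply all fields in one go. */
	return queue_limits_commit_update(disk->queue, &lim);
}

The same start/commit pairing shows up below in nvme_update_ns_info_block(), nvme_update_ns_info_generic() and nvme_init_identify(), and blk_mq_alloc_queue()/blk_alloc_disk() now accept an initial queue_limits at allocation time.
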
...@@ -114,12 +114,21 @@ static DEFINE_MUTEX(nvme_subsystems_lock); ...@@ -114,12 +114,21 @@ static DEFINE_MUTEX(nvme_subsystems_lock);
static DEFINE_IDA(nvme_instance_ida); static DEFINE_IDA(nvme_instance_ida);
static dev_t nvme_ctrl_base_chr_devt; static dev_t nvme_ctrl_base_chr_devt;
static struct class *nvme_class; static int nvme_class_uevent(const struct device *dev, struct kobj_uevent_env *env);
static struct class *nvme_subsys_class; static const struct class nvme_class = {
.name = "nvme",
.dev_uevent = nvme_class_uevent,
};
static const struct class nvme_subsys_class = {
.name = "nvme-subsystem",
};
static DEFINE_IDA(nvme_ns_chr_minor_ida); static DEFINE_IDA(nvme_ns_chr_minor_ida);
static dev_t nvme_ns_chr_devt; static dev_t nvme_ns_chr_devt;
static struct class *nvme_ns_chr_class; static const struct class nvme_ns_chr_class = {
.name = "nvme-generic",
};
static void nvme_put_subsystem(struct nvme_subsystem *subsys); static void nvme_put_subsystem(struct nvme_subsystem *subsys);
static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
...@@ -1394,8 +1403,10 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) ...@@ -1394,8 +1403,10 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
sizeof(struct nvme_id_ctrl)); sizeof(struct nvme_id_ctrl));
if (error) if (error) {
kfree(*id); kfree(*id);
*id = NULL;
}
return error; return error;
} }
...@@ -1524,6 +1535,7 @@ int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, ...@@ -1524,6 +1535,7 @@ int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
if (error) { if (error) {
dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error); dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error);
kfree(*id); kfree(*id);
*id = NULL;
} }
return error; return error;
} }
...@@ -1723,12 +1735,23 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) ...@@ -1723,12 +1735,23 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0; return 0;
} }
#ifdef CONFIG_BLK_DEV_INTEGRITY static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head)
static void nvme_init_integrity(struct gendisk *disk,
struct nvme_ns_head *head, u32 max_integrity_segments)
{ {
struct blk_integrity integrity = { }; struct blk_integrity integrity = { };
blk_integrity_unregister(disk);
if (!head->ms)
return true;
/*
* PI can always be supported as we can ask the controller to simply
* insert/strip it, which is not possible for other kinds of metadata.
*/
if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) ||
!(head->features & NVME_NS_METADATA_SUPPORTED))
return nvme_ns_has_pi(head);
switch (head->pi_type) { switch (head->pi_type) {
case NVME_NS_DPS_PI_TYPE3: case NVME_NS_DPS_PI_TYPE3:
switch (head->guard_type) { switch (head->guard_type) {
...@@ -1773,52 +1796,30 @@ static void nvme_init_integrity(struct gendisk *disk, ...@@ -1773,52 +1796,30 @@ static void nvme_init_integrity(struct gendisk *disk,
integrity.tuple_size = head->ms; integrity.tuple_size = head->ms;
integrity.pi_offset = head->pi_offset; integrity.pi_offset = head->pi_offset;
blk_integrity_register(disk, &integrity); blk_integrity_register(disk, &integrity);
blk_queue_max_integrity_segments(disk->queue, max_integrity_segments); return true;
}
#else
static void nvme_init_integrity(struct gendisk *disk,
struct nvme_ns_head *head, u32 max_integrity_segments)
{
} }
#endif /* CONFIG_BLK_DEV_INTEGRITY */
static void nvme_config_discard(struct nvme_ctrl *ctrl, struct gendisk *disk, static void nvme_config_discard(struct nvme_ns *ns, struct queue_limits *lim)
struct nvme_ns_head *head)
{ {
struct request_queue *queue = disk->queue; struct nvme_ctrl *ctrl = ns->ctrl;
u32 max_discard_sectors;
if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(head, UINT_MAX)) {
max_discard_sectors = nvme_lba_to_sect(head, ctrl->dmrsl);
} else if (ctrl->oncs & NVME_CTRL_ONCS_DSM) {
max_discard_sectors = UINT_MAX;
} else {
blk_queue_max_discard_sectors(queue, 0);
return;
}
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) < BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
NVME_DSM_MAX_RANGES); NVME_DSM_MAX_RANGES);
/* if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX))
* If discard is already enabled, don't reset queue limits. lim->max_hw_discard_sectors =
* nvme_lba_to_sect(ns->head, ctrl->dmrsl);
* This works around the fact that the block layer can't cope well with else if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
* updating the hardware limits when overridden through sysfs. This is lim->max_hw_discard_sectors = UINT_MAX;
* harmless because discard limits in NVMe are purely advisory. else
*/ lim->max_hw_discard_sectors = 0;
if (queue->limits.max_discard_sectors)
return; lim->discard_granularity = lim->logical_block_size;
blk_queue_max_discard_sectors(queue, max_discard_sectors);
if (ctrl->dmrl) if (ctrl->dmrl)
blk_queue_max_discard_segments(queue, ctrl->dmrl); lim->max_discard_segments = ctrl->dmrl;
else else
blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); lim->max_discard_segments = NVME_DSM_MAX_RANGES;
queue->limits.discard_granularity = queue_logical_block_size(queue);
if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
} }
static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
...@@ -1829,42 +1830,38 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) ...@@ -1829,42 +1830,38 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
a->csi == b->csi; a->csi == b->csi;
} }
static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, static int nvme_identify_ns_nvm(struct nvme_ctrl *ctrl, unsigned int nsid,
struct nvme_id_ns *id) struct nvme_id_ns_nvm **nvmp)
{ {
bool first = id->dps & NVME_NS_DPS_PI_FIRST; struct nvme_command c = {
unsigned lbaf = nvme_lbaf_index(id->flbas); .identify.opcode = nvme_admin_identify,
struct nvme_command c = { }; .identify.nsid = cpu_to_le32(nsid),
.identify.cns = NVME_ID_CNS_CS_NS,
.identify.csi = NVME_CSI_NVM,
};
struct nvme_id_ns_nvm *nvm; struct nvme_id_ns_nvm *nvm;
int ret = 0; int ret;
u32 elbaf;
head->pi_size = 0;
head->ms = le16_to_cpu(id->lbaf[lbaf].ms);
if (!(ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) {
head->pi_size = sizeof(struct t10_pi_tuple);
head->guard_type = NVME_NVM_NS_16B_GUARD;
goto set_pi;
}
nvm = kzalloc(sizeof(*nvm), GFP_KERNEL); nvm = kzalloc(sizeof(*nvm), GFP_KERNEL);
if (!nvm) if (!nvm)
return -ENOMEM; return -ENOMEM;
c.identify.opcode = nvme_admin_identify;
c.identify.nsid = cpu_to_le32(head->ns_id);
c.identify.cns = NVME_ID_CNS_CS_NS;
c.identify.csi = NVME_CSI_NVM;
ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, nvm, sizeof(*nvm)); ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, nvm, sizeof(*nvm));
if (ret) if (ret)
goto free_data; kfree(nvm);
else
*nvmp = nvm;
return ret;
}
elbaf = le32_to_cpu(nvm->elbaf[lbaf]); static void nvme_configure_pi_elbas(struct nvme_ns_head *head,
struct nvme_id_ns *id, struct nvme_id_ns_nvm *nvm)
{
u32 elbaf = le32_to_cpu(nvm->elbaf[nvme_lbaf_index(id->flbas)]);
/* no support for storage tag formats right now */ /* no support for storage tag formats right now */
if (nvme_elbaf_sts(elbaf)) if (nvme_elbaf_sts(elbaf))
goto free_data; return;
head->guard_type = nvme_elbaf_guard_type(elbaf); head->guard_type = nvme_elbaf_guard_type(elbaf);
switch (head->guard_type) { switch (head->guard_type) {
...@@ -1877,35 +1874,31 @@ static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, ...@@ -1877,35 +1874,31 @@ static int nvme_init_ms(struct nvme_ctrl *ctrl, struct nvme_ns_head *head,
default: default:
break; break;
} }
free_data:
kfree(nvm);
set_pi:
if (head->pi_size && head->ms >= head->pi_size)
head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
else
head->pi_type = 0;
if (first)
head->pi_offset = 0;
else
head->pi_offset = head->ms - head->pi_size;
return ret;
} }
static int nvme_configure_metadata(struct nvme_ctrl *ctrl, static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
struct nvme_ns_head *head, struct nvme_id_ns *id) struct nvme_ns_head *head, struct nvme_id_ns *id,
struct nvme_id_ns_nvm *nvm)
{ {
int ret;
ret = nvme_init_ms(ctrl, head, id);
if (ret)
return ret;
head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS);
head->pi_type = 0;
head->pi_size = 0;
head->pi_offset = 0;
head->ms = le16_to_cpu(id->lbaf[nvme_lbaf_index(id->flbas)].ms);
if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
return 0; return;
if (nvm && (ctrl->ctratt & NVME_CTRL_ATTR_ELBAS)) {
nvme_configure_pi_elbas(head, id, nvm);
} else {
head->pi_size = sizeof(struct t10_pi_tuple);
head->guard_type = NVME_NVM_NS_16B_GUARD;
}
if (head->pi_size && head->ms >= head->pi_size)
head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
if (!(id->dps & NVME_NS_DPS_PI_FIRST))
head->pi_offset = head->ms - head->pi_size;
if (ctrl->ops->flags & NVME_F_FABRICS) { if (ctrl->ops->flags & NVME_F_FABRICS) {
/* /*
...@@ -1914,7 +1907,7 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl, ...@@ -1914,7 +1907,7 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl,
* remap the separate metadata buffer from the block layer. * remap the separate metadata buffer from the block layer.
*/ */
if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT)))
return 0; return;
head->features |= NVME_NS_EXT_LBAS; head->features |= NVME_NS_EXT_LBAS;
...@@ -1941,33 +1934,32 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl, ...@@ -1941,33 +1934,32 @@ static int nvme_configure_metadata(struct nvme_ctrl *ctrl,
else else
head->features |= NVME_NS_METADATA_SUPPORTED; head->features |= NVME_NS_METADATA_SUPPORTED;
} }
return 0;
} }
static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, static u32 nvme_max_drv_segments(struct nvme_ctrl *ctrl)
struct request_queue *q)
{ {
bool vwc = ctrl->vwc & NVME_CTRL_VWC_PRESENT; return ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> SECTOR_SHIFT) + 1;
}
if (ctrl->max_hw_sectors) {
u32 max_segments =
(ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> 9)) + 1;
max_segments = min_not_zero(max_segments, ctrl->max_segments); static void nvme_set_ctrl_limits(struct nvme_ctrl *ctrl,
blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); struct queue_limits *lim)
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); {
} lim->max_hw_sectors = ctrl->max_hw_sectors;
blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); lim->max_segments = min_t(u32, USHRT_MAX,
blk_queue_dma_alignment(q, 3); min_not_zero(nvme_max_drv_segments(ctrl), ctrl->max_segments));
blk_queue_write_cache(q, vwc, vwc); lim->max_integrity_segments = ctrl->max_integrity_segments;
lim->virt_boundary_mask = NVME_CTRL_PAGE_SIZE - 1;
lim->max_segment_size = UINT_MAX;
lim->dma_alignment = 3;
} }
static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
struct nvme_ns_head *head, struct nvme_id_ns *id) struct queue_limits *lim)
{ {
sector_t capacity = nvme_lba_to_sect(head, le64_to_cpu(id->nsze)); struct nvme_ns_head *head = ns->head;
u32 bs = 1U << head->lba_shift; u32 bs = 1U << head->lba_shift;
u32 atomic_bs, phys_bs, io_opt = 0; u32 atomic_bs, phys_bs, io_opt = 0;
bool valid = true;
/* /*
* The block layer can't support LBA sizes larger than the page size * The block layer can't support LBA sizes larger than the page size
...@@ -1975,12 +1967,10 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, ...@@ -1975,12 +1967,10 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
* allow block I/O. * allow block I/O.
*/ */
if (head->lba_shift > PAGE_SHIFT || head->lba_shift < SECTOR_SHIFT) { if (head->lba_shift > PAGE_SHIFT || head->lba_shift < SECTOR_SHIFT) {
capacity = 0;
bs = (1 << 9); bs = (1 << 9);
valid = false;
} }
blk_integrity_unregister(disk);
atomic_bs = phys_bs = bs; atomic_bs = phys_bs = bs;
if (id->nabo == 0) { if (id->nabo == 0) {
/* /*
...@@ -1991,7 +1981,7 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, ...@@ -1991,7 +1981,7 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
else else
atomic_bs = (1 + ctrl->subsys->awupf) * bs; atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
} }
if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
...@@ -2001,36 +1991,20 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk, ...@@ -2001,36 +1991,20 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
io_opt = bs * (1 + le16_to_cpu(id->nows)); io_opt = bs * (1 + le16_to_cpu(id->nows));
} }
blk_queue_logical_block_size(disk->queue, bs);
/* /*
* Linux filesystems assume writing a single physical block is * Linux filesystems assume writing a single physical block is
* an atomic operation. Hence limit the physical block size to the * an atomic operation. Hence limit the physical block size to the
* value of the Atomic Write Unit Power Fail parameter. * value of the Atomic Write Unit Power Fail parameter.
*/ */
blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs)); lim->logical_block_size = bs;
blk_queue_io_min(disk->queue, phys_bs); lim->physical_block_size = min(phys_bs, atomic_bs);
blk_queue_io_opt(disk->queue, io_opt); lim->io_min = phys_bs;
lim->io_opt = io_opt;
/* if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
* Register a metadata profile for PI, or the plain non-integrity NVMe lim->max_write_zeroes_sectors = UINT_MAX;
* metadata masquerading as Type 0 if supported, otherwise reject block else
* I/O to namespaces with metadata except when the namespace supports lim->max_write_zeroes_sectors = ns->ctrl->max_zeroes_sectors;
* PI, as it can strip/insert in that case. return valid;
*/
if (head->ms) {
if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
(head->features & NVME_NS_METADATA_SUPPORTED))
nvme_init_integrity(disk, head,
ctrl->max_integrity_segments);
else if (!nvme_ns_has_pi(head))
capacity = 0;
}
set_capacity_and_notify(disk, capacity);
nvme_config_discard(ctrl, disk, head);
blk_queue_max_write_zeroes_sectors(disk->queue,
ctrl->max_zeroes_sectors);
} }
static bool nvme_ns_is_readonly(struct nvme_ns *ns, struct nvme_ns_info *info) static bool nvme_ns_is_readonly(struct nvme_ns *ns, struct nvme_ns_info *info)
...@@ -2044,7 +2018,8 @@ static inline bool nvme_first_scan(struct gendisk *disk) ...@@ -2044,7 +2018,8 @@ static inline bool nvme_first_scan(struct gendisk *disk)
return !disk_live(disk); return !disk_live(disk);
} }
static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id) static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id,
struct queue_limits *lim)
{ {
struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_ctrl *ctrl = ns->ctrl;
u32 iob; u32 iob;
...@@ -2072,38 +2047,36 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id) ...@@ -2072,38 +2047,36 @@ static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
return; return;
} }
blk_queue_chunk_sectors(ns->queue, iob); lim->chunk_sectors = iob;
} }
static int nvme_update_ns_info_generic(struct nvme_ns *ns, static int nvme_update_ns_info_generic(struct nvme_ns *ns,
struct nvme_ns_info *info) struct nvme_ns_info *info)
{ {
struct queue_limits lim;
int ret;
blk_mq_freeze_queue(ns->disk->queue); blk_mq_freeze_queue(ns->disk->queue);
nvme_set_queue_limits(ns->ctrl, ns->queue); lim = queue_limits_start_update(ns->disk->queue);
nvme_set_ctrl_limits(ns->ctrl, &lim);
ret = queue_limits_commit_update(ns->disk->queue, &lim);
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
blk_mq_unfreeze_queue(ns->disk->queue); blk_mq_unfreeze_queue(ns->disk->queue);
if (nvme_ns_head_multipath(ns->head)) {
blk_mq_freeze_queue(ns->head->disk->queue);
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
nvme_mpath_revalidate_paths(ns);
blk_stack_limits(&ns->head->disk->queue->limits,
&ns->queue->limits, 0);
ns->head->disk->flags |= GENHD_FL_HIDDEN;
blk_mq_unfreeze_queue(ns->head->disk->queue);
}
/* Hide the block-interface for these devices */ /* Hide the block-interface for these devices */
ns->disk->flags |= GENHD_FL_HIDDEN; if (!ret)
set_bit(NVME_NS_READY, &ns->flags); ret = -ENODEV;
return ret;
return 0;
} }
static int nvme_update_ns_info_block(struct nvme_ns *ns, static int nvme_update_ns_info_block(struct nvme_ns *ns,
struct nvme_ns_info *info) struct nvme_ns_info *info)
{ {
bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT;
struct queue_limits lim;
struct nvme_id_ns_nvm *nvm = NULL;
struct nvme_id_ns *id; struct nvme_id_ns *id;
sector_t capacity;
unsigned lbaf; unsigned lbaf;
int ret; int ret;
...@@ -2115,30 +2088,52 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, ...@@ -2115,30 +2088,52 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
/* namespace not allocated or attached */ /* namespace not allocated or attached */
info->is_removed = true; info->is_removed = true;
ret = -ENODEV; ret = -ENODEV;
goto error; goto out;
}
if (ns->ctrl->ctratt & NVME_CTRL_ATTR_ELBAS) {
ret = nvme_identify_ns_nvm(ns->ctrl, info->nsid, &nvm);
if (ret < 0)
goto out;
} }
blk_mq_freeze_queue(ns->disk->queue); blk_mq_freeze_queue(ns->disk->queue);
lbaf = nvme_lbaf_index(id->flbas); lbaf = nvme_lbaf_index(id->flbas);
ns->head->lba_shift = id->lbaf[lbaf].ds; ns->head->lba_shift = id->lbaf[lbaf].ds;
ns->head->nuse = le64_to_cpu(id->nuse); ns->head->nuse = le64_to_cpu(id->nuse);
nvme_set_queue_limits(ns->ctrl, ns->queue); capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));
ret = nvme_configure_metadata(ns->ctrl, ns->head, id); lim = queue_limits_start_update(ns->disk->queue);
if (ret < 0) { nvme_set_ctrl_limits(ns->ctrl, &lim);
nvme_configure_metadata(ns->ctrl, ns->head, id, nvm);
nvme_set_chunk_sectors(ns, id, &lim);
if (!nvme_update_disk_info(ns, id, &lim))
capacity = 0;
nvme_config_discard(ns, &lim);
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
ns->head->ids.csi == NVME_CSI_ZNS) {
ret = nvme_update_zone_info(ns, lbaf, &lim);
if (ret) {
blk_mq_unfreeze_queue(ns->disk->queue); blk_mq_unfreeze_queue(ns->disk->queue);
goto out; goto out;
} }
nvme_set_chunk_sectors(ns, id); }
nvme_update_disk_info(ns->ctrl, ns->disk, ns->head, id); ret = queue_limits_commit_update(ns->disk->queue, &lim);
if (ns->head->ids.csi == NVME_CSI_ZNS) {
ret = nvme_update_zone_info(ns, lbaf);
if (ret) { if (ret) {
blk_mq_unfreeze_queue(ns->disk->queue); blk_mq_unfreeze_queue(ns->disk->queue);
goto out; goto out;
} }
}
/*
* Register a metadata profile for PI, or the plain non-integrity NVMe
* metadata masquerading as Type 0 if supported, otherwise reject block
* I/O to namespaces with metadata except when the namespace supports
* PI, as it can strip/insert in that case.
*/
if (!nvme_init_integrity(ns->disk, ns->head))
capacity = 0;
set_capacity_and_notify(ns->disk, capacity);
/* /*
* Only set the DEAC bit if the device guarantees that reads from * Only set the DEAC bit if the device guarantees that reads from
...@@ -2149,62 +2144,81 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, ...@@ -2149,62 +2144,81 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3))) if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
ns->head->features |= NVME_NS_DEAC; ns->head->features |= NVME_NS_DEAC;
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info)); set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
blk_queue_write_cache(ns->disk->queue, vwc, vwc);
set_bit(NVME_NS_READY, &ns->flags); set_bit(NVME_NS_READY, &ns->flags);
blk_mq_unfreeze_queue(ns->disk->queue); blk_mq_unfreeze_queue(ns->disk->queue);
if (blk_queue_is_zoned(ns->queue)) { if (blk_queue_is_zoned(ns->queue)) {
ret = nvme_revalidate_zones(ns); ret = blk_revalidate_disk_zones(ns->disk, NULL);
if (ret && !nvme_first_scan(ns->disk)) if (ret && !nvme_first_scan(ns->disk))
goto out; goto out;
} }
if (nvme_ns_head_multipath(ns->head)) {
blk_mq_freeze_queue(ns->head->disk->queue);
nvme_update_disk_info(ns->ctrl, ns->head->disk, ns->head, id);
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
nvme_mpath_revalidate_paths(ns);
blk_stack_limits(&ns->head->disk->queue->limits,
&ns->queue->limits, 0);
disk_update_readahead(ns->head->disk);
blk_mq_unfreeze_queue(ns->head->disk->queue);
}
ret = 0; ret = 0;
out: out:
/* kfree(nvm);
* If probing fails due an unsupported feature, hide the block device,
* but still allow other access.
*/
if (ret == -ENODEV) {
ns->disk->flags |= GENHD_FL_HIDDEN;
set_bit(NVME_NS_READY, &ns->flags);
ret = 0;
}
error:
kfree(id); kfree(id);
return ret; return ret;
} }
static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
{ {
bool unsupported = false;
int ret;
switch (info->ids.csi) { switch (info->ids.csi) {
case NVME_CSI_ZNS: case NVME_CSI_ZNS:
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) { if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
dev_info(ns->ctrl->device, dev_info(ns->ctrl->device,
"block device for nsid %u not supported without CONFIG_BLK_DEV_ZONED\n", "block device for nsid %u not supported without CONFIG_BLK_DEV_ZONED\n",
info->nsid); info->nsid);
return nvme_update_ns_info_generic(ns, info); ret = nvme_update_ns_info_generic(ns, info);
break;
} }
return nvme_update_ns_info_block(ns, info); ret = nvme_update_ns_info_block(ns, info);
break;
case NVME_CSI_NVM: case NVME_CSI_NVM:
return nvme_update_ns_info_block(ns, info); ret = nvme_update_ns_info_block(ns, info);
break;
default: default:
dev_info(ns->ctrl->device, dev_info(ns->ctrl->device,
"block device for nsid %u not supported (csi %u)\n", "block device for nsid %u not supported (csi %u)\n",
info->nsid, info->ids.csi); info->nsid, info->ids.csi);
return nvme_update_ns_info_generic(ns, info); ret = nvme_update_ns_info_generic(ns, info);
break;
}
/*
* If probing fails due an unsupported feature, hide the block device,
* but still allow other access.
*/
if (ret == -ENODEV) {
ns->disk->flags |= GENHD_FL_HIDDEN;
set_bit(NVME_NS_READY, &ns->flags);
unsupported = true;
ret = 0;
} }
if (!ret && nvme_ns_head_multipath(ns->head)) {
struct queue_limits lim;
blk_mq_freeze_queue(ns->head->disk->queue);
if (unsupported)
ns->head->disk->flags |= GENHD_FL_HIDDEN;
else
nvme_init_integrity(ns->head->disk, ns->head);
set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
nvme_mpath_revalidate_paths(ns);
lim = queue_limits_start_update(ns->head->disk->queue);
queue_limits_stack_bdev(&lim, ns->disk->part0, 0,
ns->head->disk->disk_name);
ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
blk_mq_unfreeze_queue(ns->head->disk->queue);
}
return ret;
} }
#ifdef CONFIG_BLK_SED_OPAL #ifdef CONFIG_BLK_SED_OPAL
...@@ -2879,7 +2893,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) ...@@ -2879,7 +2893,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
subsys->awupf = le16_to_cpu(id->awupf); subsys->awupf = le16_to_cpu(id->awupf);
nvme_mpath_default_iopolicy(subsys); nvme_mpath_default_iopolicy(subsys);
subsys->dev.class = nvme_subsys_class; subsys->dev.class = &nvme_subsys_class;
subsys->dev.release = nvme_release_subsystem; subsys->dev.release = nvme_release_subsystem;
subsys->dev.groups = nvme_subsys_attrs_groups; subsys->dev.groups = nvme_subsys_attrs_groups;
dev_set_name(&subsys->dev, "nvme-subsys%d", ctrl->instance); dev_set_name(&subsys->dev, "nvme-subsys%d", ctrl->instance);
...@@ -3119,11 +3133,17 @@ static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ct ...@@ -3119,11 +3133,17 @@ static int nvme_check_ctrl_fabric_info(struct nvme_ctrl *ctrl, struct nvme_id_ct
return -EINVAL; return -EINVAL;
} }
if (!ctrl->maxcmd) {
dev_err(ctrl->device, "Maximum outstanding commands is 0\n");
return -EINVAL;
}
return 0; return 0;
} }
static int nvme_init_identify(struct nvme_ctrl *ctrl) static int nvme_init_identify(struct nvme_ctrl *ctrl)
{ {
struct queue_limits lim;
struct nvme_id_ctrl *id; struct nvme_id_ctrl *id;
u32 max_hw_sectors; u32 max_hw_sectors;
bool prev_apst_enabled; bool prev_apst_enabled;
...@@ -3190,7 +3210,12 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) ...@@ -3190,7 +3210,12 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->max_hw_sectors = ctrl->max_hw_sectors =
min_not_zero(ctrl->max_hw_sectors, max_hw_sectors); min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);
nvme_set_queue_limits(ctrl, ctrl->admin_q); lim = queue_limits_start_update(ctrl->admin_q);
nvme_set_ctrl_limits(ctrl, &lim);
ret = queue_limits_commit_update(ctrl->admin_q, &lim);
if (ret)
goto out_free;
ctrl->sgls = le32_to_cpu(id->sgls); ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas); ctrl->kas = le16_to_cpu(id->kas);
ctrl->max_namespaces = le32_to_cpu(id->mnan); ctrl->max_namespaces = le32_to_cpu(id->mnan);
...@@ -3422,7 +3447,7 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device, ...@@ -3422,7 +3447,7 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
if (minor < 0) if (minor < 0)
return minor; return minor;
cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor); cdev_device->devt = MKDEV(MAJOR(nvme_ns_chr_devt), minor);
cdev_device->class = nvme_ns_chr_class; cdev_device->class = &nvme_ns_chr_class;
cdev_device->release = nvme_cdev_rel; cdev_device->release = nvme_cdev_rel;
device_initialize(cdev_device); device_initialize(cdev_device);
cdev_init(cdev, fops); cdev_init(cdev, fops);
...@@ -4353,6 +4378,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event); ...@@ -4353,6 +4378,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
const struct blk_mq_ops *ops, unsigned int cmd_size) const struct blk_mq_ops *ops, unsigned int cmd_size)
{ {
struct queue_limits lim = {};
int ret; int ret;
memset(set, 0, sizeof(*set)); memset(set, 0, sizeof(*set));
...@@ -4372,7 +4398,7 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, ...@@ -4372,7 +4398,7 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
if (ret) if (ret)
return ret; return ret;
ctrl->admin_q = blk_mq_alloc_queue(set, NULL, NULL); ctrl->admin_q = blk_mq_alloc_queue(set, &lim, NULL);
if (IS_ERR(ctrl->admin_q)) { if (IS_ERR(ctrl->admin_q)) {
ret = PTR_ERR(ctrl->admin_q); ret = PTR_ERR(ctrl->admin_q);
goto out_free_tagset; goto out_free_tagset;
...@@ -4613,7 +4639,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ...@@ -4613,7 +4639,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->device = &ctrl->ctrl_device; ctrl->device = &ctrl->ctrl_device;
ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt), ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt),
ctrl->instance); ctrl->instance);
ctrl->device->class = nvme_class; ctrl->device->class = &nvme_class;
ctrl->device->parent = ctrl->dev; ctrl->device->parent = ctrl->dev;
if (ops->dev_attr_groups) if (ops->dev_attr_groups)
ctrl->device->groups = ops->dev_attr_groups; ctrl->device->groups = ops->dev_attr_groups;
...@@ -4846,42 +4872,36 @@ static int __init nvme_core_init(void) ...@@ -4846,42 +4872,36 @@ static int __init nvme_core_init(void)
if (result < 0) if (result < 0)
goto destroy_delete_wq; goto destroy_delete_wq;
nvme_class = class_create("nvme"); result = class_register(&nvme_class);
if (IS_ERR(nvme_class)) { if (result)
result = PTR_ERR(nvme_class);
goto unregister_chrdev; goto unregister_chrdev;
}
nvme_class->dev_uevent = nvme_class_uevent;
nvme_subsys_class = class_create("nvme-subsystem"); result = class_register(&nvme_subsys_class);
if (IS_ERR(nvme_subsys_class)) { if (result)
result = PTR_ERR(nvme_subsys_class);
goto destroy_class; goto destroy_class;
}
result = alloc_chrdev_region(&nvme_ns_chr_devt, 0, NVME_MINORS, result = alloc_chrdev_region(&nvme_ns_chr_devt, 0, NVME_MINORS,
"nvme-generic"); "nvme-generic");
if (result < 0) if (result < 0)
goto destroy_subsys_class; goto destroy_subsys_class;
nvme_ns_chr_class = class_create("nvme-generic"); result = class_register(&nvme_ns_chr_class);
if (IS_ERR(nvme_ns_chr_class)) { if (result)
result = PTR_ERR(nvme_ns_chr_class);
goto unregister_generic_ns; goto unregister_generic_ns;
}
result = nvme_init_auth(); result = nvme_init_auth();
if (result) if (result)
goto destroy_ns_chr; goto destroy_ns_chr;
return 0; return 0;
destroy_ns_chr: destroy_ns_chr:
class_destroy(nvme_ns_chr_class); class_unregister(&nvme_ns_chr_class);
unregister_generic_ns: unregister_generic_ns:
unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS); unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
destroy_subsys_class: destroy_subsys_class:
class_destroy(nvme_subsys_class); class_unregister(&nvme_subsys_class);
destroy_class: destroy_class:
class_destroy(nvme_class); class_unregister(&nvme_class);
unregister_chrdev: unregister_chrdev:
unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS); unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
destroy_delete_wq: destroy_delete_wq:
...@@ -4897,9 +4917,9 @@ static int __init nvme_core_init(void) ...@@ -4897,9 +4917,9 @@ static int __init nvme_core_init(void)
static void __exit nvme_core_exit(void) static void __exit nvme_core_exit(void)
{ {
nvme_exit_auth(); nvme_exit_auth();
class_destroy(nvme_ns_chr_class); class_unregister(&nvme_ns_chr_class);
class_destroy(nvme_subsys_class); class_unregister(&nvme_subsys_class);
class_destroy(nvme_class); class_unregister(&nvme_class);
unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS); unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS); unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS);
destroy_workqueue(nvme_delete_wq); destroy_workqueue(nvme_delete_wq);
......
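
Aside: the class constification in core.c above (and in fabrics.c and fcloop.c further down) all takes the same shape: define the struct class statically as const and register/unregister it, instead of class_create()/class_destroy(). A minimal, self-contained sketch, with the "example" identifiers being illustrative only:

#include <linux/device.h>
#include <linux/module.h>

static const struct class example_class = {
	.name = "example",
};

static int __init example_init(void)
{
	/*
	 * Register the statically defined class; no dynamic allocation
	 * and no error-prone class pointer to keep around.
	 */
	return class_register(&example_class);
}

static void __exit example_exit(void)
{
	class_unregister(&example_class);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
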
...@@ -637,7 +637,7 @@ static struct key *nvmf_parse_key(int key_id) ...@@ -637,7 +637,7 @@ static struct key *nvmf_parse_key(int key_id)
} }
key = key_lookup(key_id); key = key_lookup(key_id);
if (!IS_ERR(key)) if (IS_ERR(key))
pr_err("key id %08x not found\n", key_id); pr_err("key id %08x not found\n", key_id);
else else
pr_debug("Using key id %08x\n", key_id); pr_debug("Using key id %08x\n", key_id);
...@@ -1318,7 +1318,10 @@ nvmf_create_ctrl(struct device *dev, const char *buf) ...@@ -1318,7 +1318,10 @@ nvmf_create_ctrl(struct device *dev, const char *buf)
return ERR_PTR(ret); return ERR_PTR(ret);
} }
static struct class *nvmf_class; static const struct class nvmf_class = {
.name = "nvme-fabrics",
};
static struct device *nvmf_device; static struct device *nvmf_device;
static DEFINE_MUTEX(nvmf_dev_mutex); static DEFINE_MUTEX(nvmf_dev_mutex);
...@@ -1438,15 +1441,14 @@ static int __init nvmf_init(void) ...@@ -1438,15 +1441,14 @@ static int __init nvmf_init(void)
if (!nvmf_default_host) if (!nvmf_default_host)
return -ENOMEM; return -ENOMEM;
nvmf_class = class_create("nvme-fabrics"); ret = class_register(&nvmf_class);
if (IS_ERR(nvmf_class)) { if (ret) {
pr_err("couldn't register class nvme-fabrics\n"); pr_err("couldn't register class nvme-fabrics\n");
ret = PTR_ERR(nvmf_class);
goto out_free_host; goto out_free_host;
} }
nvmf_device = nvmf_device =
device_create(nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl"); device_create(&nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl");
if (IS_ERR(nvmf_device)) { if (IS_ERR(nvmf_device)) {
pr_err("couldn't create nvme-fabrics device!\n"); pr_err("couldn't create nvme-fabrics device!\n");
ret = PTR_ERR(nvmf_device); ret = PTR_ERR(nvmf_device);
...@@ -1462,9 +1464,9 @@ static int __init nvmf_init(void) ...@@ -1462,9 +1464,9 @@ static int __init nvmf_init(void)
return 0; return 0;
out_destroy_device: out_destroy_device:
device_destroy(nvmf_class, MKDEV(0, 0)); device_destroy(&nvmf_class, MKDEV(0, 0));
out_destroy_class: out_destroy_class:
class_destroy(nvmf_class); class_unregister(&nvmf_class);
out_free_host: out_free_host:
nvmf_host_put(nvmf_default_host); nvmf_host_put(nvmf_default_host);
return ret; return ret;
...@@ -1473,8 +1475,8 @@ static int __init nvmf_init(void) ...@@ -1473,8 +1475,8 @@ static int __init nvmf_init(void)
static void __exit nvmf_exit(void) static void __exit nvmf_exit(void)
{ {
misc_deregister(&nvmf_misc); misc_deregister(&nvmf_misc);
device_destroy(nvmf_class, MKDEV(0, 0)); device_destroy(&nvmf_class, MKDEV(0, 0));
class_destroy(nvmf_class); class_unregister(&nvmf_class);
nvmf_host_put(nvmf_default_host); nvmf_host_put(nvmf_default_host);
BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64); BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64);
......
...@@ -516,6 +516,7 @@ static void nvme_requeue_work(struct work_struct *work) ...@@ -516,6 +516,7 @@ static void nvme_requeue_work(struct work_struct *work)
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
{ {
struct queue_limits lim;
bool vwc = false; bool vwc = false;
mutex_init(&head->lock); mutex_init(&head->lock);
...@@ -532,7 +533,12 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) ...@@ -532,7 +533,12 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
!nvme_is_unique_nsid(ctrl, head) || !multipath) !nvme_is_unique_nsid(ctrl, head) || !multipath)
return 0; return 0;
head->disk = blk_alloc_disk(NULL, ctrl->numa_node); blk_set_stacking_limits(&lim);
lim.dma_alignment = 3;
if (head->ids.csi != NVME_CSI_ZNS)
lim.max_zone_append_sectors = 0;
head->disk = blk_alloc_disk(&lim, ctrl->numa_node);
if (IS_ERR(head->disk)) if (IS_ERR(head->disk))
return PTR_ERR(head->disk); return PTR_ERR(head->disk);
head->disk->fops = &nvme_ns_head_ops; head->disk->fops = &nvme_ns_head_ops;
...@@ -553,11 +559,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) ...@@ -553,11 +559,6 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues) ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues)
blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue); blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue);
/* set to a default value of 512 until the disk is validated */
blk_queue_logical_block_size(head->disk->queue, 512);
blk_set_stacking_limits(&head->disk->queue->limits);
blk_queue_dma_alignment(head->disk->queue, 3);
/* we need to propagate up the VMC settings */ /* we need to propagate up the VMC settings */
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
vwc = true; vwc = true;
......
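
Aside: the multipath changes above, together with the new multipath block in nvme_update_ns_info(), replace blk_stack_limits() on a live queue with the same snapshot/commit pattern, using queue_limits_stack_bdev() to fold a path's limits into the head disk. A rough sketch with an invented helper name (example_stack_member):

#include <linux/blkdev.h>

static int example_stack_member(struct gendisk *top, struct block_device *member)
{
	struct queue_limits lim;

	lim = queue_limits_start_update(top->queue);
	/*
	 * Merge the member device's limits into the snapshot; the last
	 * argument is only used to prefix misalignment warnings.
	 */
	queue_limits_stack_bdev(&lim, member, 0, top->disk_name);
	return queue_limits_commit_update(top->queue, &lim);
}
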
...@@ -1036,11 +1036,11 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk) ...@@ -1036,11 +1036,11 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
} }
#endif /* CONFIG_NVME_MULTIPATH */ #endif /* CONFIG_NVME_MULTIPATH */
int nvme_revalidate_zones(struct nvme_ns *ns);
int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data); unsigned int nr_zones, report_zones_cb cb, void *data);
int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
struct queue_limits *lim);
#ifdef CONFIG_BLK_DEV_ZONED #ifdef CONFIG_BLK_DEV_ZONED
int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf);
blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req, blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd, struct nvme_command *cmnd,
enum nvme_zone_mgmt_action action); enum nvme_zone_mgmt_action action);
...@@ -1051,13 +1051,6 @@ static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, ...@@ -1051,13 +1051,6 @@ static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
{ {
return BLK_STS_NOTSUPP; return BLK_STS_NOTSUPP;
} }
static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
{
dev_warn(ns->ctrl->device,
"Please enable CONFIG_BLK_DEV_ZONED to support ZNS devices\n");
return -EPROTONOSUPPORT;
}
#endif #endif
static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
......
...@@ -1006,6 +1006,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) ...@@ -1006,6 +1006,7 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
{ {
int ret; int ret;
bool changed; bool changed;
u16 max_queue_size;
ret = nvme_rdma_configure_admin_queue(ctrl, new); ret = nvme_rdma_configure_admin_queue(ctrl, new);
if (ret) if (ret)
...@@ -1030,11 +1031,16 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) ...@@ -1030,11 +1031,16 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1); ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
} }
if (ctrl->ctrl.sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE) { if (ctrl->ctrl.max_integrity_segments)
max_queue_size = NVME_RDMA_MAX_METADATA_QUEUE_SIZE;
else
max_queue_size = NVME_RDMA_MAX_QUEUE_SIZE;
if (ctrl->ctrl.sqsize + 1 > max_queue_size) {
dev_warn(ctrl->ctrl.device, dev_warn(ctrl->ctrl.device,
"ctrl sqsize %u > max queue size %u, clamping down\n", "ctrl sqsize %u > max queue size %u, clamping down\n",
ctrl->ctrl.sqsize + 1, NVME_RDMA_MAX_QUEUE_SIZE); ctrl->ctrl.sqsize + 1, max_queue_size);
ctrl->ctrl.sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1; ctrl->ctrl.sqsize = max_queue_size - 1;
} }
if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) { if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
......
...@@ -221,14 +221,11 @@ static int ns_update_nuse(struct nvme_ns *ns) ...@@ -221,14 +221,11 @@ static int ns_update_nuse(struct nvme_ns *ns)
ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, &id); ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, &id);
if (ret) if (ret)
goto out_free_id; return ret;
ns->head->nuse = le64_to_cpu(id->nuse); ns->head->nuse = le64_to_cpu(id->nuse);
out_free_id:
kfree(id); kfree(id);
return 0;
return ret;
} }
static ssize_t nuse_show(struct device *dev, struct device_attribute *attr, static ssize_t nuse_show(struct device *dev, struct device_attribute *attr,
......
...@@ -7,16 +7,6 @@ ...@@ -7,16 +7,6 @@
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include "nvme.h" #include "nvme.h"
int nvme_revalidate_zones(struct nvme_ns *ns)
{
struct request_queue *q = ns->queue;
blk_queue_chunk_sectors(q, ns->head->zsze);
blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
return blk_revalidate_disk_zones(ns->disk, NULL);
}
static int nvme_set_max_append(struct nvme_ctrl *ctrl) static int nvme_set_max_append(struct nvme_ctrl *ctrl)
{ {
struct nvme_command c = { }; struct nvme_command c = { };
...@@ -45,10 +35,10 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl) ...@@ -45,10 +35,10 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl)
return 0; return 0;
} }
int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
struct queue_limits *lim)
{ {
struct nvme_effects_log *log = ns->head->effects; struct nvme_effects_log *log = ns->head->effects;
struct request_queue *q = ns->queue;
struct nvme_command c = { }; struct nvme_command c = { };
struct nvme_id_ns_zns *id; struct nvme_id_ns_zns *id;
int status; int status;
...@@ -109,10 +99,12 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) ...@@ -109,10 +99,12 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
goto free_data; goto free_data;
} }
disk_set_zoned(ns->disk); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); lim->zoned = 1;
disk_set_max_open_zones(ns->disk, le32_to_cpu(id->mor) + 1); lim->max_open_zones = le32_to_cpu(id->mor) + 1;
disk_set_max_active_zones(ns->disk, le32_to_cpu(id->mar) + 1); lim->max_active_zones = le32_to_cpu(id->mar) + 1;
lim->chunk_sectors = ns->head->zsze;
lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
free_data: free_data:
kfree(id); kfree(id);
return status; return status;
......
...@@ -428,7 +428,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) ...@@ -428,7 +428,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->cqes = (0x4 << 4) | 0x4; id->cqes = (0x4 << 4) | 0x4;
/* no enforcement soft-limit for maxcmd - pick arbitrary high value */ /* no enforcement soft-limit for maxcmd - pick arbitrary high value */
id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); id->maxcmd = cpu_to_le16(NVMET_MAX_CMD(ctrl));
id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES); id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES);
id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES); id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
......
...@@ -273,6 +273,32 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item, ...@@ -273,6 +273,32 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_, param_inline_data_size); CONFIGFS_ATTR(nvmet_, param_inline_data_size);
static ssize_t nvmet_param_max_queue_size_show(struct config_item *item,
char *page)
{
struct nvmet_port *port = to_nvmet_port(item);
return snprintf(page, PAGE_SIZE, "%d\n", port->max_queue_size);
}
static ssize_t nvmet_param_max_queue_size_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_port *port = to_nvmet_port(item);
int ret;
if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
ret = kstrtoint(page, 0, &port->max_queue_size);
if (ret) {
pr_err("Invalid value '%s' for max_queue_size\n", page);
return -EINVAL;
}
return count;
}
CONFIGFS_ATTR(nvmet_, param_max_queue_size);
#ifdef CONFIG_BLK_DEV_INTEGRITY #ifdef CONFIG_BLK_DEV_INTEGRITY
static ssize_t nvmet_param_pi_enable_show(struct config_item *item, static ssize_t nvmet_param_pi_enable_show(struct config_item *item,
char *page) char *page)
...@@ -1859,6 +1885,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = { ...@@ -1859,6 +1885,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
&nvmet_attr_addr_trtype, &nvmet_attr_addr_trtype,
&nvmet_attr_addr_tsas, &nvmet_attr_addr_tsas,
&nvmet_attr_param_inline_data_size, &nvmet_attr_param_inline_data_size,
&nvmet_attr_param_max_queue_size,
#ifdef CONFIG_BLK_DEV_INTEGRITY #ifdef CONFIG_BLK_DEV_INTEGRITY
&nvmet_attr_param_pi_enable, &nvmet_attr_param_pi_enable,
#endif #endif
...@@ -1917,6 +1944,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group, ...@@ -1917,6 +1944,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
INIT_LIST_HEAD(&port->subsystems); INIT_LIST_HEAD(&port->subsystems);
INIT_LIST_HEAD(&port->referrals); INIT_LIST_HEAD(&port->referrals);
port->inline_data_size = -1; /* < 0 == let the transport choose */ port->inline_data_size = -1; /* < 0 == let the transport choose */
port->max_queue_size = -1; /* < 0 == let the transport choose */
port->disc_addr.portid = cpu_to_le16(portid); port->disc_addr.portid = cpu_to_le16(portid);
port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX; port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX;
......
...@@ -358,6 +358,18 @@ int nvmet_enable_port(struct nvmet_port *port) ...@@ -358,6 +358,18 @@ int nvmet_enable_port(struct nvmet_port *port)
if (port->inline_data_size < 0) if (port->inline_data_size < 0)
port->inline_data_size = 0; port->inline_data_size = 0;
/*
* If the transport didn't set the max_queue_size properly, then clamp
* it to the target limits. Also set default values in case the
* transport didn't set it at all.
*/
if (port->max_queue_size < 0)
port->max_queue_size = NVMET_MAX_QUEUE_SIZE;
else
port->max_queue_size = clamp_t(int, port->max_queue_size,
NVMET_MIN_QUEUE_SIZE,
NVMET_MAX_QUEUE_SIZE);
port->enabled = true; port->enabled = true;
port->tr_ops = ops; port->tr_ops = ops;
return 0; return 0;
...@@ -1223,9 +1235,10 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl) ...@@ -1223,9 +1235,10 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
ctrl->cap |= (15ULL << 24); ctrl->cap |= (15ULL << 24);
/* maximum queue entries supported: */ /* maximum queue entries supported: */
if (ctrl->ops->get_max_queue_size) if (ctrl->ops->get_max_queue_size)
ctrl->cap |= ctrl->ops->get_max_queue_size(ctrl) - 1; ctrl->cap |= min_t(u16, ctrl->ops->get_max_queue_size(ctrl),
ctrl->port->max_queue_size) - 1;
else else
ctrl->cap |= NVMET_QUEUE_SIZE - 1; ctrl->cap |= ctrl->port->max_queue_size - 1;
if (nvmet_is_passthru_subsys(ctrl->subsys)) if (nvmet_is_passthru_subsys(ctrl->subsys))
nvmet_passthrough_override_cap(ctrl); nvmet_passthrough_override_cap(ctrl);
...@@ -1411,6 +1424,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, ...@@ -1411,6 +1424,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
kref_init(&ctrl->ref); kref_init(&ctrl->ref);
ctrl->subsys = subsys; ctrl->subsys = subsys;
ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
nvmet_init_cap(ctrl); nvmet_init_cap(ctrl);
WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL); WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);
......
...@@ -282,7 +282,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) ...@@ -282,7 +282,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
id->lpa = (1 << 2); id->lpa = (1 << 2);
/* no enforcement soft-limit for maxcmd - pick arbitrary high value */ /* no enforcement soft-limit for maxcmd - pick arbitrary high value */
id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); id->maxcmd = cpu_to_le16(NVMET_MAX_CMD(ctrl));
id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */ id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
if (ctrl->ops->flags & NVMF_KEYED_SGLS) if (ctrl->ops->flags & NVMF_KEYED_SGLS)
......
...@@ -157,7 +157,8 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) ...@@ -157,7 +157,8 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
} }
if (sqsize > mqes) { /* for fabrics, this value applies to only the I/O Submission Queues */
if (qid && sqsize > mqes) {
pr_warn("sqsize %u is larger than MQES supported %u cntlid %d\n", pr_warn("sqsize %u is larger than MQES supported %u cntlid %d\n",
sqsize, mqes, ctrl->cntlid); sqsize, mqes, ctrl->cntlid);
req->error_loc = offsetof(struct nvmf_connect_command, sqsize); req->error_loc = offsetof(struct nvmf_connect_command, sqsize);
...@@ -251,8 +252,6 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) ...@@ -251,8 +252,6 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
if (status) if (status)
goto out; goto out;
ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
uuid_copy(&ctrl->hostid, &d->hostid); uuid_copy(&ctrl->hostid, &d->hostid);
ret = nvmet_setup_auth(ctrl); ret = nvmet_setup_auth(ctrl);
......
...@@ -1556,7 +1556,9 @@ static const struct attribute_group *fcloop_dev_attr_groups[] = { ...@@ -1556,7 +1556,9 @@ static const struct attribute_group *fcloop_dev_attr_groups[] = {
NULL, NULL,
}; };
static struct class *fcloop_class; static const struct class fcloop_class = {
.name = "fcloop",
};
static struct device *fcloop_device; static struct device *fcloop_device;
...@@ -1564,15 +1566,14 @@ static int __init fcloop_init(void) ...@@ -1564,15 +1566,14 @@ static int __init fcloop_init(void)
{ {
int ret; int ret;
fcloop_class = class_create("fcloop"); ret = class_register(&fcloop_class);
if (IS_ERR(fcloop_class)) { if (ret) {
pr_err("couldn't register class fcloop\n"); pr_err("couldn't register class fcloop\n");
ret = PTR_ERR(fcloop_class);
return ret; return ret;
} }
fcloop_device = device_create_with_groups( fcloop_device = device_create_with_groups(
fcloop_class, NULL, MKDEV(0, 0), NULL, &fcloop_class, NULL, MKDEV(0, 0), NULL,
fcloop_dev_attr_groups, "ctl"); fcloop_dev_attr_groups, "ctl");
if (IS_ERR(fcloop_device)) { if (IS_ERR(fcloop_device)) {
pr_err("couldn't create ctl device!\n"); pr_err("couldn't create ctl device!\n");
...@@ -1585,7 +1586,7 @@ static int __init fcloop_init(void) ...@@ -1585,7 +1586,7 @@ static int __init fcloop_init(void)
return 0; return 0;
out_destroy_class: out_destroy_class:
class_destroy(fcloop_class); class_unregister(&fcloop_class);
return ret; return ret;
} }
...@@ -1643,8 +1644,8 @@ static void __exit fcloop_exit(void) ...@@ -1643,8 +1644,8 @@ static void __exit fcloop_exit(void)
put_device(fcloop_device); put_device(fcloop_device);
device_destroy(fcloop_class, MKDEV(0, 0)); device_destroy(&fcloop_class, MKDEV(0, 0));
class_destroy(fcloop_class); class_unregister(&fcloop_class);
} }
module_init(fcloop_init); module_init(fcloop_init);
......
...@@ -163,6 +163,7 @@ struct nvmet_port { ...@@ -163,6 +163,7 @@ struct nvmet_port {
void *priv; void *priv;
bool enabled; bool enabled;
int inline_data_size; int inline_data_size;
int max_queue_size;
const struct nvmet_fabrics_ops *tr_ops; const struct nvmet_fabrics_ops *tr_ops;
bool pi_enable; bool pi_enable;
}; };
...@@ -543,9 +544,10 @@ void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, ...@@ -543,9 +544,10 @@ void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys,
void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
u8 event_info, u8 log_page); u8 event_info, u8 log_page);
#define NVMET_QUEUE_SIZE 1024 #define NVMET_MIN_QUEUE_SIZE 16
#define NVMET_MAX_QUEUE_SIZE 1024
#define NVMET_NR_QUEUES 128 #define NVMET_NR_QUEUES 128
#define NVMET_MAX_CMD NVMET_QUEUE_SIZE #define NVMET_MAX_CMD(ctrl) (NVME_CAP_MQES(ctrl->cap) + 1)
/* /*
* Nice round number that makes a list of nsids fit into a page. * Nice round number that makes a list of nsids fit into a page.
......
...@@ -132,7 +132,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) ...@@ -132,7 +132,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
id->sqes = min_t(__u8, ((0x6 << 4) | 0x6), id->sqes); id->sqes = min_t(__u8, ((0x6 << 4) | 0x6), id->sqes);
id->cqes = min_t(__u8, ((0x4 << 4) | 0x4), id->cqes); id->cqes = min_t(__u8, ((0x4 << 4) | 0x4), id->cqes);
id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); id->maxcmd = cpu_to_le16(NVMET_MAX_CMD(ctrl));
/* don't support fuse commands */ /* don't support fuse commands */
id->fuses = 0; id->fuses = 0;
......
...@@ -1956,6 +1956,14 @@ static int nvmet_rdma_add_port(struct nvmet_port *nport) ...@@ -1956,6 +1956,14 @@ static int nvmet_rdma_add_port(struct nvmet_port *nport)
nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
} }
if (nport->max_queue_size < 0) {
nport->max_queue_size = NVME_RDMA_DEFAULT_QUEUE_SIZE;
} else if (nport->max_queue_size > NVME_RDMA_MAX_QUEUE_SIZE) {
pr_warn("max_queue_size %u is too large, reducing to %u\n",
nport->max_queue_size, NVME_RDMA_MAX_QUEUE_SIZE);
nport->max_queue_size = NVME_RDMA_MAX_QUEUE_SIZE;
}
ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr, ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr,
nport->disc_addr.trsvcid, &port->addr); nport->disc_addr.trsvcid, &port->addr);
if (ret) { if (ret) {
...@@ -2015,6 +2023,8 @@ static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl) ...@@ -2015,6 +2023,8 @@ static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
static u16 nvmet_rdma_get_max_queue_size(const struct nvmet_ctrl *ctrl) static u16 nvmet_rdma_get_max_queue_size(const struct nvmet_ctrl *ctrl)
{ {
if (ctrl->pi_support)
return NVME_RDMA_MAX_METADATA_QUEUE_SIZE;
return NVME_RDMA_MAX_QUEUE_SIZE; return NVME_RDMA_MAX_QUEUE_SIZE;
} }
......
...@@ -6,7 +6,11 @@ ...@@ -6,7 +6,11 @@
#ifndef _LINUX_NVME_RDMA_H #ifndef _LINUX_NVME_RDMA_H
#define _LINUX_NVME_RDMA_H #define _LINUX_NVME_RDMA_H
#define NVME_RDMA_MAX_QUEUE_SIZE 128 #define NVME_RDMA_IP_PORT 4420
#define NVME_RDMA_MAX_QUEUE_SIZE 256
#define NVME_RDMA_MAX_METADATA_QUEUE_SIZE 128
#define NVME_RDMA_DEFAULT_QUEUE_SIZE 128
enum nvme_rdma_cm_fmt { enum nvme_rdma_cm_fmt {
NVME_RDMA_CM_FMT_1_0 = 0x0, NVME_RDMA_CM_FMT_1_0 = 0x0,
......
...@@ -23,8 +23,6 @@ ...@@ -23,8 +23,6 @@
#define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery" #define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery"
#define NVME_RDMA_IP_PORT 4420
#define NVME_NSID_ALL 0xffffffff #define NVME_NSID_ALL 0xffffffff
enum nvme_subsys_type { enum nvme_subsys_type {
......