Commit 80ee071b authored by Jens Axboe

Merge branch 'nvme-5.9' of git://git.infradead.org/nvme into for-5.9/drivers

Pull NVMe updates from Christoph:

"Below is the current large chunk we have in the nvme tree for 5.9:

 - ZNS support (Aravind, Keith, Matias, Niklas)
 - misc cleanups and optimizations
   (Baolin, Chaitanya, David, Dongli, Max, Sagi)"

* 'nvme-5.9' of git://git.infradead.org/nvme: (28 commits)
  nvme: remove ns->disk checks
  nvme-pci: use standard block status symbolic names
  nvme-pci: use the consistent return type of nvme_pci_iod_alloc_size()
  nvme-pci: add a blank line after declarations
  nvme-pci: fix some comments issues
  nvme-pci: remove redundant segment validation
  nvme: document quirked Intel models
  nvme: expose reconnect_delay and ctrl_loss_tmo via sysfs
  nvme: support for zoned namespaces
  nvme: support for multiple Command Sets Supported and Effects log pages
  nvme: implement multiple I/O Command Set support
  null_blk: introduce zone capacity for zoned device
  block: add capacity field to zone descriptors
  nvme: use USEC_PER_SEC instead of magic numbers
  nvmet-tcp: simplify nvmet_process_resp_list
  nvme-tcp: optimize network stack with setting msg flags according to batch size
  nvme-tcp: leverage request plugging
  nvme-tcp: have queue prod/cons send list become a llist
  nvme-fcloop: verify wwnn and wwpn format
  nvmet: use unsigned type for u64
  ...
parents 482c6b61 3913f4f3
@@ -86,9 +86,10 @@ config BLK_DEV_ZONED
     select MQ_IOSCHED_DEADLINE
     help
     Block layer zoned block device support. This option enables
-    support for ZAC/ZBC host-managed and host-aware zoned block devices.
+    support for ZAC/ZBC/ZNS host-managed and host-aware zoned block
+    devices.
 
-    Say yes here if you have a ZAC or ZBC storage device.
+    Say yes here if you have a ZAC, ZBC, or ZNS storage device.
 
 config BLK_DEV_THROTTLING
     bool "Block layer bio throttling support"
...
@@ -312,6 +312,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
         return ret;
 
     rep.nr_zones = ret;
+    rep.flags = BLK_ZONE_REP_CAPACITY;
     if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
         return -EFAULT;
     return 0;
...
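The capacity field and the BLK_ZONE_REP_CAPACITY flag are also part of the user-space ABI, so zone reports issued through the BLKREPORTZONE ioctl can carry a per-zone capacity. Below is a minimal user-space sketch, assuming the updated <linux/blkzoned.h> from this series; the program itself is illustrative and not part of the patch:

/* Read the first zone descriptor of a zoned block device and, if the
 * kernel reports BLK_ZONE_REP_CAPACITY, use the per-zone capacity
 * instead of the zone length. Needs <linux/blkzoned.h> with this series. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/blkzoned.h>

int main(int argc, char **argv)
{
    struct {
        struct blk_zone_report rep;   /* report header */
        struct blk_zone zone;         /* room for one descriptor */
    } r;
    int fd;

    if (argc < 2)
        return 1;
    fd = open(argv[1], O_RDONLY);
    if (fd < 0)
        return 1;

    memset(&r, 0, sizeof(r));
    r.rep.sector = 0;     /* start reporting from the first zone */
    r.rep.nr_zones = 1;   /* one descriptor follows the header */
    if (ioctl(fd, BLKREPORTZONE, &r) < 0)
        return 1;

    if (r.rep.flags & BLK_ZONE_REP_CAPACITY)
        printf("zone len %llu, capacity %llu\n",
               (unsigned long long)r.zone.len,
               (unsigned long long)r.zone.capacity);
    else
        printf("zone len %llu (capacity not reported)\n",
               (unsigned long long)r.zone.len);
    return 0;
}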
@@ -49,6 +49,7 @@ struct nullb_device {
     unsigned long completion_nsec; /* time in ns to complete a request */
     unsigned long cache_size; /* disk cache size in MB */
     unsigned long zone_size; /* zone size in MB if device is zoned */
+    unsigned long zone_capacity; /* zone capacity in MB if device is zoned */
     unsigned int zone_nr_conv; /* number of conventional zones */
     unsigned int submit_queues; /* number of submission queues */
     unsigned int home_node; /* home node for the device */
...
@@ -200,6 +200,10 @@ static unsigned long g_zone_size = 256;
 module_param_named(zone_size, g_zone_size, ulong, S_IRUGO);
 MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256");
 
+static unsigned long g_zone_capacity;
+module_param_named(zone_capacity, g_zone_capacity, ulong, 0444);
+MODULE_PARM_DESC(zone_capacity, "Zone capacity in MB when block device is zoned. Can be less than or equal to zone size. Default: Zone size");
+
 static unsigned int g_zone_nr_conv;
 module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444);
 MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0");
@@ -341,6 +345,7 @@ NULLB_DEVICE_ATTR(mbps, uint, NULL);
 NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
 NULLB_DEVICE_ATTR(zoned, bool, NULL);
 NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
+NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
 NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
 
 static ssize_t nullb_device_power_show(struct config_item *item, char *page)
@@ -457,6 +462,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
     &nullb_device_attr_badblocks,
     &nullb_device_attr_zoned,
     &nullb_device_attr_zone_size,
+    &nullb_device_attr_zone_capacity,
     &nullb_device_attr_zone_nr_conv,
     NULL,
 };
@@ -510,7 +516,8 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
 
 static ssize_t memb_group_features_show(struct config_item *item, char *page)
 {
-    return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_nr_conv\n");
+    return snprintf(page, PAGE_SIZE,
+            "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv\n");
 }
 
 CONFIGFS_ATTR_RO(memb_group_, features);
@@ -571,6 +578,7 @@ static struct nullb_device *null_alloc_dev(void)
     dev->use_per_node_hctx = g_use_per_node_hctx;
     dev->zoned = g_zoned;
     dev->zone_size = g_zone_size;
+    dev->zone_capacity = g_zone_capacity;
     dev->zone_nr_conv = g_zone_nr_conv;
     return dev;
 }
...
@@ -28,6 +28,15 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
         return -EINVAL;
     }
 
+    if (!dev->zone_capacity)
+        dev->zone_capacity = dev->zone_size;
+
+    if (dev->zone_capacity > dev->zone_size) {
+        pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n",
+                    dev->zone_capacity, dev->zone_size);
+        return -EINVAL;
+    }
+
     dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT;
     dev->nr_zones = dev_size >>
                 (SECTOR_SHIFT + ilog2(dev->zone_size_sects));
@@ -47,6 +56,7 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
         zone->start = sector;
         zone->len = dev->zone_size_sects;
+        zone->capacity = zone->len;
         zone->wp = zone->start + zone->len;
         zone->type = BLK_ZONE_TYPE_CONVENTIONAL;
         zone->cond = BLK_ZONE_COND_NOT_WP;
@@ -59,6 +69,7 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
         zone->start = zone->wp = sector;
         zone->len = dev->zone_size_sects;
+        zone->capacity = dev->zone_capacity << ZONE_SIZE_SHIFT;
         zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
         zone->cond = BLK_ZONE_COND_EMPTY;
@@ -185,6 +196,9 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
             return BLK_STS_IOERR;
         }
 
+        if (zone->wp + nr_sectors > zone->start + zone->capacity)
+            return BLK_STS_IOERR;
+
         if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
             zone->cond = BLK_ZONE_COND_IMP_OPEN;
@@ -193,7 +207,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
             return ret;
 
         zone->wp += nr_sectors;
-        if (zone->wp == zone->start + zone->len)
+        if (zone->wp == zone->start + zone->capacity)
             zone->cond = BLK_ZONE_COND_FULL;
         return BLK_STS_OK;
     default:
...
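Distilled from the null_blk change above, the rule being enforced is: a sequential zone advertises capacity <= len, a write may not cross start + capacity, and the zone transitions to FULL once the write pointer reaches start + capacity rather than start + len. A standalone sketch of that invariant (types and names are illustrative, not the driver's):

/* Illustrative restatement of the zone-capacity write rule; not driver code. */
#include <stdbool.h>
#include <stdint.h>

struct zone_state {
    uint64_t start;     /* first sector of the zone */
    uint64_t len;       /* zone size in sectors */
    uint64_t capacity;  /* writable sectors, capacity <= len */
    uint64_t wp;        /* current write pointer */
};

/* A write of nr_sectors at the write pointer is allowed only inside capacity. */
static bool zone_write_fits(const struct zone_state *z, uint64_t nr_sectors)
{
    return z->wp + nr_sectors <= z->start + z->capacity;
}

/* After a successful write, the zone becomes FULL at start + capacity. */
static bool zone_full_after(const struct zone_state *z, uint64_t nr_sectors)
{
    return z->wp + nr_sectors == z->start + z->capacity;
}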
@@ -13,6 +13,7 @@ nvme-core-y				:= core.o
 nvme-core-$(CONFIG_TRACING)		+= trace.o
 nvme-core-$(CONFIG_NVME_MULTIPATH)	+= multipath.o
 nvme-core-$(CONFIG_NVM)			+= lightnvm.o
+nvme-core-$(CONFIG_BLK_DEV_ZONED)	+= zns.o
 nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS)	+= fault_inject.o
 nvme-core-$(CONFIG_NVME_HWMON)		+= hwmon.o
...
@@ -89,7 +89,7 @@ static dev_t nvme_chr_devt;
 static struct class *nvme_class;
 static struct class *nvme_subsys_class;
 
-static int nvme_revalidate_disk(struct gendisk *disk);
+static int _nvme_revalidate_disk(struct gendisk *disk);
 static void nvme_put_subsystem(struct nvme_subsystem *subsys);
 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
                        unsigned nsid);
@@ -100,7 +100,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
     * Revalidating a dead namespace sets capacity to 0. This will end
     * buffered writers dirtying pages that can't be synced.
     */
-    if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+    if (test_and_set_bit(NVME_NS_DEAD, &ns->flags))
        return;
    blk_set_queue_dying(ns->queue);
    /* Forcibly unquiesce queues to avoid blocking dispatch */
@@ -287,6 +287,10 @@ void nvme_complete_rq(struct request *req)
             nvme_retry_req(req);
             return;
         }
+    } else if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+           req_op(req) == REQ_OP_ZONE_APPEND) {
+        req->__sector = nvme_lba_to_sect(req->q->queuedata,
+            le64_to_cpu(nvme_req(req)->result.u64));
     }
 
     nvme_trace_bio_complete(req, status);
@@ -555,7 +559,7 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)
         goto out_disable_stream;
     }
 
-    ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
+    ctrl->nr_streams = min_t(u16, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
     dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
     return 0;
@@ -673,7 +677,8 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
 }
 
 static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
-        struct request *req, struct nvme_command *cmnd)
+        struct request *req, struct nvme_command *cmnd,
+        enum nvme_opcode op)
 {
     struct nvme_ctrl *ctrl = ns->ctrl;
     u16 control = 0;
@@ -687,7 +692,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
     if (req->cmd_flags & REQ_RAHEAD)
         dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
 
-    cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
+    cmnd->rw.opcode = op;
     cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
     cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
     cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
@@ -716,6 +721,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
         case NVME_NS_DPS_PI_TYPE2:
             control |= NVME_RW_PRINFO_PRCHK_GUARD |
                     NVME_RW_PRINFO_PRCHK_REF;
+            if (op == nvme_cmd_zone_append)
+                control |= NVME_RW_APPEND_PIREMAP;
             cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req));
             break;
         }
@@ -756,6 +763,19 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
     case REQ_OP_FLUSH:
         nvme_setup_flush(ns, cmd);
         break;
+    case REQ_OP_ZONE_RESET_ALL:
+    case REQ_OP_ZONE_RESET:
+        ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_RESET);
+        break;
+    case REQ_OP_ZONE_OPEN:
+        ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_OPEN);
+        break;
+    case REQ_OP_ZONE_CLOSE:
+        ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_CLOSE);
+        break;
+    case REQ_OP_ZONE_FINISH:
+        ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_FINISH);
+        break;
     case REQ_OP_WRITE_ZEROES:
         ret = nvme_setup_write_zeroes(ns, req, cmd);
         break;
@@ -763,8 +783,13 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
         ret = nvme_setup_discard(ns, req, cmd);
         break;
     case REQ_OP_READ:
+        ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_read);
+        break;
     case REQ_OP_WRITE:
-        ret = nvme_setup_rw(ns, req, cmd);
+        ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_write);
+        break;
+    case REQ_OP_ZONE_APPEND:
+        ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_zone_append);
         break;
     default:
         WARN_ON_ONCE(1);
@@ -1056,8 +1081,13 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
     return error;
 }
 
+static bool nvme_multi_css(struct nvme_ctrl *ctrl)
+{
+    return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI;
+}
+
 static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
-        struct nvme_ns_id_desc *cur)
+        struct nvme_ns_id_desc *cur, bool *csi_seen)
 {
     const char *warn_str = "ctrl returned bogus length:";
     void *data = cur;
@@ -1087,6 +1117,15 @@ static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
         }
         uuid_copy(&ids->uuid, data + sizeof(*cur));
         return NVME_NIDT_UUID_LEN;
+    case NVME_NIDT_CSI:
+        if (cur->nidl != NVME_NIDT_CSI_LEN) {
+            dev_warn(ctrl->device, "%s %d for NVME_NIDT_CSI\n",
+                 warn_str, cur->nidl);
+            return -1;
+        }
+        memcpy(&ids->csi, data + sizeof(*cur), NVME_NIDT_CSI_LEN);
+        *csi_seen = true;
+        return NVME_NIDT_CSI_LEN;
     default:
         /* Skip unknown types */
         return cur->nidl;
@@ -1097,10 +1136,9 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
         struct nvme_ns_ids *ids)
 {
     struct nvme_command c = { };
-    int status;
+    bool csi_seen = false;
+    int status, pos, len;
     void *data;
-    int pos;
-    int len;
 
     c.identify.opcode = nvme_admin_identify;
     c.identify.nsid = cpu_to_le32(nsid);
@@ -1125,7 +1163,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
          * device just because of a temporal retry-able error (such
          * as path of transport errors).
          */
-        if (status > 0 && (status & NVME_SC_DNR))
+        if (status > 0 && (status & NVME_SC_DNR) && !nvme_multi_css(ctrl))
             status = 0;
         goto free_data;
     }
@@ -1136,12 +1174,19 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
         if (cur->nidl == 0)
             break;
 
-        len = nvme_process_ns_desc(ctrl, ids, cur);
+        len = nvme_process_ns_desc(ctrl, ids, cur, &csi_seen);
         if (len < 0)
-            goto free_data;
+            break;
 
         len += sizeof(*cur);
     }
+
+    if (nvme_multi_css(ctrl) && !csi_seen) {
+        dev_warn(ctrl->device, "Command set not reported for nsid:%d\n",
+             nsid);
+        status = -EINVAL;
+    }
+
 free_data:
     kfree(data);
     return status;
@@ -1350,8 +1395,8 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
     u32 effects = 0;
 
     if (ns) {
-        if (ctrl->effects)
-            effects = le32_to_cpu(ctrl->effects->iocs[opcode]);
+        if (ns->head->effects)
+            effects = le32_to_cpu(ns->head->effects->iocs[opcode]);
         if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
             dev_warn(ctrl->device,
                  "IO command:%02x has unhandled effects:%08x\n",
@@ -1378,14 +1423,23 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
     return effects;
 }
 
-static void nvme_update_formats(struct nvme_ctrl *ctrl)
+static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects)
 {
     struct nvme_ns *ns;
 
     down_read(&ctrl->namespaces_rwsem);
     list_for_each_entry(ns, &ctrl->namespaces, list)
-        if (ns->disk && nvme_revalidate_disk(ns->disk))
+        if (_nvme_revalidate_disk(ns->disk))
             nvme_set_queue_dying(ns);
+        else if (blk_queue_is_zoned(ns->disk->queue)) {
+            /*
+             * IO commands are required to fully revalidate a zoned
+             * device. Force the command effects to trigger rescan
+             * work so report zones can run in a context with
+             * unfrozen IO queues.
+             */
+            *effects |= NVME_CMD_EFFECTS_NCC;
+        }
     up_read(&ctrl->namespaces_rwsem);
 }
 
@@ -1397,7 +1451,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
      * this command.
      */
     if (effects & NVME_CMD_EFFECTS_LBCC)
-        nvme_update_formats(ctrl);
+        nvme_update_formats(ctrl, &effects);
     if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
         nvme_unfreeze(ctrl);
         nvme_mpath_unfreeze(ctrl->subsys);
@@ -1512,7 +1566,7 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
  * Issue ioctl requests on the first available path. Note that unlike normal
  * block layer requests we will not retry failed request on another controller.
  */
-static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
+struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
         struct nvme_ns_head **head, int *srcu_idx)
 {
 #ifdef CONFIG_NVME_MULTIPATH
@@ -1532,7 +1586,7 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
     return disk->private_data;
 }
 
-static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
+void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
 {
     if (head)
         srcu_read_unlock(&head->srcu, idx);
@@ -1798,7 +1852,7 @@ static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
         memcpy(ids->eui64, id->eui64, sizeof(id->eui64));
     if (ctrl->vs >= NVME_VS(1, 2, 0))
         memcpy(ids->nguid, id->nguid, sizeof(id->nguid));
-    if (ctrl->vs >= NVME_VS(1, 3, 0))
+    if (ctrl->vs >= NVME_VS(1, 3, 0) || nvme_multi_css(ctrl))
         return nvme_identify_ns_descs(ctrl, nsid, ids);
     return 0;
 }
@@ -1814,7 +1868,8 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
 {
     return uuid_equal(&a->uuid, &b->uuid) &&
         memcmp(&a->nguid, &b->nguid, sizeof(a->nguid)) == 0 &&
-        memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0;
+        memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0 &&
+        a->csi == b->csi;
 }
 
 static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
@@ -1924,18 +1979,38 @@ static void nvme_update_disk_info(struct gendisk *disk,
 
 static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 {
+    unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
     struct nvme_ns *ns = disk->private_data;
     struct nvme_ctrl *ctrl = ns->ctrl;
+    int ret;
     u32 iob;
 
     /*
     * If identify namespace failed, use default 512 byte block size so
     * block layer can use before failing read/write for 0 capacity.
     */
-    ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
+    ns->lba_shift = id->lbaf[lbaf].ds;
     if (ns->lba_shift == 0)
         ns->lba_shift = 9;
 
+    switch (ns->head->ids.csi) {
+    case NVME_CSI_NVM:
+        break;
+    case NVME_CSI_ZNS:
+        ret = nvme_update_zone_info(disk, ns, lbaf);
+        if (ret) {
+            dev_warn(ctrl->device,
+                "failed to add zoned namespace:%u ret:%d\n",
+                ns->head->ns_id, ret);
+            return ret;
+        }
+        break;
+    default:
+        dev_warn(ctrl->device, "unknown csi:%u ns:%u\n",
+            ns->head->ids.csi, ns->head->ns_id);
+        return -ENODEV;
+    }
+
     if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
         is_power_of_2(ctrl->max_hw_sectors))
         iob = ctrl->max_hw_sectors;
@@ -1943,7 +2018,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
         iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
 
     ns->features = 0;
-    ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
+    ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
     /* the PI implementation requires metadata equal t10 pi tuple size */
     if (ns->ms == sizeof(struct t10_pi_tuple))
         ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
@@ -1985,7 +2060,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
     return 0;
 }
 
-static int nvme_revalidate_disk(struct gendisk *disk)
+static int _nvme_revalidate_disk(struct gendisk *disk)
 {
     struct nvme_ns *ns = disk->private_data;
     struct nvme_ctrl *ctrl = ns->ctrl;
@@ -2033,6 +2108,28 @@ static int nvme_revalidate_disk(struct gendisk *disk)
     return ret;
 }
 
+static int nvme_revalidate_disk(struct gendisk *disk)
+{
+    int ret;
+
+    ret = _nvme_revalidate_disk(disk);
+    if (ret)
+        return ret;
+
+#ifdef CONFIG_BLK_DEV_ZONED
+    if (blk_queue_is_zoned(disk->queue)) {
+        struct nvme_ns *ns = disk->private_data;
+        struct nvme_ctrl *ctrl = ns->ctrl;
+
+        ret = blk_revalidate_disk_zones(disk, NULL);
+        if (!ret)
+            blk_queue_max_zone_append_sectors(disk->queue,
+                              ctrl->max_zone_append);
+    }
+#endif
+    return ret;
+}
+
 static char nvme_pr_type(enum pr_type type)
 {
     switch (type) {
@@ -2163,6 +2260,7 @@ static const struct block_device_operations nvme_fops = {
     .release	= nvme_release,
     .getgeo		= nvme_getgeo,
     .revalidate_disk= nvme_revalidate_disk,
+    .report_zones	= nvme_report_zones,
     .pr_ops		= &nvme_pr_ops,
 };
@@ -2189,6 +2287,7 @@ const struct block_device_operations nvme_ns_head_ops = {
     .ioctl		= nvme_ioctl,
     .compat_ioctl	= nvme_compat_ioctl,
     .getgeo		= nvme_getgeo,
+    .report_zones	= nvme_report_zones,
     .pr_ops		= &nvme_pr_ops,
 };
 #endif /* CONFIG_NVME_MULTIPATH */
@@ -2270,6 +2369,9 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
     ctrl->page_size = 1 << page_shift;
 
-    ctrl->ctrl_config = NVME_CC_CSS_NVM;
+    if (NVME_CAP_CSS(ctrl->cap) & NVME_CAP_CSS_CSI)
+        ctrl->ctrl_config = NVME_CC_CSS_CSI;
+    else
+        ctrl->ctrl_config = NVME_CC_CSS_NVM;
+
     ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
     ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
@@ -2818,7 +2920,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
     return ret;
 }
 
-int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
+int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
         void *log, size_t size, u64 offset)
 {
     struct nvme_command c = { };
@@ -2832,27 +2934,55 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
     c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
     c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset));
     c.get_log_page.lpou = cpu_to_le32(upper_32_bits(offset));
+    c.get_log_page.csi = csi;
 
     return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
 }
 
-static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
+static struct nvme_cel *nvme_find_cel(struct nvme_ctrl *ctrl, u8 csi)
 {
+    struct nvme_cel *cel, *ret = NULL;
+
+    spin_lock(&ctrl->lock);
+    list_for_each_entry(cel, &ctrl->cels, entry) {
+        if (cel->csi == csi) {
+            ret = cel;
+            break;
+        }
+    }
+    spin_unlock(&ctrl->lock);
+
+    return ret;
+}
+
+static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
+                struct nvme_effects_log **log)
+{
+    struct nvme_cel *cel = nvme_find_cel(ctrl, csi);
     int ret;
 
-    if (!ctrl->effects)
-        ctrl->effects = kzalloc(sizeof(*ctrl->effects), GFP_KERNEL);
+    if (cel)
+        goto out;
 
-    if (!ctrl->effects)
-        return 0;
+    cel = kzalloc(sizeof(*cel), GFP_KERNEL);
+    if (!cel)
+        return -ENOMEM;
 
-    ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0,
-            ctrl->effects, sizeof(*ctrl->effects), 0);
+    ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0, csi,
+            &cel->log, sizeof(cel->log), 0);
     if (ret) {
-        kfree(ctrl->effects);
-        ctrl->effects = NULL;
+        kfree(cel);
+        return ret;
     }
-    return ret;
+
+    cel->csi = csi;
+
+    spin_lock(&ctrl->lock);
+    list_add_tail(&cel->entry, &ctrl->cels);
+    spin_unlock(&ctrl->lock);
+out:
+    *log = &cel->log;
+    return 0;
 }
 
 /*
@@ -2873,7 +3003,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
         return ret;
     }
     page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
-    ctrl->sqsize = min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize);
+    ctrl->sqsize = min_t(u16, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize);
 
     if (ctrl->vs >= NVME_VS(1, 1, 0))
         ctrl->subsystem = NVME_CAP_NSSRC(ctrl->cap);
@@ -2885,7 +3015,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
     }
 
     if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
-        ret = nvme_get_effects_log(ctrl);
+        ret = nvme_get_effects_log(ctrl, NVME_CSI_NVM, &ctrl->effects);
         if (ret < 0)
             goto out_free;
     }
@@ -2947,7 +3077,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
     if (id->rtd3e) {
         /* us -> s */
-        u32 transition_time = le32_to_cpu(id->rtd3e) / 1000000;
+        u32 transition_time = le32_to_cpu(id->rtd3e) / USEC_PER_SEC;
 
         ctrl->shutdown_timeout = clamp_t(unsigned int, transition_time,
                          shutdown_timeout, 60);
@@ -3405,6 +3535,66 @@ static ssize_t nvme_sysfs_show_address(struct device *dev,
 }
 static DEVICE_ATTR(address, S_IRUGO, nvme_sysfs_show_address, NULL);
 
+static ssize_t nvme_ctrl_loss_tmo_show(struct device *dev,
+        struct device_attribute *attr, char *buf)
+{
+    struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+    struct nvmf_ctrl_options *opts = ctrl->opts;
+
+    if (ctrl->opts->max_reconnects == -1)
+        return sprintf(buf, "off\n");
+    return sprintf(buf, "%d\n",
+            opts->max_reconnects * opts->reconnect_delay);
+}
+
+static ssize_t nvme_ctrl_loss_tmo_store(struct device *dev,
+        struct device_attribute *attr, const char *buf, size_t count)
+{
+    struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+    struct nvmf_ctrl_options *opts = ctrl->opts;
+    int ctrl_loss_tmo, err;
+
+    err = kstrtoint(buf, 10, &ctrl_loss_tmo);
+    if (err)
+        return -EINVAL;
+    else if (ctrl_loss_tmo < 0)
+        opts->max_reconnects = -1;
+    else
+        opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
+                    opts->reconnect_delay);
+    return count;
+}
+static DEVICE_ATTR(ctrl_loss_tmo, S_IRUGO | S_IWUSR,
+    nvme_ctrl_loss_tmo_show, nvme_ctrl_loss_tmo_store);
+
+static ssize_t nvme_ctrl_reconnect_delay_show(struct device *dev,
+        struct device_attribute *attr, char *buf)
+{
+    struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+    if (ctrl->opts->reconnect_delay == -1)
+        return sprintf(buf, "off\n");
+    return sprintf(buf, "%d\n", ctrl->opts->reconnect_delay);
+}
+
+static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev,
+        struct device_attribute *attr, const char *buf, size_t count)
+{
+    struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+    unsigned int v;
+    int err;
+
+    err = kstrtou32(buf, 10, &v);
+    if (err || v > UINT_MAX)
+        return -EINVAL;
+
+    ctrl->opts->reconnect_delay = v;
+    return count;
+}
+static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR,
+    nvme_ctrl_reconnect_delay_show, nvme_ctrl_reconnect_delay_store);
+
 static struct attribute *nvme_dev_attrs[] = {
     &dev_attr_reset_controller.attr,
     &dev_attr_rescan_controller.attr,
@@ -3422,6 +3612,8 @@ static struct attribute *nvme_dev_attrs[] = {
     &dev_attr_sqsize.attr,
     &dev_attr_hostnqn.attr,
     &dev_attr_hostid.attr,
+    &dev_attr_ctrl_loss_tmo.attr,
+    &dev_attr_reconnect_delay.attr,
     NULL
 };
@@ -3518,6 +3710,13 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
         goto out_cleanup_srcu;
     }
 
+    if (head->ids.csi) {
+        ret = nvme_get_effects_log(ctrl, head->ids.csi, &head->effects);
+        if (ret)
+            goto out_cleanup_srcu;
+    } else
+        head->effects = ctrl->effects;
+
     ret = nvme_mpath_alloc_disk(ctrl, head);
     if (ret)
         goto out_cleanup_srcu;
@@ -3734,7 +3933,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
     nvme_mpath_clear_current_path(ns);
     synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
-    if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
+    if (ns->disk->flags & GENHD_FL_UP) {
         del_gendisk(ns->disk);
         blk_cleanup_queue(ns->queue);
         if (blk_get_integrity(ns->disk))
@@ -3765,7 +3964,7 @@ static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
     ns = nvme_find_get_ns(ctrl, nsid);
     if (ns) {
-        if (ns->disk && revalidate_disk(ns->disk))
+        if (revalidate_disk(ns->disk))
             nvme_ns_remove(ns);
         nvme_put_ns(ns);
     } else
@@ -3858,8 +4057,8 @@ static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
     * raced with us in reading the log page, which could cause us to miss
     * updates.
     */
-    error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CHANGED_NS, 0, log,
-            log_size, 0);
+    error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CHANGED_NS, 0,
+            NVME_CSI_NVM, log, log_size, 0);
     if (error)
         dev_warn(ctrl->device,
             "reading changed ns log failed: %d\n", error);
@@ -4003,8 +4202,8 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
     if (!log)
         return;
 
-    if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, log,
-            sizeof(*log), 0))
+    if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, NVME_CSI_NVM,
+            log, sizeof(*log), 0))
         dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
     kfree(log);
 }
@@ -4141,11 +4340,16 @@ static void nvme_free_ctrl(struct device *dev)
     struct nvme_ctrl *ctrl =
         container_of(dev, struct nvme_ctrl, ctrl_device);
     struct nvme_subsystem *subsys = ctrl->subsys;
+    struct nvme_cel *cel, *next;
 
     if (subsys && ctrl->instance != subsys->instance)
         ida_simple_remove(&nvme_instance_ida, ctrl->instance);
 
-    kfree(ctrl->effects);
+    list_for_each_entry_safe(cel, next, &ctrl->cels, entry) {
+        list_del(&cel->entry);
+        kfree(cel);
+    }
+
     nvme_mpath_uninit(ctrl);
     __free_page(ctrl->discard_page);
@@ -4176,6 +4380,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
     spin_lock_init(&ctrl->lock);
     mutex_init(&ctrl->scan_lock);
     INIT_LIST_HEAD(&ctrl->namespaces);
+    INIT_LIST_HEAD(&ctrl->cels);
     init_rwsem(&ctrl->namespaces_rwsem);
     ctrl->dev = dev;
     ctrl->ops = ops;
@@ -4372,6 +4577,8 @@ static inline void _nvme_check_size(void)
     BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
     BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
     BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
+    BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
+    BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
     BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
     BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
    BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
...
@@ -62,7 +62,7 @@ static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
     int ret;
 
     ret = nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
-               &data->log, sizeof(data->log), 0);
+               NVME_CSI_NVM, &data->log, sizeof(data->log), 0);
 
     return ret <= 0 ? ret : -EIO;
 }
...
@@ -593,8 +593,8 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
         dev_meta_off = dev_meta;
 
         ret = nvme_get_log(ctrl, ns->head->ns_id,
-                NVME_NVM_LOG_REPORT_CHUNK, 0, dev_meta, len,
-                offset);
+                NVME_NVM_LOG_REPORT_CHUNK, 0, NVME_CSI_NVM,
+                dev_meta, len, offset);
         if (ret) {
             dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
             break;
...
@@ -527,7 +527,7 @@ static int nvme_read_ana_log(struct nvme_ctrl *ctrl)
     int error;
 
     mutex_lock(&ctrl->ana_lock);
-    error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA, 0,
+    error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA, 0, NVME_CSI_NVM,
             ctrl->ana_log_buf, ctrl->ana_log_size, 0);
     if (error) {
         dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error);
...
@@ -191,6 +191,12 @@ struct nvme_fault_inject {
 #endif
 };
 
+struct nvme_cel {
+    struct list_head	entry;
+    struct nvme_effects_log	log;
+    u8			csi;
+};
+
 struct nvme_ctrl {
     bool comp_seen;
     enum nvme_ctrl_state state;
@@ -232,6 +238,9 @@ struct nvme_ctrl {
     u32 max_hw_sectors;
     u32 max_segments;
     u32 max_integrity_segments;
+#ifdef CONFIG_BLK_DEV_ZONED
+    u32 max_zone_append;
+#endif
     u16 crdt[3];
     u16 oncs;
     u16 oacs;
@@ -257,6 +266,7 @@ struct nvme_ctrl {
     unsigned long quirks;
     struct nvme_id_power_state psd[32];
     struct nvme_effects_log *effects;
+    struct list_head cels;
     struct work_struct scan_work;
     struct work_struct async_event_work;
     struct delayed_work ka_work;
@@ -339,6 +349,7 @@ struct nvme_ns_ids {
     u8	eui64[8];
     u8	nguid[16];
     uuid_t	uuid;
+    u8	csi;
 };
 
 /*
@@ -358,6 +369,7 @@ struct nvme_ns_head {
     struct kref		ref;
     bool			shared;
     int			instance;
+    struct nvme_effects_log *effects;
 #ifdef CONFIG_NVME_MULTIPATH
     struct gendisk		*disk;
     struct bio_list		requeue_list;
@@ -395,6 +407,9 @@ struct nvme_ns {
     u16 sgs;
     u32 sws;
     u8 pi_type;
+#ifdef CONFIG_BLK_DEV_ZONED
+    u64 zsze;
+#endif
     unsigned long features;
     unsigned long flags;
 #define NVME_NS_REMOVING	0
@@ -560,8 +575,11 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
 int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
 int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
 
-int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
+int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
         void *log, size_t size, u64 offset);
+struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
+        struct nvme_ns_head **head, int *srcu_idx);
+void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx);
 
 extern const struct attribute_group *nvme_ns_id_attr_groups[];
 extern const struct block_device_operations nvme_ns_head_ops;
@@ -684,6 +702,36 @@ static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
 }
 #endif /* CONFIG_NVME_MULTIPATH */
 
+#ifdef CONFIG_BLK_DEV_ZONED
+int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
+              unsigned lbaf);
+
+int nvme_report_zones(struct gendisk *disk, sector_t sector,
+              unsigned int nr_zones, report_zones_cb cb, void *data);
+
+blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
+                       struct nvme_command *cmnd,
+                       enum nvme_zone_mgmt_action action);
+#else
+#define nvme_report_zones NULL
+
+static inline blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns,
+        struct request *req, struct nvme_command *cmnd,
+        enum nvme_zone_mgmt_action action)
+{
+    return BLK_STS_NOTSUPP;
+}
+
+static inline int nvme_update_zone_info(struct gendisk *disk,
+                    struct nvme_ns *ns,
+                    unsigned lbaf)
+{
+    dev_warn(ns->ctrl->device,
+         "Please enable CONFIG_BLK_DEV_ZONED to support ZNS devices\n");
+    return -EPROTONOSUPPORT;
+}
+#endif
+
 #ifdef CONFIG_NVM
 int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
 void nvme_nvm_unregister(struct nvme_ns *ns);
...
@@ -61,10 +61,10 @@ MODULE_PARM_DESC(sgl_threshold,
 static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
 static const struct kernel_param_ops io_queue_depth_ops = {
     .set = io_queue_depth_set,
-    .get = param_get_int,
+    .get = param_get_uint,
 };
 
-static int io_queue_depth = 1024;
+static unsigned int io_queue_depth = 1024;
 module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
 MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
@@ -115,7 +115,7 @@ struct nvme_dev {
     unsigned max_qid;
     unsigned io_queues[HCTX_MAX_TYPES];
     unsigned int num_vecs;
-    int q_depth;
+    u16 q_depth;
     int io_sqes;
     u32 db_stride;
     void __iomem *bar;
@@ -151,13 +151,14 @@ struct nvme_dev {
 static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
 {
-    int n = 0, ret;
+    int ret;
+    u16 n;
 
-    ret = kstrtoint(val, 10, &n);
+    ret = kstrtou16(val, 10, &n);
     if (ret != 0 || n < 2)
         return -EINVAL;
 
-    return param_set_int(val, kp);
+    return param_set_ushort(val, kp);
 }
 
 static inline unsigned int sq_idx(unsigned int qid, u32 stride)
@@ -361,7 +362,7 @@ static int nvme_pci_npages_sgl(unsigned int num_seg)
     return DIV_ROUND_UP(num_seg * sizeof(struct nvme_sgl_desc), PAGE_SIZE);
 }
 
-static unsigned int nvme_pci_iod_alloc_size(struct nvme_dev *dev,
+static size_t nvme_pci_iod_alloc_size(struct nvme_dev *dev,
         unsigned int size, unsigned int nseg, bool use_sgl)
 {
     size_t alloc_size;
@@ -500,9 +501,6 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
     int nseg = blk_rq_nr_phys_segments(req);
     unsigned int avg_seg_size;
 
-    if (nseg == 0)
-        return false;
-
     avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg);
 
     if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
@@ -764,7 +762,7 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
     cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma);
     if (bv->bv_len > first_prp_len)
         cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len);
-    return 0;
+    return BLK_STS_OK;
 }
 
 static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev,
@@ -782,7 +780,7 @@ static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev,
     cmnd->dptr.sgl.addr = cpu_to_le64(iod->first_dma);
     cmnd->dptr.sgl.length = cpu_to_le32(iod->dma_len);
     cmnd->dptr.sgl.type = NVME_SGL_FMT_DATA_DESC << 4;
-    return 0;
+    return BLK_STS_OK;
 }
 
 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
@@ -846,7 +844,7 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
     if (dma_mapping_error(dev->dev, iod->meta_dma))
         return BLK_STS_IOERR;
     cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
-    return 0;
+    return BLK_STS_OK;
 }
 
 /*
@@ -1019,6 +1017,7 @@ static irqreturn_t nvme_irq(int irq, void *data)
 static irqreturn_t nvme_irq_check(int irq, void *data)
 {
     struct nvme_queue *nvmeq = data;
+
     if (nvme_cqe_pending(nvmeq))
         return IRQ_WAKE_THREAD;
     return IRQ_NONE;
@@ -1154,7 +1153,6 @@ static void abort_endio(struct request *req, blk_status_t error)
 static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
 {
-
     /* If true, indicates loss of adapter communication, possibly by a
     * NVMe Subsystem reset.
     */
@@ -1402,6 +1400,7 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
     if (q_size_aligned * nr_io_queues > dev->cmb_size) {
         u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues);
+
         mem_per_q = round_down(mem_per_q, dev->ctrl.page_size);
         q_depth = div_u64(mem_per_q, entry_size);
@@ -1932,12 +1931,12 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
 
 static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
 {
-    u32 chunk_size;
+    u64 min_chunk = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES);
+    u64 hmminds = max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
+    u64 chunk_size;
 
     /* start big and work our way down */
-    for (chunk_size = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES);
-         chunk_size >= max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
-         chunk_size /= 2) {
+    for (chunk_size = min_chunk; chunk_size >= hmminds; chunk_size /= 2) {
         if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) {
             if (!min || dev->host_mem_size >= min)
                 return 0;
@@ -2003,7 +2002,7 @@ static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs)
     unsigned int nr_read_queues, nr_write_queues = dev->nr_write_queues;
 
     /*
-     * If there is no interupt available for queues, ensure that
+     * If there is no interrupt available for queues, ensure that
     * the default queue is set to 1. The affinity set size is
     * also set to one, but the irq core ignores it for this case.
    *
@@ -2261,8 +2260,8 @@ static void nvme_dev_add(struct nvme_dev *dev)
         dev->tagset.nr_maps++;
     dev->tagset.timeout = NVME_IO_TIMEOUT;
     dev->tagset.numa_node = dev->ctrl.numa_node;
-    dev->tagset.queue_depth =
-            min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
+    dev->tagset.queue_depth = min_t(unsigned int, dev->q_depth,
+                    BLK_MQ_MAX_DEPTH) - 1;
     dev->tagset.cmd_size = sizeof(struct nvme_iod);
     dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
     dev->tagset.driver_data = dev;
@@ -2321,7 +2320,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
     dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 
-    dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
+    dev->q_depth = min_t(u16, NVME_CAP_MQES(dev->ctrl.cap) + 1,
                 io_queue_depth);
     dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */
     dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
@@ -2876,6 +2875,7 @@ static void nvme_reset_done(struct pci_dev *pdev)
 static void nvme_shutdown(struct pci_dev *pdev)
 {
     struct nvme_dev *dev = pci_get_drvdata(pdev);
+
     nvme_disable_prepare_reset(dev, true);
 }
@@ -3006,6 +3006,7 @@ static int nvme_suspend(struct device *dev)
 static int nvme_simple_suspend(struct device *dev)
 {
     struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
+
     return nvme_disable_prepare_reset(ndev, true);
 }
@@ -3079,16 +3080,16 @@ static const struct pci_error_handlers nvme_err_handler = {
 };
 
 static const struct pci_device_id nvme_id_table[] = {
-    { PCI_VDEVICE(INTEL, 0x0953),
+    { PCI_VDEVICE(INTEL, 0x0953),	/* Intel 750/P3500/P3600/P3700 */
         .driver_data = NVME_QUIRK_STRIPE_SIZE |
                 NVME_QUIRK_DEALLOCATE_ZEROES, },
-    { PCI_VDEVICE(INTEL, 0x0a53),
+    { PCI_VDEVICE(INTEL, 0x0a53),	/* Intel P3520 */
        .driver_data = NVME_QUIRK_STRIPE_SIZE |
                 NVME_QUIRK_DEALLOCATE_ZEROES, },
-    { PCI_VDEVICE(INTEL, 0x0a54),
+    { PCI_VDEVICE(INTEL, 0x0a54),	/* Intel P4500/P4600 */
        .driver_data = NVME_QUIRK_STRIPE_SIZE |
                 NVME_QUIRK_DEALLOCATE_ZEROES, },
-    { PCI_VDEVICE(INTEL, 0x0a55),
+    { PCI_VDEVICE(INTEL, 0x0a55),	/* Dell Express Flash P4600 */
        .driver_data = NVME_QUIRK_STRIPE_SIZE |
                 NVME_QUIRK_DEALLOCATE_ZEROES, },
     { PCI_VDEVICE(INTEL, 0xf1a5),	/* Intel 600P/P3100 */
...
...@@ -46,6 +46,7 @@ struct nvme_tcp_request { ...@@ -46,6 +46,7 @@ struct nvme_tcp_request {
u32 pdu_sent; u32 pdu_sent;
u16 ttag; u16 ttag;
struct list_head entry; struct list_head entry;
struct llist_node lentry;
__le32 ddgst; __le32 ddgst;
struct bio *curr_bio; struct bio *curr_bio;
...@@ -75,9 +76,10 @@ struct nvme_tcp_queue { ...@@ -75,9 +76,10 @@ struct nvme_tcp_queue {
struct work_struct io_work; struct work_struct io_work;
int io_cpu; int io_cpu;
spinlock_t lock;
struct mutex send_mutex; struct mutex send_mutex;
struct llist_head req_list;
struct list_head send_list; struct list_head send_list;
bool more_requests;
/* recv state */ /* recv state */
void *pdu; void *pdu;
...@@ -261,15 +263,13 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req, ...@@ -261,15 +263,13 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
} }
static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
bool sync) bool sync, bool last)
{ {
struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_queue *queue = req->queue;
bool empty; bool empty;
spin_lock(&queue->lock); empty = llist_add(&req->lentry, &queue->req_list) &&
empty = list_empty(&queue->send_list) && !queue->request; list_empty(&queue->send_list) && !queue->request;
list_add_tail(&req->entry, &queue->send_list);
spin_unlock(&queue->lock);
/* /*
* if we're the first on the send_list and we can try to send * if we're the first on the send_list and we can try to send
...@@ -278,25 +278,42 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, ...@@ -278,25 +278,42 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
*/ */
if (queue->io_cpu == smp_processor_id() && if (queue->io_cpu == smp_processor_id() &&
sync && empty && mutex_trylock(&queue->send_mutex)) { sync && empty && mutex_trylock(&queue->send_mutex)) {
queue->more_requests = !last;
nvme_tcp_try_send(queue); nvme_tcp_try_send(queue);
queue->more_requests = false;
mutex_unlock(&queue->send_mutex); mutex_unlock(&queue->send_mutex);
} else { } else if (last) {
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
} }
} }
static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
{
struct nvme_tcp_request *req;
struct llist_node *node;
for (node = llist_del_all(&queue->req_list); node; node = node->next) {
req = llist_entry(node, struct nvme_tcp_request, lentry);
list_add(&req->entry, &queue->send_list);
}
}
static inline struct nvme_tcp_request * static inline struct nvme_tcp_request *
nvme_tcp_fetch_request(struct nvme_tcp_queue *queue) nvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
{ {
struct nvme_tcp_request *req; struct nvme_tcp_request *req;
spin_lock(&queue->lock);
req = list_first_entry_or_null(&queue->send_list, req = list_first_entry_or_null(&queue->send_list,
struct nvme_tcp_request, entry); struct nvme_tcp_request, entry);
if (req) if (!req) {
list_del(&req->entry); nvme_tcp_process_req_list(queue);
spin_unlock(&queue->lock); req = list_first_entry_or_null(&queue->send_list,
struct nvme_tcp_request, entry);
if (unlikely(!req))
return NULL;
}
list_del(&req->entry);
return req; return req;
} }
...@@ -596,7 +613,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, ...@@ -596,7 +613,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
req->state = NVME_TCP_SEND_H2C_PDU; req->state = NVME_TCP_SEND_H2C_PDU;
req->offset = 0; req->offset = 0;
nvme_tcp_queue_request(req, false); nvme_tcp_queue_request(req, false, true);
return 0; return 0;
} }
...@@ -863,6 +880,12 @@ static void nvme_tcp_state_change(struct sock *sk) ...@@ -863,6 +880,12 @@ static void nvme_tcp_state_change(struct sock *sk)
read_unlock(&sk->sk_callback_lock); read_unlock(&sk->sk_callback_lock);
} }
static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
{
return !list_empty(&queue->send_list) ||
!llist_empty(&queue->req_list) || queue->more_requests;
}
static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
{ {
queue->request = NULL; queue->request = NULL;
...@@ -884,7 +907,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) ...@@ -884,7 +907,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
bool last = nvme_tcp_pdu_last_send(req, len); bool last = nvme_tcp_pdu_last_send(req, len);
int ret, flags = MSG_DONTWAIT; int ret, flags = MSG_DONTWAIT;
if (last && !queue->data_digest) if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
flags |= MSG_EOR; flags |= MSG_EOR;
else else
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
...@@ -931,7 +954,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) ...@@ -931,7 +954,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
int flags = MSG_DONTWAIT; int flags = MSG_DONTWAIT;
int ret; int ret;
if (inline_data) if (inline_data || nvme_tcp_queue_more(queue))
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else else
flags |= MSG_EOR; flags |= MSG_EOR;
...@@ -996,12 +1019,17 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) ...@@ -996,12 +1019,17 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
{ {
struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_queue *queue = req->queue;
int ret; int ret;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR }; struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = { struct kvec iov = {
.iov_base = &req->ddgst + req->offset, .iov_base = &req->ddgst + req->offset,
.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
}; };
if (nvme_tcp_queue_more(queue))
msg.msg_flags |= MSG_MORE;
else
msg.msg_flags |= MSG_EOR;
ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
if (unlikely(ret <= 0)) if (unlikely(ret <= 0))
return ret; return ret;
...@@ -1344,8 +1372,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, ...@@ -1344,8 +1372,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
int ret, rcv_pdu_size; int ret, rcv_pdu_size;
queue->ctrl = ctrl; queue->ctrl = ctrl;
init_llist_head(&queue->req_list);
INIT_LIST_HEAD(&queue->send_list); INIT_LIST_HEAD(&queue->send_list);
spin_lock_init(&queue->lock);
mutex_init(&queue->send_mutex); mutex_init(&queue->send_mutex);
INIT_WORK(&queue->io_work, nvme_tcp_io_work); INIT_WORK(&queue->io_work, nvme_tcp_io_work);
queue->queue_size = queue_size; queue->queue_size = queue_size;
...@@ -2106,7 +2134,7 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg) ...@@ -2106,7 +2134,7 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
ctrl->async_req.curr_bio = NULL; ctrl->async_req.curr_bio = NULL;
ctrl->async_req.data_len = 0; ctrl->async_req.data_len = 0;
nvme_tcp_queue_request(&ctrl->async_req, true); nvme_tcp_queue_request(&ctrl->async_req, true, true);
} }
static enum blk_eh_timer_return static enum blk_eh_timer_return
...@@ -2218,6 +2246,14 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, ...@@ -2218,6 +2246,14 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
return 0; return 0;
} }
static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
struct nvme_tcp_queue *queue = hctx->driver_data;
if (!llist_empty(&queue->req_list))
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}
static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd) const struct blk_mq_queue_data *bd)
{ {
...@@ -2237,7 +2273,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -2237,7 +2273,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq); blk_mq_start_request(rq);
nvme_tcp_queue_request(req, true); nvme_tcp_queue_request(req, true, bd->last);
return BLK_STS_OK; return BLK_STS_OK;
} }
...@@ -2305,6 +2341,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx) ...@@ -2305,6 +2341,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
static const struct blk_mq_ops nvme_tcp_mq_ops = { static const struct blk_mq_ops nvme_tcp_mq_ops = {
.queue_rq = nvme_tcp_queue_rq, .queue_rq = nvme_tcp_queue_rq,
.commit_rqs = nvme_tcp_commit_rqs,
.complete = nvme_complete_rq, .complete = nvme_complete_rq,
.init_request = nvme_tcp_init_request, .init_request = nvme_tcp_init_request,
.exit_request = nvme_tcp_exit_request, .exit_request = nvme_tcp_exit_request,
......
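Note on the nvme-tcp send-path changes above: queue->req_list is a lock-free llist that submitters push onto, and the io_work consumer drains it in one shot and splices it onto send_list, which restores submission order because the drained snapshot is in reverse (LIFO) order and each entry is prepended. A minimal userspace C11 sketch of the same producer/consumer pattern (illustrative only; push_request, drain_requests and the node layout are made up for the example, not kernel APIs):

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	int id;
	struct node *next;      /* lock-free producer list (LIFO) */
	struct node *fifo_next; /* consumer-private FIFO */
};

static _Atomic(struct node *) req_list = NULL; /* analogue of queue->req_list */

/* Producer side: analogue of llist_add(); returns nonzero if the list was empty. */
static int push_request(struct node *n)
{
	struct node *old = atomic_load(&req_list);

	do {
		n->next = old;
	} while (!atomic_compare_exchange_weak(&req_list, &old, n));
	return old == NULL;
}

/* Consumer side: analogue of llist_del_all() plus the prepend-splice loop. */
static struct node *drain_requests(void)
{
	struct node *lifo = atomic_exchange(&req_list, (struct node *)NULL);
	struct node *fifo = NULL;

	/*
	 * The snapshot is in reverse submission order; prepending each
	 * element onto the private list restores FIFO order.
	 */
	for (; lifo; lifo = lifo->next) {
		lifo->fifo_next = fifo;
		fifo = lifo;
	}
	return fifo;
}

int main(void)
{
	struct node a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };

	push_request(&a);
	push_request(&b);
	push_request(&c);
	for (struct node *n = drain_requests(); n; n = n->fifo_next)
		printf("send %d\n", n->id); /* prints 1, 2, 3 */
	return 0;
}

The value returned by the push mirrors the "empty" test in nvme_tcp_queue_request() that decides whether the submitter may attempt an inline send under send_mutex instead of kicking io_work.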
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2020 Western Digital Corporation or its affiliates.
*/
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include "nvme.h"
static int nvme_set_max_append(struct nvme_ctrl *ctrl)
{
struct nvme_command c = { };
struct nvme_id_ctrl_zns *id;
int status;
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id)
return -ENOMEM;
c.identify.opcode = nvme_admin_identify;
c.identify.cns = NVME_ID_CNS_CS_CTRL;
c.identify.csi = NVME_CSI_ZNS;
status = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
if (status) {
kfree(id);
return status;
}
if (id->zasl)
ctrl->max_zone_append = 1 << (id->zasl + 3);
else
ctrl->max_zone_append = ctrl->max_hw_sectors;
kfree(id);
return 0;
}
int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
unsigned lbaf)
{
struct nvme_effects_log *log = ns->head->effects;
struct request_queue *q = disk->queue;
struct nvme_command c = { };
struct nvme_id_ns_zns *id;
int status;
/* Driver requires zone append support */
if (!(le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
NVME_CMD_EFFECTS_CSUPP)) {
dev_warn(ns->ctrl->device,
"append not supported for zoned namespace:%d\n",
ns->head->ns_id);
return -EINVAL;
}
/* Lazily query controller append limit for the first zoned namespace */
if (!ns->ctrl->max_zone_append) {
status = nvme_set_max_append(ns->ctrl);
if (status)
return status;
}
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id)
return -ENOMEM;
c.identify.opcode = nvme_admin_identify;
c.identify.nsid = cpu_to_le32(ns->head->ns_id);
c.identify.cns = NVME_ID_CNS_CS_NS;
c.identify.csi = NVME_CSI_ZNS;
status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, id, sizeof(*id));
if (status)
goto free_data;
/*
* We currently do not handle devices requiring any of the zoned
* operation characteristics.
*/
if (id->zoc) {
dev_warn(ns->ctrl->device,
"zone operations:%x not supported for namespace:%u\n",
le16_to_cpu(id->zoc), ns->head->ns_id);
status = -EINVAL;
goto free_data;
}
ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze));
if (!is_power_of_2(ns->zsze)) {
dev_warn(ns->ctrl->device,
"invalid zone size:%llu for namespace:%u\n",
ns->zsze, ns->head->ns_id);
status = -EINVAL;
goto free_data;
}
q->limits.zoned = BLK_ZONED_HM;
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
free_data:
kfree(id);
return status;
}
static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
unsigned int nr_zones, size_t *buflen)
{
struct request_queue *q = ns->disk->queue;
size_t bufsize;
void *buf;
const size_t min_bufsize = sizeof(struct nvme_zone_report) +
sizeof(struct nvme_zone_descriptor);
nr_zones = min_t(unsigned int, nr_zones,
get_capacity(ns->disk) >> ilog2(ns->zsze));
bufsize = sizeof(struct nvme_zone_report) +
nr_zones * sizeof(struct nvme_zone_descriptor);
bufsize = min_t(size_t, bufsize,
queue_max_hw_sectors(q) << SECTOR_SHIFT);
bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
while (bufsize >= min_bufsize) {
buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
if (buf) {
*buflen = bufsize;
return buf;
}
bufsize >>= 1;
}
return NULL;
}
static int __nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
struct nvme_zone_report *report,
size_t buflen)
{
struct nvme_command c = { };
int ret;
c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
c.zmr.zra = NVME_ZRA_ZONE_REPORT;
c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;
ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
if (ret)
return ret;
return le64_to_cpu(report->nr_zones);
}
static int nvme_zone_parse_entry(struct nvme_ns *ns,
struct nvme_zone_descriptor *entry,
unsigned int idx, report_zones_cb cb,
void *data)
{
struct blk_zone zone = { };
if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
dev_err(ns->ctrl->device, "invalid zone type %#x\n",
entry->zt);
return -EINVAL;
}
zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
zone.cond = entry->zs >> 4;
zone.len = ns->zsze;
zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));
return cb(&zone, idx, data);
}
static int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data)
{
struct nvme_zone_report *report;
int ret, zone_idx = 0;
unsigned int nz, i;
size_t buflen;
report = nvme_zns_alloc_report_buffer(ns, nr_zones, &buflen);
if (!report)
return -ENOMEM;
sector &= ~(ns->zsze - 1);
while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
memset(report, 0, buflen);
ret = __nvme_ns_report_zones(ns, sector, report, buflen);
if (ret < 0)
goto out_free;
nz = min_t(unsigned int, ret, nr_zones);
if (!nz)
break;
for (i = 0; i < nz && zone_idx < nr_zones; i++) {
ret = nvme_zone_parse_entry(ns, &report->entries[i],
zone_idx, cb, data);
if (ret)
goto out_free;
zone_idx++;
}
sector += ns->zsze * nz;
}
if (zone_idx > 0)
ret = zone_idx;
else
ret = -EINVAL;
out_free:
kvfree(report);
return ret;
}
int nvme_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data)
{
struct nvme_ns_head *head = NULL;
struct nvme_ns *ns;
int srcu_idx, ret;
ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
if (unlikely(!ns))
return -EWOULDBLOCK;
if (ns->head->ids.csi == NVME_CSI_ZNS)
ret = nvme_ns_report_zones(ns, sector, nr_zones, cb, data);
else
ret = -EINVAL;
nvme_put_ns_from_disk(head, srcu_idx);
return ret;
}
blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
struct nvme_command *c, enum nvme_zone_mgmt_action action)
{
c->zms.opcode = nvme_cmd_zone_mgmt_send;
c->zms.nsid = cpu_to_le32(ns->head->ns_id);
c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
c->zms.zsa = action;
if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
c->zms.select_all = 1;
return BLK_STS_OK;
}
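Note on nvme_set_max_append() above: ZASL expresses the zone append size limit as a power of two in units of the controller's minimum memory page size (CAP.MPSMIN), while the driver tracks the limit in 512-byte sectors; assuming the common 4 KiB page unit, 2^zasl pages becomes 1 << (zasl + 3) sectors. A small illustrative helper (not driver code, and the 4 KiB assumption is baked in):

#include <stdio.h>

/*
 * Illustrative only: convert a ZASL value to a limit in 512-byte sectors,
 * assuming CAP.MPSMIN is 4 KiB: 2^zasl pages * 4096 B / 512 B = 1 << (zasl + 3).
 */
static unsigned long long zasl_to_sectors(unsigned char zasl)
{
	return 1ULL << (zasl + 3);
}

int main(void)
{
	/* e.g. zasl = 5 -> 2^5 * 4 KiB = 128 KiB -> 256 sectors */
	printf("zasl=5 -> %llu sectors\n", zasl_to_sectors(5));
	return 0;
}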
...@@ -427,7 +427,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) ...@@ -427,7 +427,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->awupf = 0; id->awupf = 0;
id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */ id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
if (ctrl->ops->has_keyed_sgls) if (ctrl->ops->flags & NVMF_KEYED_SGLS)
id->sgls |= cpu_to_le32(1 << 2); id->sgls |= cpu_to_le32(1 << 2);
if (req->port->inline_data_size) if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20); id->sgls |= cpu_to_le32(1 << 20);
......
...@@ -862,14 +862,14 @@ static ssize_t nvmet_subsys_attr_version_show(struct config_item *item, ...@@ -862,14 +862,14 @@ static ssize_t nvmet_subsys_attr_version_show(struct config_item *item,
struct nvmet_subsys *subsys = to_subsys(item); struct nvmet_subsys *subsys = to_subsys(item);
if (NVME_TERTIARY(subsys->ver)) if (NVME_TERTIARY(subsys->ver))
return snprintf(page, PAGE_SIZE, "%d.%d.%d\n", return snprintf(page, PAGE_SIZE, "%llu.%llu.%llu\n",
(int)NVME_MAJOR(subsys->ver), NVME_MAJOR(subsys->ver),
(int)NVME_MINOR(subsys->ver), NVME_MINOR(subsys->ver),
(int)NVME_TERTIARY(subsys->ver)); NVME_TERTIARY(subsys->ver));
return snprintf(page, PAGE_SIZE, "%d.%d\n", return snprintf(page, PAGE_SIZE, "%llu.%llu\n",
(int)NVME_MAJOR(subsys->ver), NVME_MAJOR(subsys->ver),
(int)NVME_MINOR(subsys->ver)); NVME_MINOR(subsys->ver));
} }
static ssize_t nvmet_subsys_attr_version_store(struct config_item *item, static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
......
...@@ -336,7 +336,7 @@ int nvmet_enable_port(struct nvmet_port *port) ...@@ -336,7 +336,7 @@ int nvmet_enable_port(struct nvmet_port *port)
* If the user requested PI support and the transport isn't pi capable, * If the user requested PI support and the transport isn't pi capable,
* don't enable the port. * don't enable the port.
*/ */
if (port->pi_enable && !ops->metadata_support) { if (port->pi_enable && !(ops->flags & NVMF_METADATA_SUPPORTED)) {
pr_err("T10-PI is not supported by transport type %d\n", pr_err("T10-PI is not supported by transport type %d\n",
port->disc_addr.trtype); port->disc_addr.trtype);
ret = -EINVAL; ret = -EINVAL;
......
...@@ -277,7 +277,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) ...@@ -277,7 +277,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */ id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */
if (ctrl->ops->has_keyed_sgls) if (ctrl->ops->flags & NVMF_KEYED_SGLS)
id->sgls |= cpu_to_le32(1 << 2); id->sgls |= cpu_to_le32(1 << 2);
if (req->port->inline_data_size) if (req->port->inline_data_size)
id->sgls |= cpu_to_le32(1 << 20); id->sgls |= cpu_to_le32(1 << 20);
......
...@@ -43,6 +43,17 @@ static const match_table_t opt_tokens = { ...@@ -43,6 +43,17 @@ static const match_table_t opt_tokens = {
{ NVMF_OPT_ERR, NULL } { NVMF_OPT_ERR, NULL }
}; };
static int fcloop_verify_addr(substring_t *s)
{
size_t blen = s->to - s->from + 1;
if (strnlen(s->from, blen) != NVME_FC_TRADDR_HEXNAMELEN + 2 ||
strncmp(s->from, "0x", 2))
return -EINVAL;
return 0;
}
static int static int
fcloop_parse_options(struct fcloop_ctrl_options *opts, fcloop_parse_options(struct fcloop_ctrl_options *opts,
const char *buf) const char *buf)
...@@ -64,14 +75,16 @@ fcloop_parse_options(struct fcloop_ctrl_options *opts, ...@@ -64,14 +75,16 @@ fcloop_parse_options(struct fcloop_ctrl_options *opts,
opts->mask |= token; opts->mask |= token;
switch (token) { switch (token) {
case NVMF_OPT_WWNN: case NVMF_OPT_WWNN:
if (match_u64(args, &token64)) { if (fcloop_verify_addr(args) ||
match_u64(args, &token64)) {
ret = -EINVAL; ret = -EINVAL;
goto out_free_options; goto out_free_options;
} }
opts->wwnn = token64; opts->wwnn = token64;
break; break;
case NVMF_OPT_WWPN: case NVMF_OPT_WWPN:
if (match_u64(args, &token64)) { if (fcloop_verify_addr(args) ||
match_u64(args, &token64)) {
ret = -EINVAL; ret = -EINVAL;
goto out_free_options; goto out_free_options;
} }
...@@ -92,14 +105,16 @@ fcloop_parse_options(struct fcloop_ctrl_options *opts, ...@@ -92,14 +105,16 @@ fcloop_parse_options(struct fcloop_ctrl_options *opts,
opts->fcaddr = token; opts->fcaddr = token;
break; break;
case NVMF_OPT_LPWWNN: case NVMF_OPT_LPWWNN:
if (match_u64(args, &token64)) { if (fcloop_verify_addr(args) ||
match_u64(args, &token64)) {
ret = -EINVAL; ret = -EINVAL;
goto out_free_options; goto out_free_options;
} }
opts->lpwwnn = token64; opts->lpwwnn = token64;
break; break;
case NVMF_OPT_LPWWPN: case NVMF_OPT_LPWWPN:
if (match_u64(args, &token64)) { if (fcloop_verify_addr(args) ||
match_u64(args, &token64)) {
ret = -EINVAL; ret = -EINVAL;
goto out_free_options; goto out_free_options;
} }
...@@ -141,14 +156,16 @@ fcloop_parse_nm_options(struct device *dev, u64 *nname, u64 *pname, ...@@ -141,14 +156,16 @@ fcloop_parse_nm_options(struct device *dev, u64 *nname, u64 *pname,
token = match_token(p, opt_tokens, args); token = match_token(p, opt_tokens, args);
switch (token) { switch (token) {
case NVMF_OPT_WWNN: case NVMF_OPT_WWNN:
if (match_u64(args, &token64)) { if (fcloop_verify_addr(args) ||
match_u64(args, &token64)) {
ret = -EINVAL; ret = -EINVAL;
goto out_free_options; goto out_free_options;
} }
*nname = token64; *nname = token64;
break; break;
case NVMF_OPT_WWPN: case NVMF_OPT_WWPN:
if (match_u64(args, &token64)) { if (fcloop_verify_addr(args) ||
match_u64(args, &token64)) {
ret = -EINVAL; ret = -EINVAL;
goto out_free_options; goto out_free_options;
} }
......
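Note on fcloop_verify_addr() above: wwnn/wwpn option values are now accepted only as "0x" followed by exactly NVME_FC_TRADDR_HEXNAMELEN (16) hexadecimal digits, e.g. 0x20000090fa942779; bare or short numeric values that match_u64() alone would have accepted are rejected up front. A standalone sketch of the same check (verify_addr is a made-up userspace analogue that also folds in the digit validation match_u64() would otherwise perform):

#include <ctype.h>
#include <stdio.h>
#include <string.h>

#define TRADDR_HEXNAMELEN 16 /* 64-bit WWNN/WWPN -> 16 hex digits */

static int verify_addr(const char *s)
{
	size_t i;

	if (strlen(s) != TRADDR_HEXNAMELEN + 2 || strncmp(s, "0x", 2))
		return -1;
	for (i = 2; i < TRADDR_HEXNAMELEN + 2; i++)
		if (!isxdigit((unsigned char)s[i]))
			return -1;
	return 0;
}

int main(void)
{
	printf("%d %d\n", verify_addr("0x20000090fa942779") == 0,
	       verify_addr("2779") == 0); /* prints 1 0 */
	return 0;
}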
...@@ -36,7 +36,6 @@ struct nvme_loop_ctrl { ...@@ -36,7 +36,6 @@ struct nvme_loop_ctrl {
struct nvme_loop_iod async_event_iod; struct nvme_loop_iod async_event_iod;
struct nvme_ctrl ctrl; struct nvme_ctrl ctrl;
struct nvmet_ctrl *target_ctrl;
struct nvmet_port *port; struct nvmet_port *port;
}; };
......
...@@ -286,8 +286,9 @@ struct nvmet_fabrics_ops { ...@@ -286,8 +286,9 @@ struct nvmet_fabrics_ops {
struct module *owner; struct module *owner;
unsigned int type; unsigned int type;
unsigned int msdbd; unsigned int msdbd;
bool has_keyed_sgls : 1; unsigned int flags;
bool metadata_support : 1; #define NVMF_KEYED_SGLS (1 << 0)
#define NVMF_METADATA_SUPPORTED (1 << 1)
void (*queue_response)(struct nvmet_req *req); void (*queue_response)(struct nvmet_req *req);
int (*add_port)(struct nvmet_port *port); int (*add_port)(struct nvmet_port *port);
void (*remove_port)(struct nvmet_port *port); void (*remove_port)(struct nvmet_port *port);
......
...@@ -1970,8 +1970,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = { ...@@ -1970,8 +1970,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.type = NVMF_TRTYPE_RDMA, .type = NVMF_TRTYPE_RDMA,
.msdbd = 1, .msdbd = 1,
.has_keyed_sgls = 1, .flags = NVMF_KEYED_SGLS | NVMF_METADATA_SUPPORTED,
.metadata_support = 1,
.add_port = nvmet_rdma_add_port, .add_port = nvmet_rdma_add_port,
.remove_port = nvmet_rdma_remove_port, .remove_port = nvmet_rdma_remove_port,
.queue_response = nvmet_rdma_queue_response, .queue_response = nvmet_rdma_queue_response,
......
...@@ -459,17 +459,11 @@ static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd) ...@@ -459,17 +459,11 @@ static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd)
static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue *queue) static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue *queue)
{ {
struct llist_node *node; struct llist_node *node;
struct nvmet_tcp_cmd *cmd;
node = llist_del_all(&queue->resp_list); for (node = llist_del_all(&queue->resp_list); node; node = node->next) {
if (!node) cmd = llist_entry(node, struct nvmet_tcp_cmd, lentry);
return;
while (node) {
struct nvmet_tcp_cmd *cmd = llist_entry(node,
struct nvmet_tcp_cmd, lentry);
list_add(&cmd->entry, &queue->resp_send_list); list_add(&cmd->entry, &queue->resp_send_list);
node = node->next;
queue->send_list_len++; queue->send_list_len++;
} }
} }
...@@ -1717,7 +1711,6 @@ static const struct nvmet_fabrics_ops nvmet_tcp_ops = { ...@@ -1717,7 +1711,6 @@ static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.type = NVMF_TRTYPE_TCP, .type = NVMF_TRTYPE_TCP,
.msdbd = 1, .msdbd = 1,
.has_keyed_sgls = 0,
.add_port = nvmet_tcp_add_port, .add_port = nvmet_tcp_add_port,
.remove_port = nvmet_tcp_remove_port, .remove_port = nvmet_tcp_remove_port,
.queue_response = nvmet_tcp_queue_response, .queue_response = nvmet_tcp_queue_response,
......
...@@ -59,6 +59,7 @@ static int sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf, ...@@ -59,6 +59,7 @@ static int sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
zone.non_seq = 1; zone.non_seq = 1;
zone.len = logical_to_sectors(sdp, get_unaligned_be64(&buf[8])); zone.len = logical_to_sectors(sdp, get_unaligned_be64(&buf[8]));
zone.capacity = zone.len;
zone.start = logical_to_sectors(sdp, get_unaligned_be64(&buf[16])); zone.start = logical_to_sectors(sdp, get_unaligned_be64(&buf[16]));
zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24])); zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24]));
if (zone.type != ZBC_ZONE_TYPE_CONV && if (zone.type != ZBC_ZONE_TYPE_CONV &&
......
...@@ -132,6 +132,7 @@ enum { ...@@ -132,6 +132,7 @@ enum {
#define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff) #define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff)
#define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf) #define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf)
#define NVME_CAP_NSSRC(cap) (((cap) >> 36) & 0x1) #define NVME_CAP_NSSRC(cap) (((cap) >> 36) & 0x1)
#define NVME_CAP_CSS(cap) (((cap) >> 37) & 0xff)
#define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) #define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf)
#define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf) #define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf)
...@@ -162,7 +163,6 @@ enum { ...@@ -162,7 +163,6 @@ enum {
enum { enum {
NVME_CC_ENABLE = 1 << 0, NVME_CC_ENABLE = 1 << 0,
NVME_CC_CSS_NVM = 0 << 4,
NVME_CC_EN_SHIFT = 0, NVME_CC_EN_SHIFT = 0,
NVME_CC_CSS_SHIFT = 4, NVME_CC_CSS_SHIFT = 4,
NVME_CC_MPS_SHIFT = 7, NVME_CC_MPS_SHIFT = 7,
...@@ -170,6 +170,9 @@ enum { ...@@ -170,6 +170,9 @@ enum {
NVME_CC_SHN_SHIFT = 14, NVME_CC_SHN_SHIFT = 14,
NVME_CC_IOSQES_SHIFT = 16, NVME_CC_IOSQES_SHIFT = 16,
NVME_CC_IOCQES_SHIFT = 20, NVME_CC_IOCQES_SHIFT = 20,
NVME_CC_CSS_NVM = 0 << NVME_CC_CSS_SHIFT,
NVME_CC_CSS_CSI = 6 << NVME_CC_CSS_SHIFT,
NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT,
NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT,
NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT,
NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT,
...@@ -179,6 +182,8 @@ enum { ...@@ -179,6 +182,8 @@ enum {
NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
NVME_CAP_CSS_NVM = 1 << 0,
NVME_CAP_CSS_CSI = 1 << 6,
NVME_CSTS_RDY = 1 << 0, NVME_CSTS_RDY = 1 << 0,
NVME_CSTS_CFS = 1 << 1, NVME_CSTS_CFS = 1 << 1,
NVME_CSTS_NSSRO = 1 << 4, NVME_CSTS_NSSRO = 1 << 4,
...@@ -369,11 +374,37 @@ struct nvme_id_ns { ...@@ -369,11 +374,37 @@ struct nvme_id_ns {
__u8 vs[3712]; __u8 vs[3712];
}; };
struct nvme_zns_lbafe {
__le64 zsze;
__u8 zdes;
__u8 rsvd9[7];
};
struct nvme_id_ns_zns {
__le16 zoc;
__le16 ozcs;
__le32 mar;
__le32 mor;
__le32 rrl;
__le32 frl;
__u8 rsvd20[2796];
struct nvme_zns_lbafe lbafe[16];
__u8 rsvd3072[768];
__u8 vs[256];
};
struct nvme_id_ctrl_zns {
__u8 zasl;
__u8 rsvd1[4095];
};
enum { enum {
NVME_ID_CNS_NS = 0x00, NVME_ID_CNS_NS = 0x00,
NVME_ID_CNS_CTRL = 0x01, NVME_ID_CNS_CTRL = 0x01,
NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, NVME_ID_CNS_NS_ACTIVE_LIST = 0x02,
NVME_ID_CNS_NS_DESC_LIST = 0x03, NVME_ID_CNS_NS_DESC_LIST = 0x03,
NVME_ID_CNS_CS_NS = 0x05,
NVME_ID_CNS_CS_CTRL = 0x06,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11, NVME_ID_CNS_NS_PRESENT = 0x11,
NVME_ID_CNS_CTRL_NS_LIST = 0x12, NVME_ID_CNS_CTRL_NS_LIST = 0x12,
...@@ -383,6 +414,11 @@ enum { ...@@ -383,6 +414,11 @@ enum {
NVME_ID_CNS_UUID_LIST = 0x17, NVME_ID_CNS_UUID_LIST = 0x17,
}; };
enum {
NVME_CSI_NVM = 0,
NVME_CSI_ZNS = 2,
};
enum { enum {
NVME_DIR_IDENTIFY = 0x00, NVME_DIR_IDENTIFY = 0x00,
NVME_DIR_STREAMS = 0x01, NVME_DIR_STREAMS = 0x01,
...@@ -435,11 +471,13 @@ struct nvme_ns_id_desc { ...@@ -435,11 +471,13 @@ struct nvme_ns_id_desc {
#define NVME_NIDT_EUI64_LEN 8 #define NVME_NIDT_EUI64_LEN 8
#define NVME_NIDT_NGUID_LEN 16 #define NVME_NIDT_NGUID_LEN 16
#define NVME_NIDT_UUID_LEN 16 #define NVME_NIDT_UUID_LEN 16
#define NVME_NIDT_CSI_LEN 1
enum { enum {
NVME_NIDT_EUI64 = 0x01, NVME_NIDT_EUI64 = 0x01,
NVME_NIDT_NGUID = 0x02, NVME_NIDT_NGUID = 0x02,
NVME_NIDT_UUID = 0x03, NVME_NIDT_UUID = 0x03,
NVME_NIDT_CSI = 0x04,
}; };
struct nvme_smart_log { struct nvme_smart_log {
...@@ -519,6 +557,27 @@ struct nvme_ana_rsp_hdr { ...@@ -519,6 +557,27 @@ struct nvme_ana_rsp_hdr {
__le16 rsvd10[3]; __le16 rsvd10[3];
}; };
struct nvme_zone_descriptor {
__u8 zt;
__u8 zs;
__u8 za;
__u8 rsvd3[5];
__le64 zcap;
__le64 zslba;
__le64 wp;
__u8 rsvd32[32];
};
enum {
NVME_ZONE_TYPE_SEQWRITE_REQ = 0x2,
};
struct nvme_zone_report {
__le64 nr_zones;
__u8 resv8[56];
struct nvme_zone_descriptor entries[];
};
enum { enum {
NVME_SMART_CRIT_SPARE = 1 << 0, NVME_SMART_CRIT_SPARE = 1 << 0,
NVME_SMART_CRIT_TEMPERATURE = 1 << 1, NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
...@@ -613,6 +672,9 @@ enum nvme_opcode { ...@@ -613,6 +672,9 @@ enum nvme_opcode {
nvme_cmd_resv_report = 0x0e, nvme_cmd_resv_report = 0x0e,
nvme_cmd_resv_acquire = 0x11, nvme_cmd_resv_acquire = 0x11,
nvme_cmd_resv_release = 0x15, nvme_cmd_resv_release = 0x15,
nvme_cmd_zone_mgmt_send = 0x79,
nvme_cmd_zone_mgmt_recv = 0x7a,
nvme_cmd_zone_append = 0x7d,
}; };
#define nvme_opcode_name(opcode) { opcode, #opcode } #define nvme_opcode_name(opcode) { opcode, #opcode }
...@@ -751,6 +813,7 @@ struct nvme_rw_command { ...@@ -751,6 +813,7 @@ struct nvme_rw_command {
enum { enum {
NVME_RW_LR = 1 << 15, NVME_RW_LR = 1 << 15,
NVME_RW_FUA = 1 << 14, NVME_RW_FUA = 1 << 14,
NVME_RW_APPEND_PIREMAP = 1 << 9,
NVME_RW_DSM_FREQ_UNSPEC = 0, NVME_RW_DSM_FREQ_UNSPEC = 0,
NVME_RW_DSM_FREQ_TYPICAL = 1, NVME_RW_DSM_FREQ_TYPICAL = 1,
NVME_RW_DSM_FREQ_RARE = 2, NVME_RW_DSM_FREQ_RARE = 2,
...@@ -816,6 +879,53 @@ struct nvme_write_zeroes_cmd { ...@@ -816,6 +879,53 @@ struct nvme_write_zeroes_cmd {
__le16 appmask; __le16 appmask;
}; };
enum nvme_zone_mgmt_action {
NVME_ZONE_CLOSE = 0x1,
NVME_ZONE_FINISH = 0x2,
NVME_ZONE_OPEN = 0x3,
NVME_ZONE_RESET = 0x4,
NVME_ZONE_OFFLINE = 0x5,
NVME_ZONE_SET_DESC_EXT = 0x10,
};
struct nvme_zone_mgmt_send_cmd {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__le32 cdw2[2];
__le64 metadata;
union nvme_data_ptr dptr;
__le64 slba;
__le32 cdw12;
__u8 zsa;
__u8 select_all;
__u8 rsvd13[2];
__le32 cdw14[2];
};
struct nvme_zone_mgmt_recv_cmd {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__le64 rsvd2[2];
union nvme_data_ptr dptr;
__le64 slba;
__le32 numd;
__u8 zra;
__u8 zrasf;
__u8 pr;
__u8 rsvd13;
__le32 cdw14[2];
};
enum {
NVME_ZRA_ZONE_REPORT = 0,
NVME_ZRASF_ZONE_REPORT_ALL = 0,
NVME_REPORT_ZONE_PARTIAL = 1,
};
/* Features */ /* Features */
enum { enum {
...@@ -972,7 +1082,9 @@ struct nvme_identify { ...@@ -972,7 +1082,9 @@ struct nvme_identify {
__u8 cns; __u8 cns;
__u8 rsvd3; __u8 rsvd3;
__le16 ctrlid; __le16 ctrlid;
__u32 rsvd11[5]; __u8 rsvd11[3];
__u8 csi;
__u32 rsvd12[4];
}; };
#define NVME_IDENTIFY_DATA_SIZE 4096 #define NVME_IDENTIFY_DATA_SIZE 4096
...@@ -1086,7 +1198,9 @@ struct nvme_get_log_page_command { ...@@ -1086,7 +1198,9 @@ struct nvme_get_log_page_command {
}; };
__le64 lpo; __le64 lpo;
}; };
__u32 rsvd14[2]; __u8 rsvd14[3];
__u8 csi;
__u32 rsvd15;
}; };
struct nvme_directive_cmd { struct nvme_directive_cmd {
...@@ -1283,6 +1397,8 @@ struct nvme_command { ...@@ -1283,6 +1397,8 @@ struct nvme_command {
struct nvme_format_cmd format; struct nvme_format_cmd format;
struct nvme_dsm_cmd dsm; struct nvme_dsm_cmd dsm;
struct nvme_write_zeroes_cmd write_zeroes; struct nvme_write_zeroes_cmd write_zeroes;
struct nvme_zone_mgmt_send_cmd zms;
struct nvme_zone_mgmt_recv_cmd zmr;
struct nvme_abort_cmd abort; struct nvme_abort_cmd abort;
struct nvme_get_log_page_command get_log_page; struct nvme_get_log_page_command get_log_page;
struct nvmf_common_command fabrics; struct nvmf_common_command fabrics;
...@@ -1416,6 +1532,18 @@ enum { ...@@ -1416,6 +1532,18 @@ enum {
NVME_SC_DISCOVERY_RESTART = 0x190, NVME_SC_DISCOVERY_RESTART = 0x190,
NVME_SC_AUTH_REQUIRED = 0x191, NVME_SC_AUTH_REQUIRED = 0x191,
/*
* I/O Command Set Specific - Zoned commands:
*/
NVME_SC_ZONE_BOUNDARY_ERROR = 0x1b8,
NVME_SC_ZONE_FULL = 0x1b9,
NVME_SC_ZONE_READ_ONLY = 0x1ba,
NVME_SC_ZONE_OFFLINE = 0x1bb,
NVME_SC_ZONE_INVALID_WRITE = 0x1bc,
NVME_SC_ZONE_TOO_MANY_ACTIVE = 0x1bd,
NVME_SC_ZONE_TOO_MANY_OPEN = 0x1be,
NVME_SC_ZONE_INVALID_TRANSITION = 0x1bf,
/* /*
* Media and Data Integrity Errors: * Media and Data Integrity Errors:
*/ */
......
...@@ -73,6 +73,15 @@ enum blk_zone_cond { ...@@ -73,6 +73,15 @@ enum blk_zone_cond {
BLK_ZONE_COND_OFFLINE = 0xF, BLK_ZONE_COND_OFFLINE = 0xF,
}; };
/**
* enum blk_zone_report_flags - Feature flags of reported zone descriptors.
*
* @BLK_ZONE_REP_CAPACITY: Zone descriptor has capacity field.
*/
enum blk_zone_report_flags {
BLK_ZONE_REP_CAPACITY = (1 << 0),
};
/** /**
* struct blk_zone - Zone descriptor for BLKREPORTZONE ioctl. * struct blk_zone - Zone descriptor for BLKREPORTZONE ioctl.
* *
...@@ -99,7 +108,9 @@ struct blk_zone { ...@@ -99,7 +108,9 @@ struct blk_zone {
__u8 cond; /* Zone condition */ __u8 cond; /* Zone condition */
__u8 non_seq; /* Non-sequential write resources active */ __u8 non_seq; /* Non-sequential write resources active */
__u8 reset; /* Reset write pointer recommended */ __u8 reset; /* Reset write pointer recommended */
__u8 reserved[36]; __u8 resv[4];
__u64 capacity; /* Zone capacity in number of sectors */
__u8 reserved[24];
}; };
/** /**
...@@ -115,7 +126,7 @@ struct blk_zone { ...@@ -115,7 +126,7 @@ struct blk_zone {
struct blk_zone_report { struct blk_zone_report {
__u64 sector; __u64 sector;
__u32 nr_zones; __u32 nr_zones;
__u8 reserved[4]; __u32 flags;
struct blk_zone zones[0]; struct blk_zone zones[0];
}; };
......
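Note on the uapi changes above: the report ioctl now fills in a flags word, and BLK_ZONE_REP_CAPACITY tells userspace whether the per-zone capacity field is populated. A minimal sketch of issuing BLKREPORTZONE and falling back to the zone length when capacity is not reported; it needs the updated linux/blkzoned.h header, error handling is trimmed, and the device path is just an example:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/blkzoned.h>

int main(void)
{
	unsigned int nr = 16; /* ask for the first 16 zones */
	struct blk_zone_report *rep;
	int fd, i;

	rep = calloc(1, sizeof(*rep) + nr * sizeof(struct blk_zone));
	if (!rep)
		return 1;

	fd = open("/dev/nvme0n2", O_RDONLY); /* hypothetical zoned namespace */
	if (fd < 0)
		return 1;

	rep->sector = 0;
	rep->nr_zones = nr;
	if (ioctl(fd, BLKREPORTZONE, rep) < 0)
		return 1;

	for (i = 0; i < (int)rep->nr_zones; i++) {
		struct blk_zone *z = &rep->zones[i];
		unsigned long long cap = (rep->flags & BLK_ZONE_REP_CAPACITY) ?
					 z->capacity : z->len;

		printf("zone %d: start %llu len %llu cap %llu\n", i,
		       (unsigned long long)z->start,
		       (unsigned long long)z->len, cap);
	}
	close(fd);
	free(rep);
	return 0;
}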