Commit 354201c5 authored by Christoph Hellwig's avatar Christoph Hellwig

nvme: add support for TP4084 - Time-to-Ready Enhancements

Add support for using longer timeouts during controller initialization
and letting the controller come up with namespaces that are not ready
for I/O yet.  We skip these not ready namespaces during scanning and
only bring them online once anoter scan is kicked off by the AEN that
is set when the NRDY bit gets set in the  I/O Command Set Independent
Identify Namespace Data Structure.   This asynchronous probing avoids
blocking the kernel boot when controllers take a very long time to
recover after unclean shutdowns (up to minutes).
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarKeith Busch <kbusch@kernel.org>
Reviewed-by: default avatarChaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: default avatarHannes Reinecke <hare@suse.de>
parent da14f237
...@@ -91,6 +91,7 @@ static const char * const nvme_statuses[] = { ...@@ -91,6 +91,7 @@ static const char * const nvme_statuses[] = {
[NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected", [NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected",
[NVME_SC_CMD_INTERRUPTED] = "Command Interrupted", [NVME_SC_CMD_INTERRUPTED] = "Command Interrupted",
[NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error", [NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error",
[NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY] = "Admin Command Media Not Ready",
[NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set", [NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set",
[NVME_SC_LBA_RANGE] = "LBA Out of Range", [NVME_SC_LBA_RANGE] = "LBA Out of Range",
[NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded", [NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded",
......
...@@ -1427,6 +1427,32 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, ...@@ -1427,6 +1427,32 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
return error; return error;
} }
static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid,
struct nvme_id_ns_cs_indep **id)
{
struct nvme_command c = {
.identify.opcode = nvme_admin_identify,
.identify.nsid = cpu_to_le32(nsid),
.identify.cns = NVME_ID_CNS_NS_CS_INDEP,
};
int ret;
*id = kmalloc(sizeof(**id), GFP_KERNEL);
if (!*id)
return -ENOMEM;
ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
if (ret) {
dev_warn(ctrl->device,
"Identify namespace (CS independent) failed (%d)\n",
ret);
kfree(*id);
return ret;
}
return 0;
}
static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
unsigned int dword11, void *buffer, size_t buflen, u32 *result) unsigned int dword11, void *buffer, size_t buflen, u32 *result)
{ {
...@@ -2103,10 +2129,9 @@ static const struct block_device_operations nvme_bdev_ops = { ...@@ -2103,10 +2129,9 @@ static const struct block_device_operations nvme_bdev_ops = {
.pr_ops = &nvme_pr_ops, .pr_ops = &nvme_pr_ops,
}; };
static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
{ {
unsigned long timeout = unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies;
((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
u32 csts, bit = enabled ? NVME_CSTS_RDY : 0; u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
int ret; int ret;
...@@ -2119,7 +2144,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) ...@@ -2119,7 +2144,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
usleep_range(1000, 2000); usleep_range(1000, 2000);
if (fatal_signal_pending(current)) if (fatal_signal_pending(current))
return -EINTR; return -EINTR;
if (time_after(jiffies, timeout)) { if (time_after(jiffies, timeout_jiffies)) {
dev_err(ctrl->device, dev_err(ctrl->device,
"Device not ready; aborting %s, CSTS=0x%x\n", "Device not ready; aborting %s, CSTS=0x%x\n",
enabled ? "initialisation" : "reset", csts); enabled ? "initialisation" : "reset", csts);
...@@ -2150,13 +2175,14 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl) ...@@ -2150,13 +2175,14 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
msleep(NVME_QUIRK_DELAY_AMOUNT); msleep(NVME_QUIRK_DELAY_AMOUNT);
return nvme_wait_ready(ctrl, ctrl->cap, false); return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false);
} }
EXPORT_SYMBOL_GPL(nvme_disable_ctrl); EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl) int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
{ {
unsigned dev_page_min; unsigned dev_page_min;
u32 timeout;
int ret; int ret;
ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap); ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
...@@ -2177,6 +2203,27 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) ...@@ -2177,6 +2203,27 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
ctrl->ctrl_config = NVME_CC_CSS_CSI; ctrl->ctrl_config = NVME_CC_CSS_CSI;
else else
ctrl->ctrl_config = NVME_CC_CSS_NVM; ctrl->ctrl_config = NVME_CC_CSS_NVM;
if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
u32 crto;
ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
if (ret) {
dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
ret);
return ret;
}
if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
ctrl->ctrl_config |= NVME_CC_CRIME;
timeout = NVME_CRTO_CRIMT(crto);
} else {
timeout = NVME_CRTO_CRWMT(crto);
}
} else {
timeout = NVME_CAP_TIMEOUT(ctrl->cap);
}
ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT; ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
...@@ -2185,7 +2232,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) ...@@ -2185,7 +2232,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config); ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
if (ret) if (ret)
return ret; return ret;
return nvme_wait_ready(ctrl, ctrl->cap, true); return nvme_wait_ready(ctrl, timeout, true);
} }
EXPORT_SYMBOL_GPL(nvme_enable_ctrl); EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
...@@ -4092,11 +4139,26 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) ...@@ -4092,11 +4139,26 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids)
static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{ {
struct nvme_ns_ids ids = { }; struct nvme_ns_ids ids = { };
struct nvme_id_ns_cs_indep *id;
struct nvme_ns *ns; struct nvme_ns *ns;
bool ready = true;
if (nvme_identify_ns_descs(ctrl, nsid, &ids)) if (nvme_identify_ns_descs(ctrl, nsid, &ids))
return; return;
/*
* Check if the namespace is ready. If not ignore it, we will get an
* AEN once it becomes ready and restart the scan.
*/
if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) &&
!nvme_identify_ns_cs_indep(ctrl, nsid, &id)) {
ready = id->nstat & NVME_NSTAT_NRDY;
kfree(id);
}
if (!ready)
return;
ns = nvme_find_get_ns(ctrl, nsid); ns = nvme_find_get_ns(ctrl, nsid);
if (ns) { if (ns) {
nvme_validate_ns(ns, &ids); nvme_validate_ns(ns, &ids);
...@@ -4841,6 +4903,8 @@ static inline void _nvme_check_size(void) ...@@ -4841,6 +4903,8 @@ static inline void _nvme_check_size(void)
BUILD_BUG_ON(sizeof(struct nvme_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns_cs_indep) !=
NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE);
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
......
...@@ -137,6 +137,7 @@ enum { ...@@ -137,6 +137,7 @@ enum {
NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory
* Space Control * Space Control
*/ */
NVME_REG_CRTO = 0x0068, /* Controller Ready Timeouts */
NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */ NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */
NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */ NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */
NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */ NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */
...@@ -161,6 +162,9 @@ enum { ...@@ -161,6 +162,9 @@ enum {
#define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7) #define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7)
#define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff) #define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff)
#define NVME_CRTO_CRIMT(crto) ((crto) >> 16)
#define NVME_CRTO_CRWMT(crto) ((crto) & 0xffff)
enum { enum {
NVME_CMBSZ_SQS = 1 << 0, NVME_CMBSZ_SQS = 1 << 0,
NVME_CMBSZ_CQS = 1 << 1, NVME_CMBSZ_CQS = 1 << 1,
...@@ -204,6 +208,7 @@ enum { ...@@ -204,6 +208,7 @@ enum {
NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
NVME_CC_CRIME = 1 << 24,
}; };
enum { enum {
...@@ -227,6 +232,11 @@ enum { ...@@ -227,6 +232,11 @@ enum {
NVME_CAP_CSS_CSI = 1 << 6, NVME_CAP_CSS_CSI = 1 << 6,
}; };
enum {
NVME_CAP_CRMS_CRIMS = 1ULL << 59,
NVME_CAP_CRMS_CRWMS = 1ULL << 60,
};
struct nvme_id_power_state { struct nvme_id_power_state {
__le16 max_power; /* centiwatts */ __le16 max_power; /* centiwatts */
__u8 rsvd2; __u8 rsvd2;
...@@ -414,6 +424,21 @@ struct nvme_id_ns { ...@@ -414,6 +424,21 @@ struct nvme_id_ns {
__u8 vs[3712]; __u8 vs[3712];
}; };
/* I/O Command Set Independent Identify Namespace Data Structure */
struct nvme_id_ns_cs_indep {
__u8 nsfeat;
__u8 nmic;
__u8 rescap;
__u8 fpi;
__le32 anagrpid;
__u8 nsattr;
__u8 rsvd9;
__le16 nvmsetid;
__le16 endgid;
__u8 nstat;
__u8 rsvd15[4081];
};
struct nvme_zns_lbafe { struct nvme_zns_lbafe {
__le64 zsze; __le64 zsze;
__u8 zdes; __u8 zdes;
...@@ -478,6 +503,7 @@ enum { ...@@ -478,6 +503,7 @@ enum {
NVME_ID_CNS_NS_DESC_LIST = 0x03, NVME_ID_CNS_NS_DESC_LIST = 0x03,
NVME_ID_CNS_CS_NS = 0x05, NVME_ID_CNS_CS_NS = 0x05,
NVME_ID_CNS_CS_CTRL = 0x06, NVME_ID_CNS_CS_CTRL = 0x06,
NVME_ID_CNS_NS_CS_INDEP = 0x08,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10, NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11, NVME_ID_CNS_NS_PRESENT = 0x11,
NVME_ID_CNS_CTRL_NS_LIST = 0x12, NVME_ID_CNS_CTRL_NS_LIST = 0x12,
...@@ -531,6 +557,10 @@ enum { ...@@ -531,6 +557,10 @@ enum {
NVME_NS_DPS_PI_TYPE3 = 3, NVME_NS_DPS_PI_TYPE3 = 3,
}; };
enum {
NVME_NSTAT_NRDY = 1 << 0,
};
enum { enum {
NVME_NVM_NS_16B_GUARD = 0, NVME_NVM_NS_16B_GUARD = 0,
NVME_NVM_NS_32B_GUARD = 1, NVME_NVM_NS_32B_GUARD = 1,
...@@ -1592,6 +1622,7 @@ enum { ...@@ -1592,6 +1622,7 @@ enum {
NVME_SC_NS_WRITE_PROTECTED = 0x20, NVME_SC_NS_WRITE_PROTECTED = 0x20,
NVME_SC_CMD_INTERRUPTED = 0x21, NVME_SC_CMD_INTERRUPTED = 0x21,
NVME_SC_TRANSIENT_TR_ERR = 0x22, NVME_SC_TRANSIENT_TR_ERR = 0x22,
NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY = 0x24,
NVME_SC_INVALID_IO_CMD_SET = 0x2C, NVME_SC_INVALID_IO_CMD_SET = 0x2C,
NVME_SC_LBA_RANGE = 0x80, NVME_SC_LBA_RANGE = 0x80,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment