Commit f87b0f0d authored by Jens Axboe

Merge branch 'nvme-4.19' of git://git.infradead.org/nvme into for-4.19/block2

Pull NVMe changes from Christoph:

"This contains the support for TP4004, Asymmetric Namespace Access,
 which makes NVMe multipathing usable in practice."

* 'nvme-4.19' of git://git.infradead.org/nvme:
  nvmet: use Retain Async Event bit to clear AEN
  nvmet: support configuring ANA groups
  nvmet: add minimal ANA support
  nvmet: track and limit the number of namespaces per subsystem
  nvmet: keep a port pointer in nvmet_ctrl
  nvme: add ANA support
  nvme: remove nvme_req_needs_failover
  nvme: simplify the API for getting log pages
  nvme.h: add ANA definitions
  nvme.h: add support for the log specific field
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parents 05b9ba4b b369b30c
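
Before the per-file hunks: the central host-side API change is that nvme_get_log_ext() and the old static nvme_get_log() wrapper are folded into a single exported nvme_get_log() that takes an explicit namespace ID and the new log specific field (lsp). A hedged caller sketch using only names from this diff (the helper name and buffer are illustrative, not code from the series):

/* Sketch: reading the ANA log through the consolidated helper.  The
 * NVME_ANA_LOG_RGO lsp flag requests "Return Groups Only", i.e. group
 * descriptors without their NSID lists.  "buf"/"len" are assumed to be
 * a worst-case buffer, sized as in the sizing sketch further down. */
static int example_read_ana_log(struct nvme_ctrl *ctrl, void *buf, size_t len)
{
	int error;

	error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA,
			NVME_ANA_LOG_RGO, buf, len, 0);
	if (error)
		dev_warn(ctrl->device, "ANA log read failed: %d\n", error);
	return error;
}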
......@@ -252,7 +252,8 @@ void nvme_complete_rq(struct request *req)
trace_nvme_complete_rq(req);
if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
- if (nvme_req_needs_failover(req, status)) {
+ if ((req->cmd_flags & REQ_NVME_MPATH) &&
+     blk_path_error(status)) {
nvme_failover_req(req);
return;
}
......@@ -1067,7 +1068,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
EXPORT_SYMBOL_GPL(nvme_set_queue_count);
#define NVME_AEN_SUPPORTED \
- (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT)
+ (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | NVME_AEN_CFG_ANA_CHANGE)
static void nvme_enable_aen(struct nvme_ctrl *ctrl)
{
......@@ -2281,21 +2282,16 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
return ret;
}
- int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- 		u8 log_page, void *log,
- 		size_t size, u64 offset)
+ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
+ 		void *log, size_t size, u64 offset)
{
struct nvme_command c = { };
unsigned long dwlen = size / 4 - 1;
c.get_log_page.opcode = nvme_admin_get_log_page;
- if (ns)
- 	c.get_log_page.nsid = cpu_to_le32(ns->head->ns_id);
- else
- 	c.get_log_page.nsid = cpu_to_le32(NVME_NSID_ALL);
+ c.get_log_page.nsid = cpu_to_le32(nsid);
c.get_log_page.lid = log_page;
+ c.get_log_page.lsp = lsp;
c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1));
c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset));
......@@ -2304,12 +2300,6 @@ int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
}
- static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
- 		size_t size)
- {
- 	return nvme_get_log_ext(ctrl, NULL, log_page, log, size, 0);
- }
static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
{
int ret;
......@@ -2320,8 +2310,8 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
if (!ctrl->effects)
return 0;
- ret = nvme_get_log(ctrl, NVME_LOG_CMD_EFFECTS, ctrl->effects,
- 		sizeof(*ctrl->effects));
+ ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0,
+ 		ctrl->effects, sizeof(*ctrl->effects), 0);
if (ret) {
kfree(ctrl->effects);
ctrl->effects = NULL;
......@@ -2412,6 +2402,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
nvme_set_queue_limits(ctrl, ctrl->admin_q);
ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas);
ctrl->max_namespaces = le32_to_cpu(id->mnan);
if (id->rtd3e) {
/* us -> s */
......@@ -2471,8 +2462,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
}
ret = nvme_mpath_init(ctrl, id);
kfree(id);
if (ret < 0)
return ret;
if (ctrl->apst_enabled && !prev_apst_enabled)
dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apst_enabled && prev_apst_enabled)
......@@ -2691,6 +2686,10 @@ static struct attribute *nvme_ns_id_attrs[] = {
&dev_attr_nguid.attr,
&dev_attr_eui.attr,
&dev_attr_nsid.attr,
#ifdef CONFIG_NVME_MULTIPATH
&dev_attr_ana_grpid.attr,
&dev_attr_ana_state.attr,
#endif
NULL,
};
......@@ -2713,6 +2712,14 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
return 0;
}
#ifdef CONFIG_NVME_MULTIPATH
if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */
return 0;
if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
return 0;
}
#endif
return a->mode;
}
......@@ -3086,8 +3093,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
nvme_get_ctrl(ctrl);
- kfree(id);
device_add_disk(ctrl->device, ns->disk);
if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
&nvme_ns_id_attr_group))
......@@ -3097,8 +3102,10 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
ns->disk->disk_name);
- nvme_mpath_add_disk(ns->head);
+ nvme_mpath_add_disk(ns, id);
nvme_fault_inject_init(ns);
+ kfree(id);
return;
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
......@@ -3240,7 +3247,8 @@ static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
* raced with us in reading the log page, which could cause us to miss
* updates.
*/
- error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
+ error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CHANGED_NS, 0, log,
+ 		log_size, 0);
if (error)
dev_warn(ctrl->device,
"reading changed ns log failed: %d\n", error);
......@@ -3357,9 +3365,9 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
if (!log)
return;
- if (nvme_get_log(ctrl, NVME_LOG_FW_SLOT, log, sizeof(*log)))
- 	dev_warn(ctrl->device,
- 			"Get FW SLOT INFO log error\n");
+ if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, log,
+ 		sizeof(*log), 0))
+ 	dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
kfree(log);
}
......@@ -3405,6 +3413,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
case NVME_AER_NOTICE_FW_ACT_STARTING:
queue_work(nvme_wq, &ctrl->fw_act_work);
break;
#ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA:
if (!ctrl->ana_log_buf)
break;
queue_work(nvme_wq, &ctrl->ana_work);
break;
#endif
default:
dev_warn(ctrl->device, "async event result %08x\n", result);
}
......@@ -3437,6 +3452,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
{
nvme_mpath_stop(ctrl);
nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work);
flush_work(&ctrl->scan_work);
......@@ -3474,6 +3490,7 @@ static void nvme_free_ctrl(struct device *dev)
ida_simple_remove(&nvme_instance_ida, ctrl->instance);
kfree(ctrl->effects);
nvme_mpath_uninit(ctrl);
if (subsys) {
mutex_lock(&subsys->lock);
......
......@@ -604,8 +604,9 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
while (left) {
len = min_t(unsigned int, left, max_len);
- ret = nvme_get_log_ext(ctrl, ns, NVME_NVM_LOG_REPORT_CHUNK,
- 		dev_meta, len, offset);
+ ret = nvme_get_log(ctrl, ns->head->ns_id,
+ 		NVME_NVM_LOG_REPORT_CHUNK, 0, dev_meta, len,
+ 		offset);
if (ret) {
dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
break;
......
This diff is collapsed.
......@@ -183,6 +183,7 @@ struct nvme_ctrl {
u16 oacs;
u16 nssa;
u16 nr_streams;
u32 max_namespaces;
atomic_t abort_limit;
u8 vwc;
u32 vs;
......@@ -205,6 +206,19 @@ struct nvme_ctrl {
struct work_struct fw_act_work;
unsigned long events;
#ifdef CONFIG_NVME_MULTIPATH
/* asymmetric namespace access: */
u8 anacap;
u8 anatt;
u32 anagrpmax;
u32 nanagrpid;
struct mutex ana_lock;
struct nvme_ana_rsp_hdr *ana_log_buf;
size_t ana_log_size;
struct timer_list anatt_timer;
struct work_struct ana_work;
#endif
/* Power saving configuration */
u64 ps_max_latency_us;
bool apst_enabled;
......@@ -269,6 +283,7 @@ struct nvme_ns_head {
struct bio_list requeue_list;
spinlock_t requeue_lock;
struct work_struct requeue_work;
struct mutex lock;
#endif
struct list_head list;
struct srcu_struct srcu;
......@@ -295,6 +310,10 @@ struct nvme_ns {
struct nvme_ctrl *ctrl;
struct request_queue *queue;
struct gendisk *disk;
#ifdef CONFIG_NVME_MULTIPATH
enum nvme_ana_state ana_state;
u32 ana_grpid;
#endif
struct list_head siblings;
struct nvm_dev *ndev;
struct kref kref;
......@@ -309,6 +328,7 @@ struct nvme_ns {
unsigned long flags;
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
#define NVME_NS_ANA_PENDING 2
u16 noiob;
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
......@@ -436,21 +456,24 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
- int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- 		u8 log_page, void *log, size_t size, u64 offset);
+ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
+ 		void *log, size_t size, u64 offset);
extern const struct attribute_group nvme_ns_id_attr_group;
extern const struct block_device_operations nvme_ns_head_ops;
#ifdef CONFIG_NVME_MULTIPATH
bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl);
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
struct nvme_ctrl *ctrl, int *flags);
void nvme_failover_req(struct request *req);
- bool nvme_req_needs_failover(struct request *req, blk_status_t error);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head);
- void nvme_mpath_add_disk(struct nvme_ns_head *head);
+ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
......@@ -469,7 +492,14 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
kblockd_schedule_work(&head->requeue_work);
}
extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
#else
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
{
return false;
}
/*
* Without the multipath code enabled, multiple controllers per subsystem are
* visible as devices and thus we cannot use the subsystem instance.
......@@ -483,11 +513,6 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
static inline void nvme_failover_req(struct request *req)
{
}
- static inline bool nvme_req_needs_failover(struct request *req,
- 		blk_status_t error)
- {
- 	return false;
- }
static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
}
......@@ -496,7 +521,8 @@ static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
{
return 0;
}
- static inline void nvme_mpath_add_disk(struct nvme_ns_head *head)
+ static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
+ 		struct nvme_id_ns *id)
{
}
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
......@@ -508,6 +534,17 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
{
}
static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
struct nvme_id_ctrl *id)
{
return 0;
}
static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
{
}
static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
{
}
#endif /* CONFIG_NVME_MULTIPATH */
#ifdef CONFIG_NVM
......
......@@ -19,6 +19,19 @@
#include <asm/unaligned.h>
#include "nvmet.h"
/*
 * This helper clears the AEN when the Retain Asynchronous Event (RAE)
 * bit of the Get Log Page command is not set.  Use it when processing
 * log pages that are associated with an AEN.
 */
static inline void nvmet_clear_aen(struct nvmet_req *req, u32 aen_bit)
{
int rae = le32_to_cpu(req->cmd->common.cdw10[0]) & (1 << 15);
if (!rae)
clear_bit(aen_bit, &req->sq->ctrl->aen_masked);
}
u32 nvmet_get_log_page_len(struct nvme_command *cmd)
{
u32 len = le16_to_cpu(cmd->get_log_page.numdu);
......@@ -176,12 +189,76 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
if (!status)
status = nvmet_zero_sgl(req, len, req->data_len - len);
ctrl->nr_changed_ns = 0;
- clear_bit(NVME_AEN_CFG_NS_ATTR, &ctrl->aen_masked);
+ nvmet_clear_aen(req, NVME_AEN_CFG_NS_ATTR);
mutex_unlock(&ctrl->lock);
out:
nvmet_req_complete(req, status);
}
static u32 nvmet_format_ana_group(struct nvmet_req *req, u32 grpid,
struct nvme_ana_group_desc *desc)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmet_ns *ns;
u32 count = 0;
if (!(req->cmd->get_log_page.lsp & NVME_ANA_LOG_RGO)) {
rcu_read_lock();
list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
if (ns->anagrpid == grpid)
desc->nsids[count++] = cpu_to_le32(ns->nsid);
rcu_read_unlock();
}
desc->grpid = cpu_to_le32(grpid);
desc->nnsids = cpu_to_le32(count);
desc->chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
desc->state = req->port->ana_state[grpid];
memset(desc->rsvd17, 0, sizeof(desc->rsvd17));
return sizeof(struct nvme_ana_group_desc) + count * sizeof(__le32);
}
static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
{
struct nvme_ana_rsp_hdr hdr = { 0, };
struct nvme_ana_group_desc *desc;
size_t offset = sizeof(struct nvme_ana_rsp_hdr); /* start beyond hdr */
size_t len;
u32 grpid;
u16 ngrps = 0;
u16 status;
status = NVME_SC_INTERNAL;
desc = kmalloc(sizeof(struct nvme_ana_group_desc) +
NVMET_MAX_NAMESPACES * sizeof(__le32), GFP_KERNEL);
if (!desc)
goto out;
down_read(&nvmet_ana_sem);
for (grpid = 1; grpid <= NVMET_MAX_ANAGRPS; grpid++) {
if (!nvmet_ana_group_enabled[grpid])
continue;
len = nvmet_format_ana_group(req, grpid, desc);
status = nvmet_copy_to_sgl(req, offset, desc, len);
if (status)
break;
offset += len;
ngrps++;
}
hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
hdr.ngrps = cpu_to_le16(ngrps);
nvmet_clear_aen(req, NVME_AEN_CFG_ANA_CHANGE);
up_read(&nvmet_ana_sem);
kfree(desc);
/* copy the header last once we know the number of groups */
status = nvmet_copy_to_sgl(req, 0, &hdr, sizeof(hdr));
out:
nvmet_req_complete(req, status);
}
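
The kmalloc above sizes a single descriptor for the worst case: every namespace in one group. On the host side, the identify fields added in this merge (NANAGRPID and MNAN) bound the size of the whole log the same way. A hedged sketch of that worst-case computation (the helper name is invented; the formula follows from the structures defined in nvme.h below):

/* Sketch: worst-case ANA log size a host could allocate up front.
 * Header, one descriptor per possible group, and one NSID slot per
 * namespace, since each namespace belongs to at most one group. */
static size_t example_ana_log_size(struct nvme_ctrl *ctrl)
{
	return sizeof(struct nvme_ana_rsp_hdr) +
		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc) +
		ctrl->max_namespaces * sizeof(__le32);
}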
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
......@@ -213,8 +290,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
* the safest is to leave it as zeroes.
*/
- /* we support multiple ports and multiple hosts: */
- id->cmic = (1 << 0) | (1 << 1);
+ /* we support multiple ports, multiple hosts and ANA: */
+ id->cmic = (1 << 0) | (1 << 1) | (1 << 3);
/* no limit on data transfer sizes for now */
id->mdts = 0;
......@@ -252,6 +329,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
id->nn = cpu_to_le32(ctrl->subsys->max_nsid);
id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
NVME_CTRL_ONCS_WRITE_ZEROES);
......@@ -281,6 +359,11 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
id->msdbd = ctrl->ops->msdbd;
id->anacap = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
id->anatt = 10; /* random value */
id->anagrpmax = cpu_to_le32(NVMET_MAX_ANAGRPS);
id->nanagrpid = cpu_to_le32(NVMET_MAX_ANAGRPS);
/*
* Meh, we don't really support any power state. Fake up the same
* values that qemu does.
......@@ -322,8 +405,15 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
* nuse = ncap = nsze isn't always true, but we have no way to find
* that out from the underlying device.
*/
- id->ncap = id->nuse = id->nsze =
- 	cpu_to_le64(ns->size >> ns->blksize_shift);
+ id->ncap = id->nsze = cpu_to_le64(ns->size >> ns->blksize_shift);
switch (req->port->ana_state[ns->anagrpid]) {
case NVME_ANA_INACCESSIBLE:
case NVME_ANA_PERSISTENT_LOSS:
break;
default:
id->nuse = id->nsze;
break;
}
/*
* We just provide a single LBA format that matches what the
......@@ -337,6 +427,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
* controllers, but also with any other user of the block device.
*/
id->nmic = (1 << 0);
id->anagrpid = cpu_to_le32(ns->anagrpid);
memcpy(&id->nguid, &ns->nguid, sizeof(id->nguid));
......@@ -619,6 +710,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
case NVME_LOG_CMD_EFFECTS:
req->execute = nvmet_execute_get_log_cmd_effects_ns;
return 0;
case NVME_LOG_ANA:
req->execute = nvmet_execute_get_log_page_ana;
return 0;
}
break;
case nvme_admin_identify:
......
......@@ -411,6 +411,39 @@ static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_ns_, device_nguid);
static ssize_t nvmet_ns_ana_grpid_show(struct config_item *item, char *page)
{
return sprintf(page, "%u\n", to_nvmet_ns(item)->anagrpid);
}
static ssize_t nvmet_ns_ana_grpid_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_ns *ns = to_nvmet_ns(item);
u32 oldgrpid, newgrpid;
int ret;
ret = kstrtou32(page, 0, &newgrpid);
if (ret)
return ret;
if (newgrpid < 1 || newgrpid > NVMET_MAX_ANAGRPS)
return -EINVAL;
down_write(&nvmet_ana_sem);
oldgrpid = ns->anagrpid;
nvmet_ana_group_enabled[newgrpid]++;
ns->anagrpid = newgrpid;
nvmet_ana_group_enabled[oldgrpid]--;
nvmet_ana_chgcnt++;
up_write(&nvmet_ana_sem);
nvmet_send_ana_event(ns->subsys, NULL);
return count;
}
CONFIGFS_ATTR(nvmet_ns_, ana_grpid);
static ssize_t nvmet_ns_enable_show(struct config_item *item, char *page)
{
return sprintf(page, "%d\n", to_nvmet_ns(item)->enabled);
......@@ -468,6 +501,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
&nvmet_ns_attr_device_uuid,
&nvmet_ns_attr_ana_grpid,
&nvmet_ns_attr_enable,
&nvmet_ns_attr_buffered_io,
NULL,
......@@ -916,6 +950,134 @@ static const struct config_item_type nvmet_referrals_type = {
.ct_group_ops = &nvmet_referral_group_ops,
};
static struct {
enum nvme_ana_state state;
const char *name;
} nvmet_ana_state_names[] = {
{ NVME_ANA_OPTIMIZED, "optimized" },
{ NVME_ANA_NONOPTIMIZED, "non-optimized" },
{ NVME_ANA_INACCESSIBLE, "inaccessible" },
{ NVME_ANA_PERSISTENT_LOSS, "persistent-loss" },
{ NVME_ANA_CHANGE, "change" },
};
static ssize_t nvmet_ana_group_ana_state_show(struct config_item *item,
char *page)
{
struct nvmet_ana_group *grp = to_ana_group(item);
enum nvme_ana_state state = grp->port->ana_state[grp->grpid];
int i;
for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
if (state != nvmet_ana_state_names[i].state)
continue;
return sprintf(page, "%s\n", nvmet_ana_state_names[i].name);
}
return sprintf(page, "\n");
}
static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_ana_group *grp = to_ana_group(item);
int i;
for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
if (sysfs_streq(page, nvmet_ana_state_names[i].name))
goto found;
}
pr_err("Invalid value '%s' for ana_state\n", page);
return -EINVAL;
found:
down_write(&nvmet_ana_sem);
grp->port->ana_state[grp->grpid] = nvmet_ana_state_names[i].state;
nvmet_ana_chgcnt++;
up_write(&nvmet_ana_sem);
nvmet_port_send_ana_event(grp->port);
return count;
}
CONFIGFS_ATTR(nvmet_ana_group_, ana_state);
static struct configfs_attribute *nvmet_ana_group_attrs[] = {
&nvmet_ana_group_attr_ana_state,
NULL,
};
static void nvmet_ana_group_release(struct config_item *item)
{
struct nvmet_ana_group *grp = to_ana_group(item);
if (grp == &grp->port->ana_default_group)
return;
down_write(&nvmet_ana_sem);
grp->port->ana_state[grp->grpid] = NVME_ANA_INACCESSIBLE;
nvmet_ana_group_enabled[grp->grpid]--;
up_write(&nvmet_ana_sem);
nvmet_port_send_ana_event(grp->port);
kfree(grp);
}
static struct configfs_item_operations nvmet_ana_group_item_ops = {
.release = nvmet_ana_group_release,
};
static const struct config_item_type nvmet_ana_group_type = {
.ct_item_ops = &nvmet_ana_group_item_ops,
.ct_attrs = nvmet_ana_group_attrs,
.ct_owner = THIS_MODULE,
};
static struct config_group *nvmet_ana_groups_make_group(
struct config_group *group, const char *name)
{
struct nvmet_port *port = ana_groups_to_port(&group->cg_item);
struct nvmet_ana_group *grp;
u32 grpid;
int ret;
ret = kstrtou32(name, 0, &grpid);
if (ret)
goto out;
ret = -EINVAL;
if (grpid <= 1 || grpid > NVMET_MAX_ANAGRPS)
goto out;
ret = -ENOMEM;
grp = kzalloc(sizeof(*grp), GFP_KERNEL);
if (!grp)
goto out;
grp->port = port;
grp->grpid = grpid;
down_write(&nvmet_ana_sem);
nvmet_ana_group_enabled[grpid]++;
up_write(&nvmet_ana_sem);
nvmet_port_send_ana_event(grp->port);
config_group_init_type_name(&grp->group, name, &nvmet_ana_group_type);
return &grp->group;
out:
return ERR_PTR(ret);
}
static struct configfs_group_operations nvmet_ana_groups_group_ops = {
.make_group = nvmet_ana_groups_make_group,
};
static const struct config_item_type nvmet_ana_groups_type = {
.ct_group_ops = &nvmet_ana_groups_group_ops,
.ct_owner = THIS_MODULE,
};
/*
* Ports definitions.
*/
......@@ -923,6 +1085,7 @@ static void nvmet_port_release(struct config_item *item)
{
struct nvmet_port *port = to_nvmet_port(item);
kfree(port->ana_state);
kfree(port);
}
......@@ -951,6 +1114,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
{
struct nvmet_port *port;
u16 portid;
u32 i;
if (kstrtou16(name, 0, &portid))
return ERR_PTR(-EINVAL);
......@@ -959,6 +1123,20 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
if (!port)
return ERR_PTR(-ENOMEM);
port->ana_state = kcalloc(NVMET_MAX_ANAGRPS + 1,
sizeof(*port->ana_state), GFP_KERNEL);
if (!port->ana_state) {
kfree(port);
return ERR_PTR(-ENOMEM);
}
for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
if (i == NVMET_DEFAULT_ANA_GRPID)
port->ana_state[i] = NVME_ANA_OPTIMIZED;
else
port->ana_state[i] = NVME_ANA_INACCESSIBLE;
}
INIT_LIST_HEAD(&port->entry);
INIT_LIST_HEAD(&port->subsystems);
INIT_LIST_HEAD(&port->referrals);
......@@ -975,6 +1153,18 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
"referrals", &nvmet_referrals_type);
configfs_add_default_group(&port->referrals_group, &port->group);
config_group_init_type_name(&port->ana_groups_group,
"ana_groups", &nvmet_ana_groups_type);
configfs_add_default_group(&port->ana_groups_group, &port->group);
port->ana_default_group.port = port;
port->ana_default_group.grpid = NVMET_DEFAULT_ANA_GRPID;
config_group_init_type_name(&port->ana_default_group.group,
__stringify(NVMET_DEFAULT_ANA_GRPID),
&nvmet_ana_group_type);
configfs_add_default_group(&port->ana_default_group.group,
&port->ana_groups_group);
return &port->group;
}
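
Taken together, these configfs hooks let an administrator create ANA groups per port, change a group's state at runtime, and reassign a namespace's group through the ana_grpid attribute added earlier. A hedged userspace sketch (assumes configfs is mounted at /sys/kernel/config, with an existing port 1 and subsystem "testnqn"; all IDs and the state value are illustrative):

#include <errno.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>

int main(void)
{
	FILE *f;

	/* create ANA group 2 on port 1 (nvmet_ana_groups_make_group) */
	if (mkdir("/sys/kernel/config/nvmet/ports/1/ana_groups/2", 0755) < 0 &&
	    errno != EEXIST) {
		perror("mkdir ana_groups/2");
		return 1;
	}

	/* set its state; accepted strings are nvmet_ana_state_names[] */
	f = fopen("/sys/kernel/config/nvmet/ports/1/ana_groups/2/ana_state", "w");
	if (!f)
		return 1;
	fputs("non-optimized\n", f);
	fclose(f);

	/* move namespace 1 of "testnqn" into group 2
	 * (handled by nvmet_ns_ana_grpid_store) */
	f = fopen("/sys/kernel/config/nvmet/subsystems/testnqn/namespaces/1/ana_grpid", "w");
	if (!f)
		return 1;
	fputs("2\n", f);
	fclose(f);
	return 0;
}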
......
......@@ -40,6 +40,10 @@ static DEFINE_IDA(cntlid_ida);
*/
DECLARE_RWSEM(nvmet_config_sem);
u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
const char *subsysnqn);
......@@ -190,6 +194,33 @@ static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
}
}
void nvmet_send_ana_event(struct nvmet_subsys *subsys,
struct nvmet_port *port)
{
struct nvmet_ctrl *ctrl;
mutex_lock(&subsys->lock);
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
if (port && ctrl->port != port)
continue;
if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_ANA_CHANGE))
continue;
nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
}
mutex_unlock(&subsys->lock);
}
void nvmet_port_send_ana_event(struct nvmet_port *port)
{
struct nvmet_subsys_link *p;
down_read(&nvmet_config_sem);
list_for_each_entry(p, &port->subsystems, entry)
nvmet_send_ana_event(p->subsys, port);
up_read(&nvmet_config_sem);
}
int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
int ret = 0;
......@@ -337,9 +368,13 @@ static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
int nvmet_ns_enable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
- int ret = 0;
+ int ret;
mutex_lock(&subsys->lock);
ret = -EMFILE;
if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
goto out_unlock;
ret = 0;
if (ns->enabled)
goto out_unlock;
......@@ -374,6 +409,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
list_add_tail_rcu(&ns->dev_link, &old->dev_link);
}
subsys->nr_namespaces++;
nvmet_ns_changed(subsys, ns->nsid);
ns->enabled = true;
......@@ -414,6 +450,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
percpu_ref_exit(&ns->ref);
mutex_lock(&subsys->lock);
subsys->nr_namespaces--;
nvmet_ns_changed(subsys, ns->nsid);
nvmet_ns_dev_disable(ns);
out_unlock:
......@@ -424,6 +461,10 @@ void nvmet_ns_free(struct nvmet_ns *ns)
{
nvmet_ns_disable(ns);
down_write(&nvmet_ana_sem);
nvmet_ana_group_enabled[ns->anagrpid]--;
up_write(&nvmet_ana_sem);
kfree(ns->device_path);
kfree(ns);
}
......@@ -441,6 +482,12 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
ns->nsid = nsid;
ns->subsys = subsys;
down_write(&nvmet_ana_sem);
ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
nvmet_ana_group_enabled[ns->anagrpid]++;
up_write(&nvmet_ana_sem);
uuid_gen(&ns->uuid);
ns->buffered_io = false;
......@@ -548,6 +595,20 @@ int nvmet_sq_init(struct nvmet_sq *sq)
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);
static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
struct nvmet_ns *ns)
{
enum nvme_ana_state state = port->ana_state[ns->anagrpid];
if (unlikely(state == NVME_ANA_INACCESSIBLE))
return NVME_SC_ANA_INACCESSIBLE;
if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
return NVME_SC_ANA_PERSISTENT_LOSS;
if (unlikely(state == NVME_ANA_CHANGE))
return NVME_SC_ANA_TRANSITION;
return 0;
}
static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
struct nvme_command *cmd = req->cmd;
......@@ -560,6 +621,9 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
if (unlikely(!req->ns))
return NVME_SC_INVALID_NS | NVME_SC_DNR;
ret = nvmet_check_ana_state(req->port, req->ns);
if (unlikely(ret))
return ret;
if (req->ns->file)
return nvmet_file_parse_io_cmd(req);
......@@ -876,6 +940,8 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
nvmet_init_cap(ctrl);
ctrl->port = req->port;
INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
INIT_LIST_HEAD(&ctrl->async_events);
......@@ -1115,12 +1181,15 @@ static int __init nvmet_init(void)
{
int error;
nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
WQ_MEM_RECLAIM, 0);
if (!buffered_io_wq) {
error = -ENOMEM;
goto out;
}
error = nvmet_init_discovery();
if (error)
goto out;
......
......@@ -30,12 +30,11 @@
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
/*
* Supported optional AENs:
*/
#define NVMET_AEN_CFG_OPTIONAL \
- NVME_AEN_CFG_NS_ATTR
+ (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_ANA_CHANGE)
/*
* Plus mandatory SMART AENs (we'll never send them, but allow enabling them):
......@@ -64,6 +63,7 @@ struct nvmet_ns {
loff_t size;
u8 nguid[16];
uuid_t uuid;
u32 anagrpid;
bool buffered_io;
bool enabled;
......@@ -98,6 +98,18 @@ struct nvmet_sq {
struct completion confirm_done;
};
struct nvmet_ana_group {
struct config_group group;
struct nvmet_port *port;
u32 grpid;
};
static inline struct nvmet_ana_group *to_ana_group(struct config_item *item)
{
return container_of(to_config_group(item), struct nvmet_ana_group,
group);
}
/**
* struct nvmet_port - Common structure to keep port
* information for the target.
......@@ -115,6 +127,9 @@ struct nvmet_port {
struct list_head subsystems;
struct config_group referrals_group;
struct list_head referrals;
struct config_group ana_groups_group;
struct nvmet_ana_group ana_default_group;
enum nvme_ana_state *ana_state;
void *priv;
bool enabled;
int inline_data_size;
......@@ -126,6 +141,13 @@ static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
group);
}
static inline struct nvmet_port *ana_groups_to_port(
struct config_item *item)
{
return container_of(to_config_group(item), struct nvmet_port,
ana_groups_group);
}
struct nvmet_ctrl {
struct nvmet_subsys *subsys;
struct nvmet_cq **cqs;
......@@ -140,6 +162,8 @@ struct nvmet_ctrl {
u16 cntlid;
u32 kato;
struct nvmet_port *port;
u32 aen_enabled;
unsigned long aen_masked;
struct nvmet_req *async_event_cmds[NVMET_ASYNC_EVENTS];
......@@ -168,6 +192,7 @@ struct nvmet_subsys {
struct kref ref;
struct list_head namespaces;
unsigned int nr_namespaces;
unsigned int max_nsid;
struct list_head ctrls;
......@@ -340,6 +365,10 @@ void nvmet_ns_disable(struct nvmet_ns *ns);
struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid);
void nvmet_ns_free(struct nvmet_ns *ns);
void nvmet_send_ana_event(struct nvmet_subsys *subsys,
struct nvmet_port *port);
void nvmet_port_send_ana_event(struct nvmet_port *port);
int nvmet_register_transport(const struct nvmet_fabrics_ops *ops);
void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops);
......@@ -360,6 +389,22 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd);
#define NVMET_QUEUE_SIZE 1024
#define NVMET_NR_QUEUES 128
#define NVMET_MAX_CMD NVMET_QUEUE_SIZE
/*
* Nice round number that makes a list of nsids fit into a page.
* Should become tunable at some point in the future.
*/
#define NVMET_MAX_NAMESPACES 1024
/*
* 0 is not a valid ANA group ID, so we start numbering at 1.
*
* ANA Group 1 exists without manual intervention, has namespaces assigned to it
* by default, and is available in an optimized state through all ports.
*/
#define NVMET_MAX_ANAGRPS 128
#define NVMET_DEFAULT_ANA_GRPID 1
#define NVMET_KAS 10
#define NVMET_DISC_KATO 120
......@@ -373,6 +418,10 @@ extern struct nvmet_subsys *nvmet_disc_subsys;
extern u64 nvmet_genctr;
extern struct rw_semaphore nvmet_config_sem;
extern u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
extern u64 nvmet_ana_chgcnt;
extern struct rw_semaphore nvmet_ana_sem;
bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
const char *hostnqn);
......
......@@ -242,7 +242,12 @@ struct nvme_id_ctrl {
__le32 sanicap;
__le32 hmminds;
__le16 hmmaxd;
- __u8 rsvd338[174];
+ __u8 rsvd338[4];
+ __u8 anatt;
+ __u8 anacap;
+ __le32 anagrpmax;
+ __le32 nanagrpid;
+ __u8 rsvd352[160];
__u8 sqes;
__u8 cqes;
__le16 maxcmd;
......@@ -258,7 +263,8 @@ struct nvme_id_ctrl {
__le16 acwu;
__u8 rsvd534[2];
__le32 sgls;
- __u8 rsvd540[228];
+ __le32 mnan;
+ __u8 rsvd544[224];
char subnqn[256];
__u8 rsvd1024[768];
__le32 ioccsz;
......@@ -312,7 +318,9 @@ struct nvme_id_ns {
__le16 nabspf;
__le16 noiob;
__u8 nvmcap[16];
- __u8 rsvd64[40];
+ __u8 rsvd64[28];
+ __le32 anagrpid;
+ __u8 rsvd96[8];
__u8 nguid[16];
__u8 eui64[8];
struct nvme_lbaf lbaf[16];
......@@ -425,6 +433,32 @@ struct nvme_effects_log {
__u8 resv[2048];
};
enum nvme_ana_state {
NVME_ANA_OPTIMIZED = 0x01,
NVME_ANA_NONOPTIMIZED = 0x02,
NVME_ANA_INACCESSIBLE = 0x03,
NVME_ANA_PERSISTENT_LOSS = 0x04,
NVME_ANA_CHANGE = 0x0f,
};
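
For reference, the anacap byte the target advertises above (bits 0 through 4) reports which of these states the controller can return. The bit positions below follow the ANA TP and are not defines in this header; they are illustrative only:

/* Illustrative only: ANACAP bit positions, one per reportable state. */
enum {
	EXAMPLE_ANACAP_OPTIMIZED	= 1 << 0,
	EXAMPLE_ANACAP_NONOPTIMIZED	= 1 << 1,
	EXAMPLE_ANACAP_INACCESSIBLE	= 1 << 2,
	EXAMPLE_ANACAP_PERSISTENT_LOSS	= 1 << 3,
	EXAMPLE_ANACAP_CHANGE		= 1 << 4,
};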
struct nvme_ana_group_desc {
__le32 grpid;
__le32 nnsids;
__le64 chgcnt;
__u8 state;
__u8 rsvd17[7];
__le32 nsids[];
};
/* flag for the log specific field of the ANA log */
#define NVME_ANA_LOG_RGO (1 << 0)
struct nvme_ana_rsp_hdr {
__le64 chgcnt;
__le16 ngrps;
__le16 rsvd10[3];
};
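
Each group descriptor carries a variable-length NSID list, so the log cannot be indexed as a flat array; consumers have to walk it descriptor by descriptor. A hedged traversal sketch (the callback shape is an assumption; the host-side multipath code, whose diff is collapsed above, needs the same kind of walk):

/* Sketch: walk the descriptors of an ANA log read via Get Log Page 0Ch.
 * "cb" is a hypothetical per-group callback. */
static int example_parse_ana_log(struct nvme_ana_rsp_hdr *hdr, size_t log_size,
		int (*cb)(struct nvme_ana_group_desc *desc))
{
	void *base = hdr;
	size_t offset = sizeof(*hdr);
	int error, i;

	for (i = 0; i < le16_to_cpu(hdr->ngrps); i++) {
		struct nvme_ana_group_desc *desc = base + offset;
		size_t nsid_len;

		if (offset + sizeof(*desc) > log_size)
			return -EINVAL;
		nsid_len = le32_to_cpu(desc->nnsids) * sizeof(__le32);
		if (offset + sizeof(*desc) + nsid_len > log_size)
			return -EINVAL;

		error = cb(desc);
		if (error)
			return error;

		offset += sizeof(*desc) + nsid_len;
	}
	return 0;
}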
enum {
NVME_SMART_CRIT_SPARE = 1 << 0,
NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
......@@ -444,11 +478,13 @@ enum {
enum {
NVME_AER_NOTICE_NS_CHANGED = 0x00,
NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
NVME_AER_NOTICE_ANA = 0x03,
};
enum {
NVME_AEN_CFG_NS_ATTR = 1 << 8,
NVME_AEN_CFG_FW_ACT = 1 << 9,
NVME_AEN_CFG_ANA_CHANGE = 1 << 11,
};
struct nvme_lba_range_type {
......@@ -763,6 +799,7 @@ enum {
NVME_LOG_FW_SLOT = 0x03,
NVME_LOG_CHANGED_NS = 0x04,
NVME_LOG_CMD_EFFECTS = 0x05,
NVME_LOG_ANA = 0x0c,
NVME_LOG_DISC = 0x70,
NVME_LOG_RESERVATION = 0x80,
NVME_FWACT_REPL = (0 << 3),
......@@ -885,7 +922,7 @@ struct nvme_get_log_page_command {
__u64 rsvd2[2];
union nvme_data_ptr dptr;
__u8 lid;
- __u8 rsvd10;
+ __u8 lsp; /* upper 4 bits reserved */
__le16 numdl;
__le16 numdu;
__u16 rsvd11;
......@@ -1185,6 +1222,13 @@ enum {
NVME_SC_ACCESS_DENIED = 0x286,
NVME_SC_UNWRITTEN_BLOCK = 0x287,
/*
* Path-related Errors:
*/
NVME_SC_ANA_PERSISTENT_LOSS = 0x301,
NVME_SC_ANA_INACCESSIBLE = 0x302,
NVME_SC_ANA_TRANSITION = 0x303,
NVME_SC_DNR = 0x4000,
};
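
These three codes are what make the rewritten failover test in nvme_complete_rq() fire: on the host they translate to a path error rather than an ordinary I/O failure, so a multipath request gets retried on another path. An illustrative classifier (the helper name is invented; the real mapping lives in the host's NVMe-status-to-blk_status_t translation):

/* Hypothetical helper: could retrying this command on another path
 * succeed?  Mask off the DNR and More bits before comparing. */
static inline bool example_status_is_path_error(u16 status)
{
	switch (status & 0x7ff) {
	case NVME_SC_ANA_PERSISTENT_LOSS:
	case NVME_SC_ANA_INACCESSIBLE:
	case NVME_SC_ANA_TRANSITION:
		return true;
	default:
		return false;
	}
}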
......