Commit 3e2cb3ad authored by Jens Axboe's avatar Jens Axboe

Merge branch 'nvme-4.15' of git://git.infradead.org/nvme into for-4.15/block

Pull NVMe changes from Christoph:

"Below are the currently queue nvme updates for Linux 4.15.  There are
a few more things that could make it for this merge window, but I'd
like to get things into linux-next, especially for the unlikely case
that Linus decided to cut -rc8.

Highlights:
 - support for SGLs in the PCIe driver (Chaitanya Kulkarni)
 - disable I/O schedulers for the admin queue (Israel Rukshin)
 - various Fibre Channel fixes and enhancements (James Smart)
 - various refactoring for better code sharing between transports
   (Sagi Grimberg and me)

as well as lots of little bits from various contributors."
parents 474f5da2 a806c6c8
......@@ -298,12 +298,12 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
int blk_mq_reinit_tagset(struct blk_mq_tag_set *set,
int (reinit_request)(void *, struct request *))
int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
int (fn)(void *, struct request *))
{
int i, j, ret = 0;
if (WARN_ON_ONCE(!reinit_request))
if (WARN_ON_ONCE(!fn))
goto out;
for (i = 0; i < set->nr_hw_queues; i++) {
......@@ -316,8 +316,7 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set,
if (!tags->static_rqs[j])
continue;
ret = reinit_request(set->driver_data,
tags->static_rqs[j]);
ret = fn(data, tags->static_rqs[j]);
if (ret)
goto out;
}
......@@ -326,7 +325,7 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set,
out:
return ret;
}
EXPORT_SYMBOL_GPL(blk_mq_reinit_tagset);
EXPORT_SYMBOL_GPL(blk_mq_tagset_iter);
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
void *priv)
......
menu "NVME Support"
source "drivers/nvme/host/Kconfig"
source "drivers/nvme/target/Kconfig"
endmenu
......@@ -34,13 +34,13 @@
#define NVME_MINORS (1U << MINORBITS)
unsigned char admin_timeout = 60;
module_param(admin_timeout, byte, 0644);
unsigned int admin_timeout = 60;
module_param(admin_timeout, uint, 0644);
MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
EXPORT_SYMBOL_GPL(admin_timeout);
unsigned char nvme_io_timeout = 30;
module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
unsigned int nvme_io_timeout = 30;
module_param_named(io_timeout, nvme_io_timeout, uint, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
EXPORT_SYMBOL_GPL(nvme_io_timeout);
......@@ -52,9 +52,6 @@ static u8 nvme_max_retries = 5;
module_param_named(max_retries, nvme_max_retries, byte, 0644);
MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
static int nvme_char_major;
module_param(nvme_char_major, int, 0);
static unsigned long default_ps_max_latency_us = 100000;
module_param(default_ps_max_latency_us, ulong, 0644);
MODULE_PARM_DESC(default_ps_max_latency_us,
......@@ -71,9 +68,8 @@ MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
struct workqueue_struct *nvme_wq;
EXPORT_SYMBOL_GPL(nvme_wq);
static LIST_HEAD(nvme_ctrl_list);
static DEFINE_SPINLOCK(dev_list_lock);
static DEFINE_IDA(nvme_instance_ida);
static dev_t nvme_chr_devt;
static struct class *nvme_class;
static __le32 nvme_get_log_dw10(u8 lid, size_t size)
......@@ -101,6 +97,46 @@ static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
return ret;
}
static void nvme_delete_ctrl_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl =
container_of(work, struct nvme_ctrl, delete_work);
flush_work(&ctrl->reset_work);
nvme_stop_ctrl(ctrl);
nvme_remove_namespaces(ctrl);
ctrl->ops->delete_ctrl(ctrl);
nvme_uninit_ctrl(ctrl);
nvme_put_ctrl(ctrl);
}
int nvme_delete_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
return -EBUSY;
if (!queue_work(nvme_wq, &ctrl->delete_work))
return -EBUSY;
return 0;
}
EXPORT_SYMBOL_GPL(nvme_delete_ctrl);
int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
{
int ret = 0;
/*
* Keep a reference until the work is flushed since ->delete_ctrl
* can free the controller.
*/
nvme_get_ctrl(ctrl);
ret = nvme_delete_ctrl(ctrl);
if (!ret)
flush_work(&ctrl->delete_work);
nvme_put_ctrl(ctrl);
return ret;
}
EXPORT_SYMBOL_GPL(nvme_delete_ctrl_sync);
static blk_status_t nvme_error_status(struct request *req)
{
switch (nvme_req(req)->status & 0x7ff) {
......@@ -205,6 +241,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
case NVME_CTRL_RECONNECTING:
switch (old_state) {
case NVME_CTRL_LIVE:
case NVME_CTRL_RESETTING:
changed = true;
/* FALLTHRU */
default:
......@@ -251,12 +288,6 @@ static void nvme_free_ns(struct kref *kref)
if (ns->ndev)
nvme_nvm_unregister(ns);
if (ns->disk) {
spin_lock(&dev_list_lock);
ns->disk->private_data = NULL;
spin_unlock(&dev_list_lock);
}
put_disk(ns->disk);
ida_simple_remove(&ns->ctrl->ns_ida, ns->instance);
nvme_put_ctrl(ns->ctrl);
......@@ -268,29 +299,6 @@ static void nvme_put_ns(struct nvme_ns *ns)
kref_put(&ns->kref, nvme_free_ns);
}
static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
{
struct nvme_ns *ns;
spin_lock(&dev_list_lock);
ns = disk->private_data;
if (ns) {
if (!kref_get_unless_zero(&ns->kref))
goto fail;
if (!try_module_get(ns->ctrl->ops->module))
goto fail_put_ns;
}
spin_unlock(&dev_list_lock);
return ns;
fail_put_ns:
kref_put(&ns->kref, nvme_free_ns);
fail:
spin_unlock(&dev_list_lock);
return NULL;
}
struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, unsigned int flags, int qid)
{
......@@ -1052,27 +1060,18 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
}
}
#ifdef CONFIG_COMPAT
static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
return nvme_ioctl(bdev, mode, cmd, arg);
}
#else
#define nvme_compat_ioctl NULL
#endif
static int nvme_open(struct block_device *bdev, fmode_t mode)
{
return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
struct nvme_ns *ns = bdev->bd_disk->private_data;
if (!kref_get_unless_zero(&ns->kref))
return -ENXIO;
return 0;
}
static void nvme_release(struct gendisk *disk, fmode_t mode)
{
struct nvme_ns *ns = disk->private_data;
module_put(ns->ctrl->ops->module);
nvme_put_ns(ns);
nvme_put_ns(disk->private_data);
}
static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
......@@ -1380,7 +1379,7 @@ EXPORT_SYMBOL_GPL(nvme_sec_submit);
static const struct block_device_operations nvme_fops = {
.owner = THIS_MODULE,
.ioctl = nvme_ioctl,
.compat_ioctl = nvme_compat_ioctl,
.compat_ioctl = nvme_ioctl,
.open = nvme_open,
.release = nvme_release,
.getgeo = nvme_getgeo,
......@@ -1930,33 +1929,12 @@ EXPORT_SYMBOL_GPL(nvme_init_identify);
static int nvme_dev_open(struct inode *inode, struct file *file)
{
struct nvme_ctrl *ctrl;
int instance = iminor(inode);
int ret = -ENODEV;
spin_lock(&dev_list_lock);
list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
if (ctrl->instance != instance)
continue;
struct nvme_ctrl *ctrl =
container_of(inode->i_cdev, struct nvme_ctrl, cdev);
if (!ctrl->admin_q) {
ret = -EWOULDBLOCK;
break;
}
if (!kref_get_unless_zero(&ctrl->kref))
break;
if (ctrl->state != NVME_CTRL_LIVE)
return -EWOULDBLOCK;
file->private_data = ctrl;
ret = 0;
break;
}
spin_unlock(&dev_list_lock);
return ret;
}
static int nvme_dev_release(struct inode *inode, struct file *file)
{
nvme_put_ctrl(file->private_data);
return 0;
}
......@@ -2020,7 +1998,6 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
static const struct file_operations nvme_dev_fops = {
.owner = THIS_MODULE,
.open = nvme_dev_open,
.release = nvme_dev_release,
.unlocked_ioctl = nvme_dev_ioctl,
.compat_ioctl = nvme_dev_ioctl,
};
......@@ -2186,7 +2163,7 @@ static ssize_t nvme_sysfs_delete(struct device *dev,
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
if (device_remove_file_self(dev, attr))
ctrl->ops->delete_ctrl(ctrl);
nvme_delete_ctrl_sync(ctrl);
return count;
}
static DEVICE_ATTR(delete_controller, S_IWUSR, NULL, nvme_sysfs_delete);
......@@ -2298,7 +2275,8 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry(ns, &ctrl->namespaces, list) {
if (ns->ns_id == nsid) {
kref_get(&ns->kref);
if (!kref_get_unless_zero(&ns->kref))
continue;
ret = ns;
break;
}
......@@ -2401,7 +2379,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
list_add_tail(&ns->list, &ctrl->namespaces);
mutex_unlock(&ctrl->namespaces_mutex);
kref_get(&ctrl->kref);
nvme_get_ctrl(ctrl);
kfree(id);
......@@ -2659,7 +2637,7 @@ static void nvme_fw_act_work(struct work_struct *work)
return;
nvme_start_queues(ctrl);
/* read FW slot informationi to clear the AER*/
/* read FW slot information to clear the AER */
nvme_get_fw_slot_info(ctrl);
}
......@@ -2706,35 +2684,6 @@ void nvme_queue_async_events(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_queue_async_events);
static DEFINE_IDA(nvme_instance_ida);
static int nvme_set_instance(struct nvme_ctrl *ctrl)
{
int instance, error;
do {
if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
return -ENODEV;
spin_lock(&dev_list_lock);
error = ida_get_new(&nvme_instance_ida, &instance);
spin_unlock(&dev_list_lock);
} while (error == -EAGAIN);
if (error)
return -ENODEV;
ctrl->instance = instance;
return 0;
}
static void nvme_release_instance(struct nvme_ctrl *ctrl)
{
spin_lock(&dev_list_lock);
ida_remove(&nvme_instance_ida, ctrl->instance);
spin_unlock(&dev_list_lock);
}
void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
{
nvme_stop_keep_alive(ctrl);
......@@ -2759,31 +2708,21 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
{
device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
spin_lock(&dev_list_lock);
list_del(&ctrl->node);
spin_unlock(&dev_list_lock);
cdev_device_del(&ctrl->cdev, ctrl->device);
}
EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
static void nvme_free_ctrl(struct kref *kref)
static void nvme_free_ctrl(struct device *dev)
{
struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);
struct nvme_ctrl *ctrl =
container_of(dev, struct nvme_ctrl, ctrl_device);
put_device(ctrl->device);
nvme_release_instance(ctrl);
ida_simple_remove(&nvme_instance_ida, ctrl->instance);
ida_destroy(&ctrl->ns_ida);
ctrl->ops->free_ctrl(ctrl);
}
void nvme_put_ctrl(struct nvme_ctrl *ctrl)
{
kref_put(&ctrl->kref, nvme_free_ctrl);
}
EXPORT_SYMBOL_GPL(nvme_put_ctrl);
/*
* Initialize a NVMe controller structures. This needs to be called during
* earliest initialization so that we have the initialized structured around
......@@ -2798,32 +2737,38 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
spin_lock_init(&ctrl->lock);
INIT_LIST_HEAD(&ctrl->namespaces);
mutex_init(&ctrl->namespaces_mutex);
kref_init(&ctrl->kref);
ctrl->dev = dev;
ctrl->ops = ops;
ctrl->quirks = quirks;
INIT_WORK(&ctrl->scan_work, nvme_scan_work);
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
ret = nvme_set_instance(ctrl);
if (ret)
ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);
if (ret < 0)
goto out;
ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
MKDEV(nvme_char_major, ctrl->instance),
ctrl, nvme_dev_attr_groups,
"nvme%d", ctrl->instance);
if (IS_ERR(ctrl->device)) {
ret = PTR_ERR(ctrl->device);
ctrl->instance = ret;
device_initialize(&ctrl->ctrl_device);
ctrl->device = &ctrl->ctrl_device;
ctrl->device->devt = MKDEV(MAJOR(nvme_chr_devt), ctrl->instance);
ctrl->device->class = nvme_class;
ctrl->device->parent = ctrl->dev;
ctrl->device->groups = nvme_dev_attr_groups;
ctrl->device->release = nvme_free_ctrl;
dev_set_drvdata(ctrl->device, ctrl);
ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
if (ret)
goto out_release_instance;
}
get_device(ctrl->device);
ida_init(&ctrl->ns_ida);
spin_lock(&dev_list_lock);
list_add_tail(&ctrl->node, &nvme_ctrl_list);
spin_unlock(&dev_list_lock);
cdev_init(&ctrl->cdev, &nvme_dev_fops);
ctrl->cdev.owner = ops->module;
ret = cdev_device_add(&ctrl->cdev, ctrl->device);
if (ret)
goto out_free_name;
ida_init(&ctrl->ns_ida);
/*
* Initialize latency tolerance controls. The sysfs files won't
......@@ -2834,8 +2779,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
min(default_ps_max_latency_us, (unsigned long)S32_MAX));
return 0;
out_free_name:
kfree_const(dev->kobj.name);
out_release_instance:
nvme_release_instance(ctrl);
ida_simple_remove(&nvme_instance_ida, ctrl->instance);
out:
return ret;
}
......@@ -2944,6 +2891,16 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_start_queues);
int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
{
if (!ctrl->ops->reinit_request)
return 0;
return blk_mq_tagset_iter(set, set->driver_data,
ctrl->ops->reinit_request);
}
EXPORT_SYMBOL_GPL(nvme_reinit_tagset);
int __init nvme_core_init(void)
{
int result;
......@@ -2953,12 +2910,9 @@ int __init nvme_core_init(void)
if (!nvme_wq)
return -ENOMEM;
result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
&nvme_dev_fops);
result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme");
if (result < 0)
goto destroy_wq;
else if (result > 0)
nvme_char_major = result;
nvme_class = class_create(THIS_MODULE, "nvme");
if (IS_ERR(nvme_class)) {
......@@ -2969,7 +2923,7 @@ int __init nvme_core_init(void)
return 0;
unregister_chrdev:
__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
destroy_wq:
destroy_workqueue(nvme_wq);
return result;
......@@ -2978,7 +2932,7 @@ int __init nvme_core_init(void)
void nvme_core_exit(void)
{
class_destroy(nvme_class);
__unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
destroy_workqueue(nvme_wq);
}
......
......@@ -548,6 +548,7 @@ static const match_table_t opt_tokens = {
{ NVMF_OPT_HOSTNQN, "hostnqn=%s" },
{ NVMF_OPT_HOST_TRADDR, "host_traddr=%s" },
{ NVMF_OPT_HOST_ID, "hostid=%s" },
{ NVMF_OPT_DUP_CONNECT, "duplicate_connect" },
{ NVMF_OPT_ERR, NULL }
};
......@@ -566,6 +567,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->nr_io_queues = num_online_cpus();
opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
opts->kato = NVME_DEFAULT_KATO;
opts->duplicate_connect = false;
options = o = kstrdup(buf, GFP_KERNEL);
if (!options)
......@@ -742,6 +744,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
goto out;
}
break;
case NVMF_OPT_DUP_CONNECT:
opts->duplicate_connect = true;
break;
default:
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
p);
......@@ -823,7 +828,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
#define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
#define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
NVMF_OPT_HOST_ID)
NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT)
static struct nvme_ctrl *
nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
......@@ -841,6 +846,9 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
if (ret)
goto out_free_opts;
request_module("nvme-%s", opts->transport);
/*
* Check the generic options first as we need a valid transport for
* the lookup below. Then clear the generic flags so that transport
......@@ -879,7 +887,7 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
"controller returned incorrect NQN: \"%s\".\n",
ctrl->subnqn);
up_read(&nvmf_transports_rwsem);
ctrl->ops->delete_ctrl(ctrl);
nvme_delete_ctrl_sync(ctrl);
return ERR_PTR(-EINVAL);
}
......
......@@ -57,6 +57,7 @@ enum {
NVMF_OPT_HOST_TRADDR = 1 << 10,
NVMF_OPT_CTRL_LOSS_TMO = 1 << 11,
NVMF_OPT_HOST_ID = 1 << 12,
NVMF_OPT_DUP_CONNECT = 1 << 13,
};
/**
......@@ -96,6 +97,7 @@ struct nvmf_ctrl_options {
unsigned int nr_io_queues;
unsigned int reconnect_delay;
bool discovery_nqn;
bool duplicate_connect;
unsigned int kato;
struct nvmf_host *host;
int max_reconnects;
......@@ -131,6 +133,18 @@ struct nvmf_transport_ops {
struct nvmf_ctrl_options *opts);
};
static inline bool
nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
struct nvmf_ctrl_options *opts)
{
if (strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
strcmp(opts->host->nqn, ctrl->opts->host->nqn) ||
memcmp(&opts->host->id, &ctrl->opts->host->id, sizeof(uuid_t)))
return false;
return true;
}
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
......
......@@ -45,6 +45,8 @@ enum nvme_fc_queue_flags {
#define NVMEFC_QUEUE_DELAY 3 /* ms units */
#define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */
struct nvme_fc_queue {
struct nvme_fc_ctrl *ctrl;
struct device *dev;
......@@ -136,6 +138,7 @@ struct nvme_fc_rport {
struct nvme_fc_lport *lport;
spinlock_t lock;
struct kref ref;
unsigned long dev_loss_end;
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
enum nvme_fcctrl_flags {
......@@ -157,7 +160,6 @@ struct nvme_fc_ctrl {
struct blk_mq_tag_set admin_tag_set;
struct blk_mq_tag_set tag_set;
struct work_struct delete_work;
struct delayed_work connect_work;
struct kref ref;
......@@ -213,10 +215,16 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt);
/*
* These items are short-term. They will eventually be moved into
* a generic FC class. See comments in module init.
*/
static struct class *fc_class;
static struct device *fc_udev_device;
/* *********************** FC-NVME Port Management ************************ */
static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *);
static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
struct nvme_fc_queue *, unsigned int);
......@@ -452,6 +460,171 @@ nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr)
}
EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport);
/*
* TRADDR strings, per FC-NVME are fixed format:
* "nn-0x<16hexdigits>:pn-0x<16hexdigits>" - 43 characters
* udev event will only differ by prefix of what field is
* being specified:
* "NVMEFC_HOST_TRADDR=" or "NVMEFC_TRADDR=" - 19 max characters
* 19 + 43 + null_fudge = 64 characters
*/
#define FCNVME_TRADDR_LENGTH 64
static void
nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport,
struct nvme_fc_rport *rport)
{
char hostaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_HOST_TRADDR=...*/
char tgtaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_TRADDR=...*/
char *envp[4] = { "FC_EVENT=nvmediscovery", hostaddr, tgtaddr, NULL };
if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY))
return;
snprintf(hostaddr, sizeof(hostaddr),
"NVMEFC_HOST_TRADDR=nn-0x%016llx:pn-0x%016llx",
lport->localport.node_name, lport->localport.port_name);
snprintf(tgtaddr, sizeof(tgtaddr),
"NVMEFC_TRADDR=nn-0x%016llx:pn-0x%016llx",
rport->remoteport.node_name, rport->remoteport.port_name);
kobject_uevent_env(&fc_udev_device->kobj, KOBJ_CHANGE, envp);
}
static void
nvme_fc_free_rport(struct kref *ref)
{
struct nvme_fc_rport *rport =
container_of(ref, struct nvme_fc_rport, ref);
struct nvme_fc_lport *lport =
localport_to_lport(rport->remoteport.localport);
unsigned long flags;
WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
WARN_ON(!list_empty(&rport->ctrl_list));
/* remove from lport list */
spin_lock_irqsave(&nvme_fc_lock, flags);
list_del(&rport->endp_list);
spin_unlock_irqrestore(&nvme_fc_lock, flags);
/* let the LLDD know we've finished tearing it down */
lport->ops->remoteport_delete(&rport->remoteport);
ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);
kfree(rport);
nvme_fc_lport_put(lport);
}
static void
nvme_fc_rport_put(struct nvme_fc_rport *rport)
{
kref_put(&rport->ref, nvme_fc_free_rport);
}
static int
nvme_fc_rport_get(struct nvme_fc_rport *rport)
{
return kref_get_unless_zero(&rport->ref);
}
static void
nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
{
switch (ctrl->ctrl.state) {
case NVME_CTRL_NEW:
case NVME_CTRL_RECONNECTING:
/*
* As all reconnects were suppressed, schedule a
* connect.
*/
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: connectivity re-established. "
"Attempting reconnect\n", ctrl->cnum);
queue_delayed_work(nvme_wq, &ctrl->connect_work, 0);
break;
case NVME_CTRL_RESETTING:
/*
* Controller is already in the process of terminating the
* association. No need to do anything further. The reconnect
* step will naturally occur after the reset completes.
*/
break;
default:
/* no action to take - let it delete */
break;
}
}
static struct nvme_fc_rport *
nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
struct nvme_fc_port_info *pinfo)
{
struct nvme_fc_rport *rport;
struct nvme_fc_ctrl *ctrl;
unsigned long flags;
spin_lock_irqsave(&nvme_fc_lock, flags);
list_for_each_entry(rport, &lport->endp_list, endp_list) {
if (rport->remoteport.node_name != pinfo->node_name ||
rport->remoteport.port_name != pinfo->port_name)
continue;
if (!nvme_fc_rport_get(rport)) {
rport = ERR_PTR(-ENOLCK);
goto out_done;
}
spin_unlock_irqrestore(&nvme_fc_lock, flags);
spin_lock_irqsave(&rport->lock, flags);
/* has it been unregistered */
if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) {
/* means lldd called us twice */
spin_unlock_irqrestore(&rport->lock, flags);
nvme_fc_rport_put(rport);
return ERR_PTR(-ESTALE);
}
rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
rport->dev_loss_end = 0;
/*
* kick off a reconnect attempt on all associations to the
* remote port. A successful reconnects will resume i/o.
*/
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
nvme_fc_resume_controller(ctrl);
spin_unlock_irqrestore(&rport->lock, flags);
return rport;
}
rport = NULL;
out_done:
spin_unlock_irqrestore(&nvme_fc_lock, flags);
return rport;
}
static inline void
__nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport,
struct nvme_fc_port_info *pinfo)
{
if (pinfo->dev_loss_tmo)
rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo;
else
rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO;
}
/**
* nvme_fc_register_remoteport - transport entry point called by an
* LLDD to register the existence of a NVME
......@@ -478,22 +651,45 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
unsigned long flags;
int ret, idx;
if (!nvme_fc_lport_get(lport)) {
ret = -ESHUTDOWN;
goto out_reghost_failed;
}
/*
* look to see if there is already a remoteport that is waiting
* for a reconnect (within dev_loss_tmo) with the same WWN's.
* If so, transition to it and reconnect.
*/
newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo);
/* found an rport, but something about its state is bad */
if (IS_ERR(newrec)) {
ret = PTR_ERR(newrec);
goto out_lport_put;
/* found existing rport, which was resumed */
} else if (newrec) {
nvme_fc_lport_put(lport);
__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
nvme_fc_signal_discovery_scan(lport, newrec);
*portptr = &newrec->remoteport;
return 0;
}
/* nothing found - allocate a new remoteport struct */
newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz),
GFP_KERNEL);
if (!newrec) {
ret = -ENOMEM;
goto out_reghost_failed;
}
if (!nvme_fc_lport_get(lport)) {
ret = -ESHUTDOWN;
goto out_kfree_rport;
goto out_lport_put;
}
idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL);
if (idx < 0) {
ret = -ENOSPC;
goto out_lport_put;
goto out_kfree_rport;
}
INIT_LIST_HEAD(&newrec->endp_list);
......@@ -511,63 +707,27 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
newrec->remoteport.port_id = pinfo->port_id;
newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
newrec->remoteport.port_num = idx;
__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
spin_lock_irqsave(&nvme_fc_lock, flags);
list_add_tail(&newrec->endp_list, &lport->endp_list);
spin_unlock_irqrestore(&nvme_fc_lock, flags);
nvme_fc_signal_discovery_scan(lport, newrec);
*portptr = &newrec->remoteport;
return 0;
out_lport_put:
nvme_fc_lport_put(lport);
out_kfree_rport:
kfree(newrec);
out_lport_put:
nvme_fc_lport_put(lport);
out_reghost_failed:
*portptr = NULL;
return ret;
}
EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport);
static void
nvme_fc_free_rport(struct kref *ref)
{
struct nvme_fc_rport *rport =
container_of(ref, struct nvme_fc_rport, ref);
struct nvme_fc_lport *lport =
localport_to_lport(rport->remoteport.localport);
unsigned long flags;
WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
WARN_ON(!list_empty(&rport->ctrl_list));
/* remove from lport list */
spin_lock_irqsave(&nvme_fc_lock, flags);
list_del(&rport->endp_list);
spin_unlock_irqrestore(&nvme_fc_lock, flags);
/* let the LLDD know we've finished tearing it down */
lport->ops->remoteport_delete(&rport->remoteport);
ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);
kfree(rport);
nvme_fc_lport_put(lport);
}
static void
nvme_fc_rport_put(struct nvme_fc_rport *rport)
{
kref_put(&rport->ref, nvme_fc_free_rport);
}
static int
nvme_fc_rport_get(struct nvme_fc_rport *rport)
{
return kref_get_unless_zero(&rport->ref);
}
static int
nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
{
......@@ -592,6 +752,58 @@ nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
return 0;
}
static void
nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
{
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: controller connectivity lost. Awaiting "
"Reconnect", ctrl->cnum);
switch (ctrl->ctrl.state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
/*
* Schedule a controller reset. The reset will terminate the
* association and schedule the reconnect timer. Reconnects
* will be attempted until either the ctlr_loss_tmo
* (max_retries * connect_delay) expires or the remoteport's
* dev_loss_tmo expires.
*/
if (nvme_reset_ctrl(&ctrl->ctrl)) {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Couldn't schedule reset. "
"Deleting controller.\n",
ctrl->cnum);
nvme_delete_ctrl(&ctrl->ctrl);
}
break;
case NVME_CTRL_RECONNECTING:
/*
* The association has already been terminated and the
* controller is attempting reconnects. No need to do anything
* futher. Reconnects will be attempted until either the
* ctlr_loss_tmo (max_retries * connect_delay) expires or the
* remoteport's dev_loss_tmo expires.
*/
break;
case NVME_CTRL_RESETTING:
/*
* Controller is already in the process of terminating the
* association. No need to do anything further. The reconnect
* step will kick in naturally after the association is
* terminated.
*/
break;
case NVME_CTRL_DELETING:
default:
/* no action to take - let it delete */
break;
}
}
/**
* nvme_fc_unregister_remoteport - transport entry point called by an
* LLDD to deregister/remove a previously
......@@ -621,19 +833,75 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
}
portptr->port_state = FC_OBJSTATE_DELETED;
/* tear down all associations to the remote port */
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
__nvme_fc_del_ctrl(ctrl);
rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ);
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
/* if dev_loss_tmo==0, dev loss is immediate */
if (!portptr->dev_loss_tmo) {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: controller connectivity lost. "
"Deleting controller.\n",
ctrl->cnum);
nvme_delete_ctrl(&ctrl->ctrl);
} else
nvme_fc_ctrl_connectivity_loss(ctrl);
}
spin_unlock_irqrestore(&rport->lock, flags);
nvme_fc_abort_lsops(rport);
/*
* release the reference, which will allow, if all controllers
* go away, which should only occur after dev_loss_tmo occurs,
* for the rport to be torn down.
*/
nvme_fc_rport_put(rport);
return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport);
/**
* nvme_fc_rescan_remoteport - transport entry point called by an
* LLDD to request a nvme device rescan.
* @remoteport: pointer to the (registered) remote port that is to be
* rescanned.
*
* Returns: N/A
*/
void
nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport)
{
struct nvme_fc_rport *rport = remoteport_to_rport(remoteport);
nvme_fc_signal_discovery_scan(rport->lport, rport);
}
EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport);
int
nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr,
u32 dev_loss_tmo)
{
struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
unsigned long flags;
spin_lock_irqsave(&rport->lock, flags);
if (portptr->port_state != FC_OBJSTATE_ONLINE) {
spin_unlock_irqrestore(&rport->lock, flags);
return -EINVAL;
}
/* a dev_loss_tmo of 0 (immediate) is allowed to be set */
rport->remoteport.dev_loss_tmo = dev_loss_tmo;
spin_unlock_irqrestore(&rport->lock, flags);
return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_set_remoteport_devloss);
/* *********************** FC-NVME DMA Handling **************************** */
......@@ -723,7 +991,6 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
dma_unmap_sg(dev, sg, nents, dir);
}
/* *********************** FC-NVME LS Handling **************************** */
static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
......@@ -1331,7 +1598,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
struct nvme_command *sqe = &op->cmd_iu.sqe;
__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
union nvme_result result;
bool complete_rq, terminate_assoc = true;
bool terminate_assoc = true;
/*
* WARNING:
......@@ -1373,8 +1640,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
sizeof(op->rsp_iu), DMA_FROM_DEVICE);
if (atomic_read(&op->state) == FCPOP_STATE_ABORTED)
status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
if (atomic_read(&op->state) == FCPOP_STATE_ABORTED ||
op->flags & FCOP_FLAGS_TERMIO)
status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
else if (freq->status)
status = cpu_to_le16(NVME_SC_INTERNAL << 1);
......@@ -1438,23 +1706,27 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
done:
if (op->flags & FCOP_FLAGS_AEN) {
nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
__nvme_fc_fcpop_chk_teardowns(ctrl, op);
atomic_set(&op->state, FCPOP_STATE_IDLE);
op->flags = FCOP_FLAGS_AEN; /* clear other flags */
nvme_fc_ctrl_put(ctrl);
goto check_error;
}
complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
if (!complete_rq) {
if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
if (blk_queue_dying(rq->q))
/*
* Force failures of commands if we're killing the controller
* or have an error on a command used to create an new association
*/
if (status &&
(blk_queue_dying(rq->q) ||
ctrl->ctrl.state == NVME_CTRL_NEW ||
ctrl->ctrl.state == NVME_CTRL_RECONNECTING))
status |= cpu_to_le16(NVME_SC_DNR << 1);
}
nvme_end_request(rq, status, result);
} else
if (__nvme_fc_fcpop_chk_teardowns(ctrl, op))
__nvme_fc_final_op_cleanup(rq);
else
nvme_end_request(rq, status, result);
check_error:
if (terminate_assoc)
......@@ -1825,13 +2097,6 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: error_recovery: Couldn't change state "
"to RECONNECTING\n", ctrl->cnum);
return;
}
nvme_reset_ctrl(&ctrl->ctrl);
}
......@@ -1842,13 +2107,14 @@ nvme_fc_timeout(struct request *rq, bool reserved)
struct nvme_fc_ctrl *ctrl = op->ctrl;
int ret;
if (reserved)
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
atomic_read(&op->state) == FCPOP_STATE_ABORTED)
return BLK_EH_RESET_TIMER;
ret = __nvme_fc_abort_op(ctrl, op);
if (ret)
/* io wasn't active to abort consider it done */
return BLK_EH_HANDLED;
/* io wasn't active to abort */
return BLK_EH_NOT_HANDLED;
/*
* we can't individually ABTS an io without affecting the queue,
......@@ -1859,7 +2125,12 @@ nvme_fc_timeout(struct request *rq, bool reserved)
*/
nvme_fc_error_recovery(ctrl, "io timeout error");
return BLK_EH_HANDLED;
/*
* the io abort has been initiated. Have the reset timer
* restarted and the abort completion will complete the io
* shortly. Avoids a synchronous wait while the abort finishes.
*/
return BLK_EH_RESET_TIMER;
}
static int
......@@ -2337,7 +2608,7 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
nvme_fc_init_io_queues(ctrl);
ret = blk_mq_reinit_tagset(&ctrl->tag_set, nvme_fc_reinit_request);
ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
if (ret)
goto out_free_io_queues;
......@@ -2368,12 +2639,14 @@ static int
nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
{
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
u32 segs;
int ret;
bool changed;
++ctrl->ctrl.nr_reconnects;
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
return -ENODEV;
/*
* Create the admin queue
*/
......@@ -2419,9 +2692,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
if (ret)
goto out_disconnect_admin_queue;
segs = min_t(u32, NVME_FC_MAX_SEGMENTS,
ctrl->lport->ops->max_sgl_segments);
ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9);
ctrl->ctrl.max_hw_sectors =
(ctrl->lport->ops->max_sgl_segments - 1) << (PAGE_SHIFT - 9);
ret = nvme_init_identify(&ctrl->ctrl);
if (ret)
......@@ -2465,10 +2737,10 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
ctrl->ctrl.nr_reconnects = 0;
if (changed)
nvme_start_ctrl(&ctrl->ctrl);
return 0; /* Success */
......@@ -2537,6 +2809,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
* use blk_mq_tagset_busy_itr() and the transport routine to
* terminate the exchanges.
*/
if (ctrl->ctrl.state != NVME_CTRL_NEW)
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl);
......@@ -2571,99 +2844,59 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
}
static void
nvme_fc_delete_ctrl_work(struct work_struct *work)
nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_fc_ctrl *ctrl =
container_of(work, struct nvme_fc_ctrl, delete_work);
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
cancel_work_sync(&ctrl->ctrl.reset_work);
cancel_delayed_work_sync(&ctrl->connect_work);
nvme_stop_ctrl(&ctrl->ctrl);
nvme_remove_namespaces(&ctrl->ctrl);
/*
* kill the association on the link side. this will block
* waiting for io to terminate
*/
nvme_fc_delete_association(ctrl);
/*
* tear down the controller
* After the last reference on the nvme ctrl is removed,
* the transport nvme_fc_nvme_ctrl_freed() callback will be
* invoked. From there, the transport will tear down it's
* logical queues and association.
*/
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
}
static bool
__nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return true;
if (!queue_work(nvme_wq, &ctrl->delete_work))
return true;
return false;
}
static int
__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
{
return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0;
}
/*
* Request from nvme core layer to delete the controller
*/
static int
nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
int ret;
if (!kref_get_unless_zero(&ctrl->ctrl.kref))
return -EBUSY;
ret = __nvme_fc_del_ctrl(ctrl);
if (!ret)
flush_workqueue(nvme_wq);
nvme_put_ctrl(&ctrl->ctrl);
return ret;
}
static void
nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
{
/* If we are resetting/deleting then do nothing */
if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
ctrl->ctrl.state == NVME_CTRL_LIVE);
struct nvme_fc_rport *rport = ctrl->rport;
struct nvme_fc_remote_port *portptr = &rport->remoteport;
unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
bool recon = true;
if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING)
return;
}
if (portptr->port_state == FC_OBJSTATE_ONLINE)
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
ctrl->cnum, status);
else if (time_after_eq(jiffies, rport->dev_loss_end))
recon = false;
if (nvmf_should_reconnect(&ctrl->ctrl)) {
if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
if (portptr->port_state == FC_OBJSTATE_ONLINE)
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
queue_delayed_work(nvme_wq, &ctrl->connect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
"NVME-FC{%d}: Reconnect attempt in %ld "
"seconds\n",
ctrl->cnum, recon_delay / HZ);
else if (time_after(jiffies + recon_delay, rport->dev_loss_end))
recon_delay = rport->dev_loss_end - jiffies;
queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay);
} else {
if (portptr->port_state == FC_OBJSTATE_ONLINE)
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Max reconnect attempts (%d) "
"reached. Removing controller\n",
ctrl->cnum, ctrl->ctrl.nr_reconnects);
WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
else
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: dev_loss_tmo (%d) expired "
"while waiting for remoteport connectivity. "
"Removing controller\n", ctrl->cnum,
portptr->dev_loss_tmo);
WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
}
}
......@@ -2675,15 +2908,28 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
int ret;
nvme_stop_ctrl(&ctrl->ctrl);
/* will block will waiting for io to terminate */
nvme_fc_delete_association(ctrl);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: error_recovery: Couldn't change state "
"to RECONNECTING\n", ctrl->cnum);
return;
}
if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE)
ret = nvme_fc_create_association(ctrl);
else
ret = -ENOTCONN;
if (ret)
nvme_fc_reconnect_or_delete(ctrl, ret);
else
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
"NVME-FC{%d}: controller reset complete\n",
ctrl->cnum);
}
static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
......@@ -2695,8 +2941,9 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.reg_write32 = nvmf_reg_write32,
.free_ctrl = nvme_fc_nvme_ctrl_freed,
.submit_async_event = nvme_fc_submit_async_event,
.delete_ctrl = nvme_fc_del_nvme_ctrl,
.delete_ctrl = nvme_fc_delete_ctrl,
.get_address = nvmf_get_address,
.reinit_request = nvme_fc_reinit_request,
};
static void
......@@ -2728,6 +2975,33 @@ static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
};
/*
* Fails a controller request if it matches an existing controller
* (association) with the same tuple:
* <Host NQN, Host ID, local FC port, remote FC port, SUBSYS NQN>
*
* The ports don't need to be compared as they are intrinsically
* already matched by the port pointers supplied.
*/
static bool
nvme_fc_existing_controller(struct nvme_fc_rport *rport,
struct nvmf_ctrl_options *opts)
{
struct nvme_fc_ctrl *ctrl;
unsigned long flags;
bool found = false;
spin_lock_irqsave(&rport->lock, flags);
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
found = nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts);
if (found)
break;
}
spin_unlock_irqrestore(&rport->lock, flags);
return found;
}
static struct nvme_ctrl *
nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
......@@ -2742,6 +3016,12 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
goto out_fail;
}
if (!opts->duplicate_connect &&
nvme_fc_existing_controller(rport, opts)) {
ret = -EALREADY;
goto out_fail;
}
ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
if (!ctrl) {
ret = -ENOMEM;
......@@ -2764,7 +3044,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
get_device(ctrl->dev);
kref_init(&ctrl->ref);
INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
spin_lock_init(&ctrl->lock);
......@@ -2796,6 +3075,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->admin_tag_set.driver_data = ctrl;
ctrl->admin_tag_set.nr_hw_queues = 1;
ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED;
ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
if (ret)
......@@ -2849,7 +3129,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
return ERR_PTR(ret);
}
kref_get(&ctrl->ctrl.kref);
nvme_get_ctrl(&ctrl->ctrl);
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
......@@ -2997,7 +3277,50 @@ static struct nvmf_transport_ops nvme_fc_transport = {
static int __init nvme_fc_init_module(void)
{
return nvmf_register_transport(&nvme_fc_transport);
int ret;
/*
* NOTE:
* It is expected that in the future the kernel will combine
* the FC-isms that are currently under scsi and now being
* added to by NVME into a new standalone FC class. The SCSI
* and NVME protocols and their devices would be under this
* new FC class.
*
* As we need something to post FC-specific udev events to,
* specifically for nvme probe events, start by creating the
* new device class. When the new standalone FC class is
* put in place, this code will move to a more generic
* location for the class.
*/
fc_class = class_create(THIS_MODULE, "fc");
if (IS_ERR(fc_class)) {
pr_err("couldn't register class fc\n");
return PTR_ERR(fc_class);
}
/*
* Create a device for the FC-centric udev events
*/
fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL,
"fc_udev_device");
if (IS_ERR(fc_udev_device)) {
pr_err("couldn't create fc_udev device!\n");
ret = PTR_ERR(fc_udev_device);
goto out_destroy_class;
}
ret = nvmf_register_transport(&nvme_fc_transport);
if (ret)
goto out_destroy_device;
return 0;
out_destroy_device:
device_destroy(fc_class, MKDEV(0, 0));
out_destroy_class:
class_destroy(fc_class);
return ret;
}
static void __exit nvme_fc_exit_module(void)
......@@ -3010,6 +3333,9 @@ static void __exit nvme_fc_exit_module(void)
ida_destroy(&nvme_fc_local_port_cnt);
ida_destroy(&nvme_fc_ctrl_cnt);
device_destroy(fc_class, MKDEV(0, 0));
class_destroy(fc_class);
}
module_init(nvme_fc_init_module);
......
......@@ -15,16 +15,17 @@
#define _NVME_H
#include <linux/nvme.h>
#include <linux/cdev.h>
#include <linux/pci.h>
#include <linux/kref.h>
#include <linux/blk-mq.h>
#include <linux/lightnvm.h>
#include <linux/sed-opal.h>
extern unsigned char nvme_io_timeout;
extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
extern unsigned char admin_timeout;
extern unsigned int admin_timeout;
#define ADMIN_TIMEOUT (admin_timeout * HZ)
#define NVME_DEFAULT_KATO 5
......@@ -127,16 +128,17 @@ struct nvme_ctrl {
struct request_queue *admin_q;
struct request_queue *connect_q;
struct device *dev;
struct kref kref;
int instance;
struct blk_mq_tag_set *tagset;
struct blk_mq_tag_set *admin_tagset;
struct list_head namespaces;
struct mutex namespaces_mutex;
struct device ctrl_device;
struct device *device; /* char device */
struct list_head node;
struct cdev cdev;
struct ida ns_ida;
struct work_struct reset_work;
struct work_struct delete_work;
struct opal_dev *opal_dev;
......@@ -235,8 +237,9 @@ struct nvme_ctrl_ops {
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
void (*free_ctrl)(struct nvme_ctrl *ctrl);
void (*submit_async_event)(struct nvme_ctrl *ctrl, int aer_idx);
int (*delete_ctrl)(struct nvme_ctrl *ctrl);
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
int (*reinit_request)(void *data, struct request *rq);
};
static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
......@@ -278,6 +281,16 @@ static inline void nvme_end_request(struct request *req, __le16 status,
blk_mq_complete_request(req);
}
static inline void nvme_get_ctrl(struct nvme_ctrl *ctrl)
{
get_device(ctrl->device);
}
static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
{
put_device(ctrl->device);
}
void nvme_complete_rq(struct request *req);
void nvme_cancel_request(struct request *req, void *data, bool reserved);
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
......@@ -311,6 +324,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
void nvme_start_freeze(struct nvme_ctrl *ctrl);
int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set);
#define NVME_QID_ANY -1
struct request *nvme_alloc_request(struct request_queue *q,
......@@ -326,6 +340,8 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
#ifdef CONFIG_NVM
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
......
......@@ -13,7 +13,6 @@
*/
#include <linux/aer.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-pci.h>
......@@ -26,12 +25,9 @@
#include <linux/mutex.h>
#include <linux/once.h>
#include <linux/pci.h>
#include <linux/poison.h>
#include <linux/t10-pi.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <asm/unaligned.h>
#include <linux/sed-opal.h>
#include "nvme.h"
......@@ -45,6 +41,8 @@
*/
#define NVME_AQ_BLKMQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AERS)
#define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc))
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);
......@@ -57,6 +55,12 @@ module_param(max_host_mem_size_mb, uint, 0444);
MODULE_PARM_DESC(max_host_mem_size_mb,
"Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
static unsigned int sgl_threshold = SZ_32K;
module_param(sgl_threshold, uint, 0644);
MODULE_PARM_DESC(sgl_threshold,
"Use SGLs when average request segment size is larger or equal to "
"this size. Use 0 to disable SGLs.");
static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
static const struct kernel_param_ops io_queue_depth_ops = {
.set = io_queue_depth_set,
......@@ -178,6 +182,7 @@ struct nvme_queue {
struct nvme_iod {
struct nvme_request req;
struct nvme_queue *nvmeq;
bool use_sgl;
int aborted;
int npages; /* In the PRP list. 0 means small pool in use */
int nents; /* Used in scatterlist */
......@@ -331,17 +336,35 @@ static int nvme_npages(unsigned size, struct nvme_dev *dev)
return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
}
static unsigned int nvme_iod_alloc_size(struct nvme_dev *dev,
unsigned int size, unsigned int nseg)
/*
* Calculates the number of pages needed for the SGL segments. For example a 4k
* page can accommodate 256 SGL descriptors.
*/
static int nvme_pci_npages_sgl(unsigned int num_seg)
{
return sizeof(__le64 *) * nvme_npages(size, dev) +
sizeof(struct scatterlist) * nseg;
return DIV_ROUND_UP(num_seg * sizeof(struct nvme_sgl_desc), PAGE_SIZE);
}
static unsigned int nvme_cmd_size(struct nvme_dev *dev)
static unsigned int nvme_pci_iod_alloc_size(struct nvme_dev *dev,
unsigned int size, unsigned int nseg, bool use_sgl)
{
return sizeof(struct nvme_iod) +
nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES);
size_t alloc_size;
if (use_sgl)
alloc_size = sizeof(__le64 *) * nvme_pci_npages_sgl(nseg);
else
alloc_size = sizeof(__le64 *) * nvme_npages(size, dev);
return alloc_size + sizeof(struct scatterlist) * nseg;
}
static unsigned int nvme_pci_cmd_size(struct nvme_dev *dev, bool use_sgl)
{
unsigned int alloc_size = nvme_pci_iod_alloc_size(dev,
NVME_INT_BYTES(dev), NVME_INT_PAGES,
use_sgl);
return sizeof(struct nvme_iod) + alloc_size;
}
static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
......@@ -425,10 +448,10 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
nvmeq->sq_tail = tail;
}
static __le64 **iod_list(struct request *req)
static void **nvme_pci_iod_list(struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
return (void **)(iod->sg + blk_rq_nr_phys_segments(req));
}
static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
......@@ -438,7 +461,10 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
unsigned int size = blk_rq_payload_bytes(rq);
if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg,
iod->use_sgl);
iod->sg = kmalloc(alloc_size, GFP_ATOMIC);
if (!iod->sg)
return BLK_STS_RESOURCE;
} else {
......@@ -456,18 +482,31 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
const int last_prp = dev->ctrl.page_size / 8 - 1;
const int last_prp = dev->ctrl.page_size / sizeof(__le64) - 1;
dma_addr_t dma_addr = iod->first_dma, next_dma_addr;
int i;
__le64 **list = iod_list(req);
dma_addr_t prp_dma = iod->first_dma;
if (iod->npages == 0)
dma_pool_free(dev->prp_small_pool, list[0], prp_dma);
dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
dma_addr);
for (i = 0; i < iod->npages; i++) {
__le64 *prp_list = list[i];
dma_addr_t next_prp_dma = le64_to_cpu(prp_list[last_prp]);
dma_pool_free(dev->prp_page_pool, prp_list, prp_dma);
prp_dma = next_prp_dma;
void *addr = nvme_pci_iod_list(req)[i];
if (iod->use_sgl) {
struct nvme_sgl_desc *sg_list = addr;
next_dma_addr =
le64_to_cpu((sg_list[SGES_PER_PAGE - 1]).addr);
} else {
__le64 *prp_list = addr;
next_dma_addr = le64_to_cpu(prp_list[last_prp]);
}
dma_pool_free(dev->prp_page_pool, addr, dma_addr);
dma_addr = next_dma_addr;
}
if (iod->sg != iod->inline_sg)
......@@ -555,7 +594,8 @@ static void nvme_print_sgl(struct scatterlist *sgl, int nents)
}
}
static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
struct request *req, struct nvme_rw_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct dma_pool *pool;
......@@ -566,14 +606,16 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
u32 page_size = dev->ctrl.page_size;
int offset = dma_addr & (page_size - 1);
__le64 *prp_list;
__le64 **list = iod_list(req);
void **list = nvme_pci_iod_list(req);
dma_addr_t prp_dma;
int nprps, i;
iod->use_sgl = false;
length -= (page_size - offset);
if (length <= 0) {
iod->first_dma = 0;
return BLK_STS_OK;
goto done;
}
dma_len -= (page_size - offset);
......@@ -587,7 +629,7 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
if (length <= page_size) {
iod->first_dma = dma_addr;
return BLK_STS_OK;
goto done;
}
nprps = DIV_ROUND_UP(length, page_size);
......@@ -634,6 +676,10 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
dma_len = sg_dma_len(sg);
}
done:
cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma);
return BLK_STS_OK;
bad_sgl:
......@@ -643,6 +689,110 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
return BLK_STS_IOERR;
}
static void nvme_pci_sgl_set_data(struct nvme_sgl_desc *sge,
struct scatterlist *sg)
{
sge->addr = cpu_to_le64(sg_dma_address(sg));
sge->length = cpu_to_le32(sg_dma_len(sg));
sge->type = NVME_SGL_FMT_DATA_DESC << 4;
}
static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge,
dma_addr_t dma_addr, int entries)
{
sge->addr = cpu_to_le64(dma_addr);
if (entries < SGES_PER_PAGE) {
sge->length = cpu_to_le32(entries * sizeof(*sge));
sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4;
} else {
sge->length = cpu_to_le32(PAGE_SIZE);
sge->type = NVME_SGL_FMT_SEG_DESC << 4;
}
}
static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
struct request *req, struct nvme_rw_command *cmd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
int length = blk_rq_payload_bytes(req);
struct dma_pool *pool;
struct nvme_sgl_desc *sg_list;
struct scatterlist *sg = iod->sg;
int entries = iod->nents, i = 0;
dma_addr_t sgl_dma;
iod->use_sgl = true;
/* setting the transfer type as SGL */
cmd->flags = NVME_CMD_SGL_METABUF;
if (length == sg_dma_len(sg)) {
nvme_pci_sgl_set_data(&cmd->dptr.sgl, sg);
return BLK_STS_OK;
}
if (entries <= (256 / sizeof(struct nvme_sgl_desc))) {
pool = dev->prp_small_pool;
iod->npages = 0;
} else {
pool = dev->prp_page_pool;
iod->npages = 1;
}
sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
if (!sg_list) {
iod->npages = -1;
return BLK_STS_RESOURCE;
}
nvme_pci_iod_list(req)[0] = sg_list;
iod->first_dma = sgl_dma;
nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries);
do {
if (i == SGES_PER_PAGE) {
struct nvme_sgl_desc *old_sg_desc = sg_list;
struct nvme_sgl_desc *link = &old_sg_desc[i - 1];
sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
if (!sg_list)
return BLK_STS_RESOURCE;
i = 0;
nvme_pci_iod_list(req)[iod->npages++] = sg_list;
sg_list[i++] = *link;
nvme_pci_sgl_set_seg(link, sgl_dma, entries);
}
nvme_pci_sgl_set_data(&sg_list[i++], sg);
length -= sg_dma_len(sg);
sg = sg_next(sg);
entries--;
} while (length > 0);
WARN_ON(entries > 0);
return BLK_STS_OK;
}
static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
unsigned int avg_seg_size;
avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req),
blk_rq_nr_phys_segments(req));
if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
return false;
if (!iod->nvmeq->qid)
return false;
if (!sgl_threshold || avg_seg_size < sgl_threshold)
return false;
return true;
}
static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
struct nvme_command *cmnd)
{
......@@ -662,7 +812,11 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
DMA_ATTR_NO_WARN))
goto out;
ret = nvme_setup_prps(dev, req);
if (nvme_pci_use_sgls(dev, req))
ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
else
ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
if (ret != BLK_STS_OK)
goto out_unmap;
......@@ -682,8 +836,6 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
goto out_unmap;
}
cmnd->rw.dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
cmnd->rw.dptr.prp2 = cpu_to_le64(iod->first_dma);
if (blk_integrity_rq(req))
cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
return BLK_STS_OK;
......@@ -930,7 +1082,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
/*
* Note: we (ab)use the fact the the prp fields survive if no data
* Note: we (ab)use the fact that the prp fields survive if no data
* is attached to the request.
*/
memset(&c, 0, sizeof(c));
......@@ -951,7 +1103,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
int flags = NVME_QUEUE_PHYS_CONTIG;
/*
* Note: we (ab)use the fact the the prp fields survive if no data
* Note: we (ab)use the fact that the prp fields survive if no data
* is attached to the request.
*/
memset(&c, 0, sizeof(c));
......@@ -1379,7 +1531,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
dev->admin_tagset.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1;
dev->admin_tagset.timeout = ADMIN_TIMEOUT;
dev->admin_tagset.numa_node = dev_to_node(dev->dev);
dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
dev->admin_tagset.cmd_size = nvme_pci_cmd_size(dev, false);
dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
dev->admin_tagset.driver_data = dev;
......@@ -1906,7 +2058,11 @@ static int nvme_dev_add(struct nvme_dev *dev)
dev->tagset.numa_node = dev_to_node(dev->dev);
dev->tagset.queue_depth =
min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
dev->tagset.cmd_size = nvme_cmd_size(dev);
dev->tagset.cmd_size = nvme_pci_cmd_size(dev, false);
if ((dev->ctrl.sgls & ((1 << 0) | (1 << 1))) && sgl_threshold) {
dev->tagset.cmd_size = max(dev->tagset.cmd_size,
nvme_pci_cmd_size(dev, true));
}
dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
dev->tagset.driver_data = dev;
......@@ -2132,7 +2288,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
{
dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status);
kref_get(&dev->ctrl.kref);
nvme_get_ctrl(&dev->ctrl);
nvme_dev_disable(dev, false);
if (!schedule_work(&dev->remove_work))
nvme_put_ctrl(&dev->ctrl);
......
......@@ -79,8 +79,8 @@ struct nvme_rdma_request {
};
enum nvme_rdma_queue_flags {
NVME_RDMA_Q_LIVE = 0,
NVME_RDMA_Q_DELETING = 1,
NVME_RDMA_Q_ALLOCATED = 0,
NVME_RDMA_Q_LIVE = 1,
};
struct nvme_rdma_queue {
......@@ -105,7 +105,6 @@ struct nvme_rdma_ctrl {
/* other member variables */
struct blk_mq_tag_set tag_set;
struct work_struct delete_work;
struct work_struct err_work;
struct nvme_rdma_qe async_event_sqe;
......@@ -274,6 +273,9 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq)
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
int ret = 0;
if (WARN_ON_ONCE(!req->mr))
return 0;
ib_dereg_mr(req->mr);
req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
......@@ -434,11 +436,9 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
{
struct nvme_rdma_device *dev;
struct ib_device *ibdev;
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
dev = queue->device;
ibdev = dev->dev;
rdma_destroy_qp(queue->cm_id);
ib_free_cq(queue->ib_cq);
......@@ -544,11 +544,11 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
ret = nvme_rdma_wait_for_cm(queue);
if (ret) {
dev_info(ctrl->ctrl.device,
"rdma_resolve_addr wait failed (%d).\n", ret);
"rdma connection establishment failed (%d)\n", ret);
goto out_destroy_cm_id;
}
clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
set_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags);
return 0;
......@@ -568,7 +568,7 @@ static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
{
if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags))
if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
return;
nvme_rdma_destroy_queue_ib(queue);
......@@ -670,11 +670,10 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
return ret;
}
static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, bool admin)
static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl,
struct blk_mq_tag_set *set)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
struct blk_mq_tag_set *set = admin ?
&ctrl->admin_tag_set : &ctrl->tag_set;
blk_mq_free_tag_set(set);
nvme_rdma_dev_put(ctrl->device);
......@@ -699,6 +698,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->driver_data = ctrl;
set->nr_hw_queues = 1;
set->timeout = ADMIN_TIMEOUT;
set->flags = BLK_MQ_F_NO_SCHED;
} else {
set = &ctrl->tag_set;
memset(set, 0, sizeof(*set));
......@@ -744,7 +744,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
nvme_rdma_stop_queue(&ctrl->queues[0]);
if (remove) {
blk_cleanup_queue(ctrl->ctrl.admin_q);
nvme_rdma_free_tagset(&ctrl->ctrl, true);
nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
}
nvme_rdma_free_queue(&ctrl->queues[0]);
}
......@@ -774,8 +774,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
goto out_free_tagset;
}
} else {
error = blk_mq_reinit_tagset(&ctrl->admin_tag_set,
nvme_rdma_reinit_request);
error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
if (error)
goto out_free_queue;
}
......@@ -819,7 +818,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
blk_cleanup_queue(ctrl->ctrl.admin_q);
out_free_tagset:
if (new)
nvme_rdma_free_tagset(&ctrl->ctrl, true);
nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
out_free_queue:
nvme_rdma_free_queue(&ctrl->queues[0]);
return error;
......@@ -831,7 +830,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
nvme_rdma_stop_io_queues(ctrl);
if (remove) {
blk_cleanup_queue(ctrl->ctrl.connect_q);
nvme_rdma_free_tagset(&ctrl->ctrl, false);
nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
}
nvme_rdma_free_io_queues(ctrl);
}
......@@ -855,8 +854,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
goto out_free_tag_set;
}
} else {
ret = blk_mq_reinit_tagset(&ctrl->tag_set,
nvme_rdma_reinit_request);
ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
if (ret)
goto out_free_io_queues;
......@@ -875,7 +873,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
blk_cleanup_queue(ctrl->ctrl.connect_q);
out_free_tag_set:
if (new)
nvme_rdma_free_tagset(&ctrl->ctrl, false);
nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
out_free_io_queues:
nvme_rdma_free_io_queues(ctrl);
return ret;
......@@ -914,7 +912,7 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
dev_info(ctrl->ctrl.device, "Removing controller...\n");
queue_work(nvme_wq, &ctrl->delete_work);
nvme_delete_ctrl(&ctrl->ctrl);
}
}
......@@ -927,10 +925,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
++ctrl->ctrl.nr_reconnects;
if (ctrl->ctrl.queue_count > 1)
nvme_rdma_destroy_io_queues(ctrl, false);
nvme_rdma_destroy_admin_queue(ctrl, false);
ret = nvme_rdma_configure_admin_queue(ctrl, false);
if (ret)
goto requeue;
......@@ -938,7 +932,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
if (ctrl->ctrl.queue_count > 1) {
ret = nvme_rdma_configure_io_queues(ctrl, false);
if (ret)
goto requeue;
goto destroy_admin;
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
......@@ -948,14 +942,17 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
return;
}
ctrl->ctrl.nr_reconnects = 0;
nvme_start_ctrl(&ctrl->ctrl);
dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
ctrl->ctrl.nr_reconnects);
ctrl->ctrl.nr_reconnects = 0;
return;
destroy_admin:
nvme_rdma_destroy_admin_queue(ctrl, false);
requeue:
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
ctrl->ctrl.nr_reconnects);
......@@ -971,17 +968,15 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl);
}
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]);
/* We must take care of fastfail/requeue all our inflight requests */
if (ctrl->ctrl.queue_count > 1)
blk_mq_tagset_busy_iter(&ctrl->tag_set,
nvme_cancel_request, &ctrl->ctrl);
nvme_rdma_destroy_io_queues(ctrl, false);
}
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_cancel_request, &ctrl->ctrl);
nvme_rdma_destroy_admin_queue(ctrl, false);
/*
* queues are not a live anymore, so restart the queues to fail fast
......@@ -1057,7 +1052,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
if (!blk_rq_bytes(rq))
return;
if (req->mr->need_inval) {
if (req->mr->need_inval && test_bit(NVME_RDMA_Q_LIVE, &req->queue->flags)) {
res = nvme_rdma_inv_rkey(queue, req);
if (unlikely(res < 0)) {
dev_err(ctrl->ctrl.device,
......@@ -1582,6 +1577,10 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
dev_warn(req->queue->ctrl->ctrl.device,
"I/O %d QID %d timeout, reset controller\n",
rq->tag, nvme_rdma_queue_idx(req->queue));
/* queue error recovery */
nvme_rdma_error_recovery(req->queue->ctrl);
......@@ -1756,50 +1755,9 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
nvme_rdma_destroy_admin_queue(ctrl, shutdown);
}
static void nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl)
{
nvme_remove_namespaces(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, true);
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
}
static void nvme_rdma_del_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, delete_work);
nvme_stop_ctrl(&ctrl->ctrl);
nvme_rdma_remove_ctrl(ctrl);
}
static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return -EBUSY;
if (!queue_work(nvme_wq, &ctrl->delete_work))
return -EBUSY;
return 0;
}
static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
int ret = 0;
/*
* Keep a reference until all work is flushed since
* __nvme_rdma_del_ctrl can free the ctrl mem
*/
if (!kref_get_unless_zero(&ctrl->ctrl.kref))
return -EBUSY;
ret = __nvme_rdma_del_ctrl(ctrl);
if (!ret)
flush_work(&ctrl->delete_work);
nvme_put_ctrl(&ctrl->ctrl);
return ret;
nvme_rdma_shutdown_ctrl(to_rdma_ctrl(ctrl), true);
}
static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
......@@ -1823,7 +1781,11 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
if (!changed) {
/* state change failure is ok if we're in DELETING state */
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
return;
}
nvme_start_ctrl(&ctrl->ctrl);
......@@ -1831,7 +1793,10 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
out_fail:
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
nvme_rdma_remove_ctrl(ctrl);
nvme_remove_namespaces(&ctrl->ctrl);
nvme_rdma_shutdown_ctrl(ctrl, true);
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
}
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
......@@ -1843,10 +1808,88 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.reg_write32 = nvmf_reg_write32,
.free_ctrl = nvme_rdma_free_ctrl,
.submit_async_event = nvme_rdma_submit_async_event,
.delete_ctrl = nvme_rdma_del_ctrl,
.delete_ctrl = nvme_rdma_delete_ctrl,
.get_address = nvmf_get_address,
.reinit_request = nvme_rdma_reinit_request,
};
static inline bool
__nvme_rdma_options_match(struct nvme_rdma_ctrl *ctrl,
struct nvmf_ctrl_options *opts)
{
char *stdport = __stringify(NVME_RDMA_IP_PORT);
if (!nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts) ||
strcmp(opts->traddr, ctrl->ctrl.opts->traddr))
return false;
if (opts->mask & NVMF_OPT_TRSVCID &&
ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) {
if (strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid))
return false;
} else if (opts->mask & NVMF_OPT_TRSVCID) {
if (strcmp(opts->trsvcid, stdport))
return false;
} else if (ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) {
if (strcmp(stdport, ctrl->ctrl.opts->trsvcid))
return false;
}
/* else, it's a match as both have stdport. Fall to next checks */
/*
* checking the local address is rough. In most cases, one
* is not specified and the host port is selected by the stack.
*
* Assume no match if:
* local address is specified and address is not the same
* local address is not specified but remote is, or vice versa
* (admin using specific host_traddr when it matters).
*/
if (opts->mask & NVMF_OPT_HOST_TRADDR &&
ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) {
if (strcmp(opts->host_traddr, ctrl->ctrl.opts->host_traddr))
return false;
} else if (opts->mask & NVMF_OPT_HOST_TRADDR ||
ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
return false;
/*
* if neither controller had an host port specified, assume it's
* a match as everything else matched.
*/
return true;
}
/*
* Fails a connection request if it matches an existing controller
* (association) with the same tuple:
* <Host NQN, Host ID, local address, remote address, remote port, SUBSYS NQN>
*
* if local address is not specified in the request, it will match an
* existing controller with all the other parameters the same and no
* local port address specified as well.
*
* The ports don't need to be compared as they are intrinsically
* already matched by the port pointers supplied.
*/
static bool
nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
{
struct nvme_rdma_ctrl *ctrl;
bool found = false;
mutex_lock(&nvme_rdma_ctrl_mutex);
list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
found = __nvme_rdma_options_match(ctrl, opts);
if (found)
break;
}
mutex_unlock(&nvme_rdma_ctrl_mutex);
return found;
}
static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
struct nvmf_ctrl_options *opts)
{
......@@ -1883,6 +1926,11 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
}
}
if (!opts->duplicate_connect && nvme_rdma_existing_controller(opts)) {
ret = -EALREADY;
goto out_free_ctrl;
}
ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
0 /* no quirks, we're perfect! */);
if (ret)
......@@ -1891,7 +1939,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
INIT_DELAYED_WORK(&ctrl->reconnect_work,
nvme_rdma_reconnect_ctrl_work);
INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
......@@ -1950,7 +1997,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
kref_get(&ctrl->ctrl.kref);
nvme_get_ctrl(&ctrl->ctrl);
mutex_lock(&nvme_rdma_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
......@@ -1995,7 +2042,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
dev_info(ctrl->ctrl.device,
"Removing ctrl: NQN \"%s\", addr %pISp\n",
ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
__nvme_rdma_del_ctrl(ctrl);
nvme_delete_ctrl(&ctrl->ctrl);
}
mutex_unlock(&nvme_rdma_ctrl_mutex);
......
......@@ -57,6 +57,17 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
return 0;
}
static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
struct nvmet_ns *ns;
if (list_empty(&subsys->namespaces))
return 0;
ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
return ns->nsid;
}
static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
......@@ -334,6 +345,8 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
ns->enabled = false;
list_del_rcu(&ns->dev_link);
if (ns->nsid == subsys->max_nsid)
subsys->max_nsid = nvmet_max_nsid(subsys);
mutex_unlock(&subsys->lock);
/*
......
......@@ -150,6 +150,7 @@ struct nvmet_fc_tgt_assoc {
struct list_head a_list;
struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1];
struct kref ref;
struct work_struct del_work;
};
......@@ -232,6 +233,7 @@ static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport);
static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport);
static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_fcp_iod *fod);
static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc);
/* *********************** FC-NVME DMA Handling **************************** */
......@@ -802,6 +804,16 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport,
return NULL;
}
static void
nvmet_fc_delete_assoc(struct work_struct *work)
{
struct nvmet_fc_tgt_assoc *assoc =
container_of(work, struct nvmet_fc_tgt_assoc, del_work);
nvmet_fc_delete_target_assoc(assoc);
nvmet_fc_tgt_a_put(assoc);
}
static struct nvmet_fc_tgt_assoc *
nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport)
{
......@@ -826,6 +838,7 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport)
assoc->a_id = idx;
INIT_LIST_HEAD(&assoc->a_list);
kref_init(&assoc->ref);
INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc);
while (needrandom) {
get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID);
......@@ -1118,8 +1131,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
nvmet_fc_tgtport_put(tgtport);
if (found_ctrl) {
nvmet_fc_delete_target_assoc(assoc);
nvmet_fc_tgt_a_put(assoc);
schedule_work(&assoc->del_work);
return;
}
......
......@@ -53,7 +53,6 @@ struct nvme_loop_ctrl {
struct nvme_ctrl ctrl;
struct nvmet_ctrl *target_ctrl;
struct work_struct delete_work;
};
static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
......@@ -365,6 +364,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
ctrl->admin_tag_set.driver_data = ctrl;
ctrl->admin_tag_set.nr_hw_queues = 1;
ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED;
ctrl->queues[0].ctrl = ctrl;
error = nvmet_sq_init(&ctrl->queues[0].nvme_sq);
......@@ -438,41 +438,9 @@ static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
nvme_loop_destroy_admin_queue(ctrl);
}
static void nvme_loop_del_ctrl_work(struct work_struct *work)
static void nvme_loop_delete_ctrl_host(struct nvme_ctrl *ctrl)
{
struct nvme_loop_ctrl *ctrl = container_of(work,
struct nvme_loop_ctrl, delete_work);
nvme_stop_ctrl(&ctrl->ctrl);
nvme_remove_namespaces(&ctrl->ctrl);
nvme_loop_shutdown_ctrl(ctrl);
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
}
static int __nvme_loop_del_ctrl(struct nvme_loop_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return -EBUSY;
if (!queue_work(nvme_wq, &ctrl->delete_work))
return -EBUSY;
return 0;
}
static int nvme_loop_del_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
int ret;
ret = __nvme_loop_del_ctrl(ctrl);
if (ret)
return ret;
flush_work(&ctrl->delete_work);
return 0;
nvme_loop_shutdown_ctrl(to_loop_ctrl(ctrl));
}
static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl)
......@@ -482,7 +450,7 @@ static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl)
mutex_lock(&nvme_loop_ctrl_mutex);
list_for_each_entry(ctrl, &nvme_loop_ctrl_list, list) {
if (ctrl->ctrl.cntlid == nctrl->cntlid)
__nvme_loop_del_ctrl(ctrl);
nvme_delete_ctrl(&ctrl->ctrl);
}
mutex_unlock(&nvme_loop_ctrl_mutex);
}
......@@ -538,7 +506,7 @@ static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
.reg_write32 = nvmf_reg_write32,
.free_ctrl = nvme_loop_free_ctrl,
.submit_async_event = nvme_loop_submit_async_event,
.delete_ctrl = nvme_loop_del_ctrl,
.delete_ctrl = nvme_loop_delete_ctrl_host,
};
static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
......@@ -600,7 +568,6 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
ctrl->ctrl.opts = opts;
INIT_LIST_HEAD(&ctrl->list);
INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work);
INIT_WORK(&ctrl->ctrl.reset_work, nvme_loop_reset_ctrl_work);
ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
......@@ -641,7 +608,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
dev_info(ctrl->ctrl.device,
"new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn);
kref_get(&ctrl->ctrl.kref);
nvme_get_ctrl(&ctrl->ctrl);
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
......@@ -730,7 +697,7 @@ static void __exit nvme_loop_cleanup_module(void)
mutex_lock(&nvme_loop_ctrl_mutex);
list_for_each_entry_safe(ctrl, next, &nvme_loop_ctrl_list, list)
__nvme_loop_del_ctrl(ctrl);
nvme_delete_ctrl(&ctrl->ctrl);
mutex_unlock(&nvme_loop_ctrl_mutex);
flush_workqueue(nvme_wq);
......
......@@ -314,7 +314,7 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf,
u32 nvmet_get_log_page_len(struct nvme_command *cmd);
#define NVMET_QUEUE_SIZE 1024
#define NVMET_NR_QUEUES 64
#define NVMET_NR_QUEUES 128
#define NVMET_MAX_CMD NVMET_QUEUE_SIZE
#define NVMET_KAS 10
#define NVMET_DISC_KATO 120
......
......@@ -272,7 +272,7 @@ void blk_freeze_queue_start(struct request_queue *q);
void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
unsigned long timeout);
int blk_mq_reinit_tagset(struct blk_mq_tag_set *set,
int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
int (reinit_request)(void *, struct request *));
int blk_mq_map_queues(struct blk_mq_tag_set *set);
......
......@@ -40,6 +40,8 @@
* @node_name: FC WWNN for the port
* @port_name: FC WWPN for the port
* @port_role: What NVME roles are supported (see FC_PORT_ROLE_xxx)
* @dev_loss_tmo: maximum delay for reconnects to an association on
* this device. Used only on a remoteport.
*
* Initialization values for dynamic port fields:
* @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must
......@@ -50,6 +52,7 @@ struct nvme_fc_port_info {
u64 port_name;
u32 port_role;
u32 port_id;
u32 dev_loss_tmo;
};
......@@ -102,8 +105,6 @@ enum nvmefc_fcp_datadir {
};
#define NVME_FC_MAX_SEGMENTS 256
/**
* struct nvmefc_fcp_req - Request structure passed from NVME-FC transport
* to LLDD in order to perform a NVME FCP IO operation.
......@@ -202,6 +203,9 @@ enum nvme_fc_obj_state {
* The length of the buffer corresponds to the local_priv_sz
* value specified in the nvme_fc_port_template supplied by
* the LLDD.
* @dev_loss_tmo: maximum delay for reconnects to an association on
* this device. To modify, lldd must call
* nvme_fc_set_remoteport_devloss().
*
* Fields with dynamic values. Values may change base on link state. LLDD
* may reference fields directly to change them. Initialized by the
......@@ -259,10 +263,9 @@ struct nvme_fc_remote_port {
u32 port_role;
u64 node_name;
u64 port_name;
struct nvme_fc_local_port *localport;
void *private;
u32 dev_loss_tmo;
/* dynamic fields */
u32 port_id;
......@@ -446,6 +449,10 @@ int nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
int nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *remoteport);
void nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport);
int nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *remoteport,
u32 dev_loss_tmo);
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment