Commit 19240e6b authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - Two sets of NVMe pull requests from Christoph:
      - Fixes for the Fibre Channel host/target to fix spec compliance
      - Allow a zero keep alive timeout
      - Make the debug printk for broken SGLs work better
      - Fix queue zeroing during initialization
      - Set of RDMA and FC fixes
      - Target div-by-zero fix

 - bsg double-free fix.

 - ndb unknown ioctl fix from Josef.

 - Buffered vs O_DIRECT page cache inconsistency fix. Has been floating
   around for a long time, well reviewed. From Lukas.

 - brd overflow fix from Mikulas.

 - Fix for a loop regression in this merge window, where using a union
   for two members of the loop_cmd turned out to be a really bad idea.
   From Omar.

 - Fix for an iostat regression fix in this series, using the wrong API
   to get at the block queue. From Shaohua.

 - Fix for a potential blktrace delection deadlock. From Waiman.

* 'for-linus' of git://git.kernel.dk/linux-block: (30 commits)
  nvme-fcloop: fix port deletes and callbacks
  nvmet-fc: sync header templates with comments
  nvmet-fc: ensure target queue id within range.
  nvmet-fc: on port remove call put outside lock
  nvme-rdma: don't fully stop the controller in error recovery
  nvme-rdma: give up reconnect if state change fails
  nvme-core: Use nvme_wq to queue async events and fw activation
  nvme: fix sqhd reference when admin queue connect fails
  block: fix a crash caused by wrong API
  fs: Fix page cache inconsistency when mixing buffered and AIO DIO
  nvmet: implement valid sqhd values in completions
  nvme-fabrics: Allow 0 as KATO value
  nvme: allow timed-out ios to retry
  nvme: stop aer posting if controller state not live
  nvme-pci: Print invalid SGL only once
  nvme-pci: initialize queue memory before interrupts
  nvmet-fc: fix failing max io queue connections
  nvme-fc: use transport-specific sgl format
  nvme: add transport SGL definitions
  nvme.h: remove FC transport-specific error values
  ...
parents 17763641 fddc9923
......@@ -854,6 +854,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
kobject_init(&q->kobj, &blk_queue_ktype);
#ifdef CONFIG_BLK_DEV_IO_TRACE
mutex_init(&q->blk_trace_mutex);
#endif
mutex_init(&q->sysfs_lock);
spin_lock_init(&q->__queue_lock);
......
......@@ -154,7 +154,6 @@ static int bsg_prepare_job(struct device *dev, struct request *req)
failjob_rls_rqst_payload:
kfree(job->request_payload.sg_list);
failjob_rls_job:
kfree(job);
return -ENOMEM;
}
......
......@@ -112,7 +112,7 @@ ssize_t part_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
struct request_queue *q = dev_to_disk(dev)->queue;
struct request_queue *q = part_to_disk(p)->queue;
unsigned int inflight[2];
int cpu;
......
......@@ -342,7 +342,7 @@ static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff,
if (!brd)
return -ENODEV;
page = brd_insert_page(brd, PFN_PHYS(pgoff) / 512);
page = brd_insert_page(brd, (sector_t)pgoff << PAGE_SECTORS_SHIFT);
if (!page)
return -ENOSPC;
*kaddr = page_address(page);
......
......@@ -67,10 +67,8 @@ struct loop_device {
struct loop_cmd {
struct kthread_work work;
struct request *rq;
union {
bool use_aio; /* use AIO interface to handle I/O */
atomic_t ref; /* only for aio */
};
bool use_aio; /* use AIO interface to handle I/O */
atomic_t ref; /* only for aio */
long ret;
struct kiocb iocb;
struct bio_vec *bvec;
......
......@@ -1194,6 +1194,12 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
/* The block layer will pass back some non-nbd ioctls in case we have
* special handling for them, but we don't so just return an error.
*/
if (_IOC_TYPE(cmd) != 0xab)
return -EINVAL;
mutex_lock(&nbd->config_lock);
/* Don't allow ioctl operations on a nbd device that was created with
......
......@@ -134,8 +134,6 @@ static inline bool nvme_req_needs_retry(struct request *req)
return false;
if (nvme_req(req)->status & NVME_SC_DNR)
return false;
if (jiffies - req->start_time >= req->timeout)
return false;
if (nvme_req(req)->retries >= nvme_max_retries)
return false;
return true;
......@@ -2590,7 +2588,7 @@ static void nvme_async_event_work(struct work_struct *work)
container_of(work, struct nvme_ctrl, async_event_work);
spin_lock_irq(&ctrl->lock);
while (ctrl->event_limit > 0) {
while (ctrl->state == NVME_CTRL_LIVE && ctrl->event_limit > 0) {
int aer_idx = --ctrl->event_limit;
spin_unlock_irq(&ctrl->lock);
......@@ -2677,7 +2675,8 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
/*FALLTHRU*/
case NVME_SC_ABORT_REQ:
++ctrl->event_limit;
queue_work(nvme_wq, &ctrl->async_event_work);
if (ctrl->state == NVME_CTRL_LIVE)
queue_work(nvme_wq, &ctrl->async_event_work);
break;
default:
break;
......@@ -2692,7 +2691,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
nvme_queue_scan(ctrl);
break;
case NVME_AER_NOTICE_FW_ACT_STARTING:
schedule_work(&ctrl->fw_act_work);
queue_work(nvme_wq, &ctrl->fw_act_work);
break;
default:
dev_warn(ctrl->device, "async event result %08x\n", result);
......
......@@ -565,6 +565,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->queue_size = NVMF_DEF_QUEUE_SIZE;
opts->nr_io_queues = num_online_cpus();
opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
opts->kato = NVME_DEFAULT_KATO;
options = o = kstrdup(buf, GFP_KERNEL);
if (!options)
......@@ -655,21 +656,22 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
goto out;
}
if (opts->discovery_nqn) {
pr_err("Discovery controllers cannot accept keep_alive_tmo != 0\n");
ret = -EINVAL;
goto out;
}
if (token < 0) {
pr_err("Invalid keep_alive_tmo %d\n", token);
ret = -EINVAL;
goto out;
} else if (token == 0) {
} else if (token == 0 && !opts->discovery_nqn) {
/* Allowed for debug */
pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n");
}
opts->kato = token;
if (opts->discovery_nqn && opts->kato) {
pr_err("Discovery controllers cannot accept KATO != 0\n");
ret = -EINVAL;
goto out;
}
break;
case NVMF_OPT_CTRL_LOSS_TMO:
if (match_int(args, &token)) {
......@@ -762,8 +764,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
uuid_copy(&opts->host->id, &hostid);
out:
if (!opts->discovery_nqn && !opts->kato)
opts->kato = NVME_DEFAULT_KATO;
kfree(options);
return ret;
}
......
......@@ -1376,7 +1376,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
if (atomic_read(&op->state) == FCPOP_STATE_ABORTED)
status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
else if (freq->status)
status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
status = cpu_to_le16(NVME_SC_INTERNAL << 1);
/*
* For the linux implementation, if we have an unsuccesful
......@@ -1404,7 +1404,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
*/
if (freq->transferred_length !=
be32_to_cpu(op->cmd_iu.data_len)) {
status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
status = cpu_to_le16(NVME_SC_INTERNAL << 1);
goto done;
}
result.u64 = 0;
......@@ -1421,7 +1421,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
freq->transferred_length ||
op->rsp_iu.status_code ||
sqe->common.command_id != cqe->command_id)) {
status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
status = cpu_to_le16(NVME_SC_INTERNAL << 1);
goto done;
}
result = cqe->result;
......@@ -1429,7 +1429,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
break;
default:
status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
status = cpu_to_le16(NVME_SC_INTERNAL << 1);
goto done;
}
......@@ -1989,16 +1989,17 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
* as well as those by FC-NVME spec.
*/
WARN_ON_ONCE(sqe->common.metadata);
WARN_ON_ONCE(sqe->common.dptr.prp1);
WARN_ON_ONCE(sqe->common.dptr.prp2);
sqe->common.flags |= NVME_CMD_SGL_METABUF;
/*
* format SQE DPTR field per FC-NVME rules
* type=data block descr; subtype=offset;
* offset is currently 0.
* format SQE DPTR field per FC-NVME rules:
* type=0x5 Transport SGL Data Block Descriptor
* subtype=0xA Transport-specific value
* address=0
* length=length of the data series
*/
sqe->rw.dptr.sgl.type = NVME_SGL_FMT_OFFSET;
sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
NVME_SGL_FMT_TRANSPORT_A;
sqe->rw.dptr.sgl.length = cpu_to_le32(data_len);
sqe->rw.dptr.sgl.addr = 0;
......
......@@ -24,6 +24,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/once.h>
#include <linux/pci.h>
#include <linux/poison.h>
#include <linux/t10-pi.h>
......@@ -540,6 +541,20 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
}
#endif
static void nvme_print_sgl(struct scatterlist *sgl, int nents)
{
int i;
struct scatterlist *sg;
for_each_sg(sgl, sg, nents, i) {
dma_addr_t phys = sg_phys(sg);
pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
"dma_address:%pad dma_length:%d\n",
i, &phys, sg->offset, sg->length, &sg_dma_address(sg),
sg_dma_len(sg));
}
}
static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
......@@ -622,19 +637,10 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
return BLK_STS_OK;
bad_sgl:
if (WARN_ONCE(1, "Invalid SGL for payload:%d nents:%d\n",
blk_rq_payload_bytes(req), iod->nents)) {
for_each_sg(iod->sg, sg, iod->nents, i) {
dma_addr_t phys = sg_phys(sg);
pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
"dma_address:%pad dma_length:%d\n", i, &phys,
sg->offset, sg->length,
&sg_dma_address(sg),
sg_dma_len(sg));
}
}
WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents),
"Invalid SGL for payload:%d nents:%d\n",
blk_rq_payload_bytes(req), iod->nents);
return BLK_STS_IOERR;
}
static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
......@@ -1313,11 +1319,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
if (result < 0)
goto release_cq;
nvme_init_queue(nvmeq, qid);
result = queue_request_irq(nvmeq);
if (result < 0)
goto release_sq;
nvme_init_queue(nvmeq, qid);
return result;
release_sq:
......@@ -1464,6 +1470,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
return result;
nvmeq->cq_vector = 0;
nvme_init_queue(nvmeq, 0);
result = queue_request_irq(nvmeq);
if (result) {
nvmeq->cq_vector = -1;
......@@ -2156,7 +2163,6 @@ static void nvme_reset_work(struct work_struct *work)
if (result)
goto out;
nvme_init_queue(dev->queues[0], 0);
result = nvme_alloc_admin_tags(dev);
if (result)
goto out;
......
......@@ -942,7 +942,12 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
if (!changed) {
/* state change failure is ok if we're in DELETING state */
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
return;
}
ctrl->ctrl.nr_reconnects = 0;
nvme_start_ctrl(&ctrl->ctrl);
......@@ -962,7 +967,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
struct nvme_rdma_ctrl *ctrl = container_of(work,
struct nvme_rdma_ctrl, err_work);
nvme_stop_ctrl(&ctrl->ctrl);
nvme_stop_keep_alive(&ctrl->ctrl);
if (ctrl->ctrl.queue_count > 1) {
nvme_stop_queues(&ctrl->ctrl);
......
......@@ -390,10 +390,10 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
if (status)
nvmet_set_status(req, status);
/* XXX: need to fill in something useful for sq_head */
req->rsp->sq_head = 0;
if (likely(req->sq)) /* may happen during early failure */
req->rsp->sq_id = cpu_to_le16(req->sq->qid);
if (req->sq->size)
req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size;
req->rsp->sq_head = cpu_to_le16(req->sq->sqhd);
req->rsp->sq_id = cpu_to_le16(req->sq->qid);
req->rsp->command_id = req->cmd->common.command_id;
if (req->ns)
......@@ -420,6 +420,7 @@ void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
u16 qid, u16 size)
{
sq->sqhd = 0;
sq->qid = qid;
sq->size = size;
......
......@@ -109,9 +109,14 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
pr_warn("queue already connected!\n");
return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
}
if (!sqsize) {
pr_warn("queue size zero!\n");
return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
}
nvmet_cq_setup(ctrl, req->cq, qid, sqsize);
nvmet_sq_setup(ctrl, req->sq, qid, sqsize);
/* note: convert queue size from 0's-based value to 1's-based value */
nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1);
nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1);
return 0;
}
......
......@@ -148,7 +148,7 @@ struct nvmet_fc_tgt_assoc {
u32 a_id;
struct nvmet_fc_tgtport *tgtport;
struct list_head a_list;
struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES];
struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1];
struct kref ref;
};
......@@ -608,7 +608,7 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
unsigned long flags;
int ret;
if (qid >= NVMET_NR_QUEUES)
if (qid > NVMET_NR_QUEUES)
return NULL;
queue = kzalloc((sizeof(*queue) +
......@@ -783,6 +783,9 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport,
u16 qid = nvmet_fc_getqueueid(connection_id);
unsigned long flags;
if (qid > NVMET_NR_QUEUES)
return NULL;
spin_lock_irqsave(&tgtport->lock, flags);
list_for_each_entry(assoc, &tgtport->assoc_list, a_list) {
if (association_id == assoc->association_id) {
......@@ -888,7 +891,7 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
int i;
spin_lock_irqsave(&tgtport->lock, flags);
for (i = NVMET_NR_QUEUES - 1; i >= 0; i--) {
for (i = NVMET_NR_QUEUES; i >= 0; i--) {
queue = assoc->queues[i];
if (queue) {
if (!nvmet_fc_tgt_q_get(queue))
......@@ -1910,8 +1913,7 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport,
spin_lock_irqsave(&fod->flock, flags);
fod->writedataactive = false;
spin_unlock_irqrestore(&fod->flock, flags);
nvmet_req_complete(&fod->req,
NVME_SC_FC_TRANSPORT_ERROR);
nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
} else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ {
fcpreq->fcp_error = ret;
fcpreq->transferred_length = 0;
......@@ -1929,8 +1931,7 @@ __nvmet_fc_fod_op_abort(struct nvmet_fc_fcp_iod *fod, bool abort)
/* if in the middle of an io and we need to tear down */
if (abort) {
if (fcpreq->op == NVMET_FCOP_WRITEDATA) {
nvmet_req_complete(&fod->req,
NVME_SC_FC_TRANSPORT_ERROR);
nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
return true;
}
......@@ -1968,8 +1969,7 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod)
fod->abort = true;
spin_unlock(&fod->flock);
nvmet_req_complete(&fod->req,
NVME_SC_FC_TRANSPORT_ERROR);
nvmet_req_complete(&fod->req, NVME_SC_INTERNAL);
return;
}
......@@ -2533,13 +2533,17 @@ nvmet_fc_remove_port(struct nvmet_port *port)
{
struct nvmet_fc_tgtport *tgtport = port->priv;
unsigned long flags;
bool matched = false;
spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
if (tgtport->port == port) {
nvmet_fc_tgtport_put(tgtport);
matched = true;
tgtport->port = NULL;
}
spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags);
if (matched)
nvmet_fc_tgtport_put(tgtport);
}
static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {
......
......@@ -224,8 +224,6 @@ struct fcloop_nport {
struct fcloop_lport *lport;
struct list_head nport_list;
struct kref ref;
struct completion rport_unreg_done;
struct completion tport_unreg_done;
u64 node_name;
u64 port_name;
u32 port_role;
......@@ -576,7 +574,7 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
tfcp_req->aborted = true;
spin_unlock(&tfcp_req->reqlock);
tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED;
tfcp_req->status = NVME_SC_INTERNAL;
/*
* nothing more to do. If io wasn't active, the transport should
......@@ -630,6 +628,32 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
schedule_work(&inireq->iniwork);
}
static void
fcloop_nport_free(struct kref *ref)
{
struct fcloop_nport *nport =
container_of(ref, struct fcloop_nport, ref);
unsigned long flags;
spin_lock_irqsave(&fcloop_lock, flags);
list_del(&nport->nport_list);
spin_unlock_irqrestore(&fcloop_lock, flags);
kfree(nport);
}
static void
fcloop_nport_put(struct fcloop_nport *nport)
{
kref_put(&nport->ref, fcloop_nport_free);
}
static int
fcloop_nport_get(struct fcloop_nport *nport)
{
return kref_get_unless_zero(&nport->ref);
}
static void
fcloop_localport_delete(struct nvme_fc_local_port *localport)
{
......@@ -644,8 +668,7 @@ fcloop_remoteport_delete(struct nvme_fc_remote_port *remoteport)
{
struct fcloop_rport *rport = remoteport->private;
/* release any threads waiting for the unreg to complete */
complete(&rport->nport->rport_unreg_done);
fcloop_nport_put(rport->nport);
}
static void
......@@ -653,8 +676,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
{
struct fcloop_tport *tport = targetport->private;
/* release any threads waiting for the unreg to complete */
complete(&tport->nport->tport_unreg_done);
fcloop_nport_put(tport->nport);
}
#define FCLOOP_HW_QUEUES 4
......@@ -722,6 +744,7 @@ fcloop_create_local_port(struct device *dev, struct device_attribute *attr,
goto out_free_opts;
}
memset(&pinfo, 0, sizeof(pinfo));
pinfo.node_name = opts->wwnn;
pinfo.port_name = opts->wwpn;
pinfo.port_role = opts->roles;
......@@ -804,32 +827,6 @@ fcloop_delete_local_port(struct device *dev, struct device_attribute *attr,
return ret ? ret : count;
}
static void
fcloop_nport_free(struct kref *ref)
{
struct fcloop_nport *nport =
container_of(ref, struct fcloop_nport, ref);
unsigned long flags;
spin_lock_irqsave(&fcloop_lock, flags);
list_del(&nport->nport_list);
spin_unlock_irqrestore(&fcloop_lock, flags);
kfree(nport);
}
static void
fcloop_nport_put(struct fcloop_nport *nport)
{
kref_put(&nport->ref, fcloop_nport_free);
}
static int
fcloop_nport_get(struct fcloop_nport *nport)
{
return kref_get_unless_zero(&nport->ref);
}
static struct fcloop_nport *
fcloop_alloc_nport(const char *buf, size_t count, bool remoteport)
{
......@@ -938,6 +935,7 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr,
if (!nport)
return -EIO;
memset(&pinfo, 0, sizeof(pinfo));
pinfo.node_name = nport->node_name;
pinfo.port_name = nport->port_name;
pinfo.port_role = nport->port_role;
......@@ -979,24 +977,12 @@ __unlink_remote_port(struct fcloop_nport *nport)
}
static int
__wait_remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport)
__remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport)
{
int ret;
if (!rport)
return -EALREADY;
init_completion(&nport->rport_unreg_done);
ret = nvme_fc_unregister_remoteport(rport->remoteport);
if (ret)
return ret;
wait_for_completion(&nport->rport_unreg_done);
fcloop_nport_put(nport);
return ret;
return nvme_fc_unregister_remoteport(rport->remoteport);
}
static ssize_t
......@@ -1029,7 +1015,7 @@ fcloop_delete_remote_port(struct device *dev, struct device_attribute *attr,
if (!nport)
return -ENOENT;
ret = __wait_remoteport_unreg(nport, rport);
ret = __remoteport_unreg(nport, rport);
return ret ? ret : count;
}
......@@ -1086,24 +1072,12 @@ __unlink_target_port(struct fcloop_nport *nport)
}
static int
__wait_targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
__targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport)
{
int ret;
if (!tport)
return -EALREADY;
init_completion(&nport->tport_unreg_done);
ret = nvmet_fc_unregister_targetport(tport->targetport);
if (ret)
return ret;
wait_for_completion(&nport->tport_unreg_done);
fcloop_nport_put(nport);
return ret;
return nvmet_fc_unregister_targetport(tport->targetport);
}
static ssize_t
......@@ -1136,7 +1110,7 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr,
if (!nport)
return -ENOENT;
ret = __wait_targetport_unreg(nport, tport);
ret = __targetport_unreg(nport, tport);
return ret ? ret : count;
}
......@@ -1223,11 +1197,11 @@ static void __exit fcloop_exit(void)
spin_unlock_irqrestore(&fcloop_lock, flags);
ret = __wait_targetport_unreg(nport, tport);
ret = __targetport_unreg(nport, tport);
if (ret)
pr_warn("%s: Failed deleting target port\n", __func__);
ret = __wait_remoteport_unreg(nport, rport);
ret = __remoteport_unreg(nport, rport);
if (ret)
pr_warn("%s: Failed deleting remote port\n", __func__);
......
......@@ -74,6 +74,7 @@ struct nvmet_sq {
struct percpu_ref ref;
u16 qid;
u16 size;
u16 sqhd;
struct completion free_done;
struct completion confirm_done;
};
......
......@@ -884,7 +884,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
wcqe->total_data_placed);
nCmd->transferred_length = 0;
nCmd->rcv_rsplen = 0;
nCmd->status = NVME_SC_FC_TRANSPORT_ERROR;
nCmd->status = NVME_SC_INTERNAL;
}
}
......
......@@ -180,7 +180,7 @@ static void qla_nvme_sp_done(void *ptr, int res)
goto rel;
if (unlikely(res == QLA_FUNCTION_FAILED))
fd->status = NVME_SC_FC_TRANSPORT_ERROR;
fd->status = NVME_SC_INTERNAL;
else
fd->status = 0;
......
......@@ -229,6 +229,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
{
loff_t offset = dio->iocb->ki_pos;
ssize_t transferred = 0;
int err;
/*
* AIO submission can race with bio completion to get here while
......@@ -258,8 +259,22 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
if (ret == 0)
ret = transferred;
/*
* Try again to invalidate clean pages which might have been cached by
* non-direct readahead, or faulted in by get_user_pages() if the source
* of the write was an mmap'ed region of the file we're writing. Either
* one is a pretty crazy thing to do, so we don't support it 100%. If
* this invalidation fails, tough, the write still worked...
*/
if (ret > 0 && dio->op == REQ_OP_WRITE &&
dio->inode->i_mapping->nrpages) {
err = invalidate_inode_pages2_range(dio->inode->i_mapping,
offset >> PAGE_SHIFT,
(offset + ret - 1) >> PAGE_SHIFT);
WARN_ON_ONCE(err);
}
if (dio->end_io) {
int err;
// XXX: ki_pos??
err = dio->end_io(dio->iocb, offset, ret, dio->private);
......@@ -304,6 +319,7 @@ static void dio_bio_end_aio(struct bio *bio)
struct dio *dio = bio->bi_private;
unsigned long remaining;
unsigned long flags;
bool defer_completion = false;
/* cleanup the bio */
dio_bio_complete(dio, bio);
......@@ -315,7 +331,19 @@ static void dio_bio_end_aio(struct bio *bio)
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (remaining == 0) {
if (dio->result && dio->defer_completion) {
/*
* Defer completion when defer_completion is set or
* when the inode has pages mapped and this is AIO write.
* We need to invalidate those pages because there is a
* chance they contain stale data in the case buffered IO
* went in between AIO submission and completion into the
* same region.
*/
if (dio->result)
defer_completion = dio->defer_completion ||
(dio->op == REQ_OP_WRITE &&
dio->inode->i_mapping->nrpages);
if (defer_completion) {
INIT_WORK(&dio->complete_work, dio_aio_complete_work);
queue_work(dio->inode->i_sb->s_dio_done_wq,
&dio->complete_work);
......@@ -1210,10 +1238,19 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
* For AIO O_(D)SYNC writes we need to defer completions to a workqueue
* so that we can call ->fsync.
*/
if (dio->is_async && iov_iter_rw(iter) == WRITE &&
((iocb->ki_filp->f_flags & O_DSYNC) ||
IS_SYNC(iocb->ki_filp->f_mapping->host))) {
retval = dio_set_defer_completion(dio);
if (dio->is_async && iov_iter_rw(iter) == WRITE) {
retval = 0;
if ((iocb->ki_filp->f_flags & O_DSYNC) ||
IS_SYNC(iocb->ki_filp->f_mapping->host))
retval = dio_set_defer_completion(dio);
else if (!dio->inode->i_sb->s_dio_done_wq) {
/*
* In case of AIO write racing with buffered read we
* need to defer completion. We can't decide this now,
* however the workqueue needs to be initialized here.
*/
retval = sb_init_dio_done_wq(dio->inode->i_sb);
}
if (retval) {
/*
* We grab i_mutex only for reads so we don't have
......
......@@ -713,8 +713,24 @@ struct iomap_dio {
static ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
struct kiocb *iocb = dio->iocb;
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
/*
* Try again to invalidate clean pages which might have been cached by
* non-direct readahead, or faulted in by get_user_pages() if the source
* of the write was an mmap'ed region of the file we're writing. Either
* one is a pretty crazy thing to do, so we don't support it 100%. If
* this invalidation fails, tough, the write still worked...
*/
if (!dio->error &&
(dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
ret = invalidate_inode_pages2_range(inode->i_mapping,
iocb->ki_pos >> PAGE_SHIFT,
(iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
WARN_ON_ONCE(ret);
}
if (dio->end_io) {
ret = dio->end_io(iocb,
dio->error ? dio->error : dio->size,
......@@ -1042,19 +1058,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
ret = iomap_dio_complete(dio);
/*
* Try again to invalidate clean pages which might have been cached by
* non-direct readahead, or faulted in by get_user_pages() if the source
* of the write was an mmap'ed region of the file we're writing. Either
* one is a pretty crazy thing to do, so we don't support it 100%. If
* this invalidation fails, tough, the write still worked...
*/
if (iov_iter_rw(iter) == WRITE) {
int err = invalidate_inode_pages2_range(mapping,
start >> PAGE_SHIFT, end >> PAGE_SHIFT);
WARN_ON_ONCE(err);
}
return ret;
out_free_dio:
......
......@@ -551,6 +551,7 @@ struct request_queue {
int node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
struct blk_trace *blk_trace;
struct mutex blk_trace_mutex;
#endif
/*
* for flush operations
......
......@@ -346,11 +346,6 @@ struct nvme_fc_remote_port {
* indicating an FC transport Aborted status.
* Entrypoint is Mandatory.
*
* @defer_rcv: Called by the transport to signal the LLLD that it has
* begun processing of a previously received NVME CMD IU. The LLDD
* is now free to re-use the rcv buffer associated with the
* nvmefc_tgt_fcp_req.
*
* @max_hw_queues: indicates the maximum number of hw queues the LLDD
* supports for cpu affinitization.
* Value is Mandatory. Must be at least 1.
......@@ -806,11 +801,19 @@ struct nvmet_fc_target_port {
* outstanding operation (if there was one) to complete, then will
* call the fcp_req_release() callback to return the command's
* exchange context back to the LLDD.
* Entrypoint is Mandatory.
*
* @fcp_req_release: Called by the transport to return a nvmefc_tgt_fcp_req
* to the LLDD after all operations on the fcp operation are complete.
* This may be due to the command completing or upon completion of
* abort cleanup.
* Entrypoint is Mandatory.
*
* @defer_rcv: Called by the transport to signal the LLLD that it has
* begun processing of a previously received NVME CMD IU. The LLDD
* is now free to re-use the rcv buffer associated with the
* nvmefc_tgt_fcp_req.
* Entrypoint is Optional.
*
* @max_hw_queues: indicates the maximum number of hw queues the LLDD
* supports for cpu affinitization.
......
......@@ -471,12 +471,14 @@ enum nvme_opcode {
*
* @NVME_SGL_FMT_ADDRESS: absolute address of the data block
* @NVME_SGL_FMT_OFFSET: relative offset of the in-capsule data block
* @NVME_SGL_FMT_TRANSPORT_A: transport defined format, value 0xA
* @NVME_SGL_FMT_INVALIDATE: RDMA transport specific remote invalidation
* request subtype
*/
enum {
NVME_SGL_FMT_ADDRESS = 0x00,
NVME_SGL_FMT_OFFSET = 0x01,
NVME_SGL_FMT_TRANSPORT_A = 0x0A,
NVME_SGL_FMT_INVALIDATE = 0x0f,
};
......@@ -490,12 +492,16 @@ enum {
*
* For struct nvme_keyed_sgl_desc:
* @NVME_KEY_SGL_FMT_DATA_DESC: keyed data block descriptor
*
* Transport-specific SGL types:
* @NVME_TRANSPORT_SGL_DATA_DESC: Transport SGL data dlock descriptor
*/
enum {
NVME_SGL_FMT_DATA_DESC = 0x00,
NVME_SGL_FMT_SEG_DESC = 0x02,
NVME_SGL_FMT_LAST_SEG_DESC = 0x03,
NVME_KEY_SGL_FMT_DATA_DESC = 0x04,
NVME_TRANSPORT_SGL_DATA_DESC = 0x05,
};
struct nvme_sgl_desc {
......@@ -1127,19 +1133,6 @@ enum {
NVME_SC_UNWRITTEN_BLOCK = 0x287,
NVME_SC_DNR = 0x4000,
/*
* FC Transport-specific error status values for NVME commands
*
* Transport-specific status code values must be in the range 0xB0..0xBF
*/
/* Generic FC failure - catchall */
NVME_SC_FC_TRANSPORT_ERROR = 0x00B0,
/* I/O failure due to FC ABTS'd */
NVME_SC_FC_TRANSPORT_ABORTED = 0x00B1,
};
struct nvme_completion {
......
......@@ -648,6 +648,12 @@ int blk_trace_startstop(struct request_queue *q, int start)
}
EXPORT_SYMBOL_GPL(blk_trace_startstop);
/*
* When reading or writing the blktrace sysfs files, the references to the
* opened sysfs or device files should prevent the underlying block device
* from being removed. So no further delete protection is really needed.
*/
/**
* blk_trace_ioctl: - handle the ioctls associated with tracing
* @bdev: the block device
......@@ -665,7 +671,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
if (!q)
return -ENXIO;
mutex_lock(&bdev->bd_mutex);
mutex_lock(&q->blk_trace_mutex);
switch (cmd) {
case BLKTRACESETUP:
......@@ -691,7 +697,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
break;
}
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&q->blk_trace_mutex);
return ret;
}
......@@ -1727,7 +1733,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
if (q == NULL)
goto out_bdput;
mutex_lock(&bdev->bd_mutex);
mutex_lock(&q->blk_trace_mutex);
if (attr == &dev_attr_enable) {
ret = sprintf(buf, "%u\n", !!q->blk_trace);
......@@ -1746,7 +1752,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&q->blk_trace_mutex);
out_bdput:
bdput(bdev);
out:
......@@ -1788,7 +1794,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
if (q == NULL)
goto out_bdput;
mutex_lock(&bdev->bd_mutex);
mutex_lock(&q->blk_trace_mutex);
if (attr == &dev_attr_enable) {
if (value)
......@@ -1814,7 +1820,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
}
out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
mutex_unlock(&q->blk_trace_mutex);
out_bdput:
bdput(bdev);
out:
......
......@@ -2926,9 +2926,15 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
* we're writing. Either one is a pretty crazy thing to do,
* so we don't support it 100%. If this invalidation
* fails, tough, the write still worked...
*
* Most of the time we do not need this since dio_complete() will do
* the invalidation for us. However there are some file systems that
* do not end up with dio_complete() being called, so let's not break
* them by removing it completely
*/
invalidate_inode_pages2_range(mapping,
pos >> PAGE_SHIFT, end);
if (mapping->nrpages)
invalidate_inode_pages2_range(mapping,
pos >> PAGE_SHIFT, end);
if (written > 0) {
pos += written;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment