Commit a692a610 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'block-5.11-2021-01-24' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - NVMe pull request from Christoph:
      - fix a status code in nvmet (Chaitanya Kulkarni)
      - avoid double completions in nvme-rdma/nvme-tcp (Chao Leng)
      - fix the CMB support to cope with NVMe 1.4 controllers (Klaus Jensen)
      - fix PRINFO handling in the passthrough ioctl (Revanth Rajashekar)
      - fix a double DMA unmap in nvme-pci

 - lightnvm error path leak fix (Pan)

 - MD pull request from Song:
      - Flush request fix (Xiao)

* tag 'block-5.11-2021-01-24' of git://git.kernel.dk/linux-block:
  lightnvm: fix memory leak when submit fails
  nvme-pci: fix error unwind in nvme_map_data
  nvme-pci: refactor nvme_unmap_data
  md: Set prev_flush_start and flush_bio in an atomic way
  nvmet: set right status on error in id-ns handler
  nvme-pci: allow use of cmb on v1.4 controllers
  nvme-tcp: avoid request double completion for concurrent nvme_tcp_timeout
  nvme-rdma: avoid request double completion for concurrent nvme_rdma_timeout
  nvme: check the PRINFO bit before deciding the host buffer length
parents 51306806 97784481
......@@ -844,11 +844,10 @@ static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa)
rqd.ppa_addr = generic_to_dev_addr(dev, ppa);
ret = nvm_submit_io_sync_raw(dev, &rqd);
__free_page(page);
if (ret)
return ret;
__free_page(page);
return rqd.error;
}
......
......@@ -639,8 +639,10 @@ static void md_submit_flush_data(struct work_struct *ws)
* could wait for this and below md_handle_request could wait for those
* bios because of suspend check
*/
spin_lock_irq(&mddev->lock);
mddev->prev_flush_start = mddev->start_flush;
mddev->flush_bio = NULL;
spin_unlock_irq(&mddev->lock);
wake_up(&mddev->sb_wait);
if (bio->bi_iter.bi_size == 0) {
......
......@@ -1543,8 +1543,21 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
}
length = (io.nblocks + 1) << ns->lba_shift;
if ((io.control & NVME_RW_PRINFO_PRACT) &&
ns->ms == sizeof(struct t10_pi_tuple)) {
/*
* Protection information is stripped/inserted by the
* controller.
*/
if (nvme_to_user_ptr(io.metadata))
return -EINVAL;
meta_len = 0;
metadata = NULL;
} else {
meta_len = (io.nblocks + 1) * ns->ms;
metadata = nvme_to_user_ptr(io.metadata);
}
if (ns->features & NVME_NS_EXT_LBAS) {
length += meta_len;
......
......@@ -23,6 +23,7 @@
#include <linux/t10-pi.h>
#include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/sed-opal.h>
#include <linux/pci-p2pdma.h>
......@@ -542,50 +543,71 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
return true;
}
static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1;
dma_addr_t dma_addr = iod->first_dma, next_dma_addr;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
dma_addr_t dma_addr = iod->first_dma;
int i;
if (iod->dma_len) {
dma_unmap_page(dev->dev, dma_addr, iod->dma_len,
rq_dma_dir(req));
return;
for (i = 0; i < iod->npages; i++) {
__le64 *prp_list = nvme_pci_iod_list(req)[i];
dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);
dma_pool_free(dev->prp_page_pool, prp_list, dma_addr);
dma_addr = next_dma_addr;
}
WARN_ON_ONCE(!iod->nents);
}
static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
{
const int last_sg = SGES_PER_PAGE - 1;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
dma_addr_t dma_addr = iod->first_dma;
int i;
for (i = 0; i < iod->npages; i++) {
struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i];
dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr);
dma_pool_free(dev->prp_page_pool, sg_list, dma_addr);
dma_addr = next_dma_addr;
}
}
static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
if (is_pci_p2pdma_page(sg_page(iod->sg)))
pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents,
rq_dma_dir(req));
else
dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req));
}
static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
if (iod->npages == 0)
dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
dma_addr);
for (i = 0; i < iod->npages; i++) {
void *addr = nvme_pci_iod_list(req)[i];
if (iod->use_sgl) {
struct nvme_sgl_desc *sg_list = addr;
next_dma_addr =
le64_to_cpu((sg_list[SGES_PER_PAGE - 1]).addr);
} else {
__le64 *prp_list = addr;
next_dma_addr = le64_to_cpu(prp_list[last_prp]);
if (iod->dma_len) {
dma_unmap_page(dev->dev, iod->first_dma, iod->dma_len,
rq_dma_dir(req));
return;
}
dma_pool_free(dev->prp_page_pool, addr, dma_addr);
dma_addr = next_dma_addr;
}
WARN_ON_ONCE(!iod->nents);
nvme_unmap_sg(dev, req);
if (iod->npages == 0)
dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
iod->first_dma);
else if (iod->use_sgl)
nvme_free_sgls(dev, req);
else
nvme_free_prps(dev, req);
mempool_free(iod->sg, dev->iod_mempool);
}
......@@ -661,7 +683,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
__le64 *old_prp_list = prp_list;
prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
if (!prp_list)
return BLK_STS_RESOURCE;
goto free_prps;
list[iod->npages++] = prp_list;
prp_list[0] = old_prp_list[i - 1];
old_prp_list[i - 1] = cpu_to_le64(prp_dma);
......@@ -681,14 +703,14 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
dma_addr = sg_dma_address(sg);
dma_len = sg_dma_len(sg);
}
done:
cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma);
return BLK_STS_OK;
bad_sgl:
free_prps:
nvme_free_prps(dev, req);
return BLK_STS_RESOURCE;
bad_sgl:
WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents),
"Invalid SGL for payload:%d nents:%d\n",
blk_rq_payload_bytes(req), iod->nents);
......@@ -760,7 +782,7 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
if (!sg_list)
return BLK_STS_RESOURCE;
goto free_sgls;
i = 0;
nvme_pci_iod_list(req)[iod->npages++] = sg_list;
......@@ -773,6 +795,9 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
} while (--entries > 0);
return BLK_STS_OK;
free_sgls:
nvme_free_sgls(dev, req);
return BLK_STS_RESOURCE;
}
static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
......@@ -841,7 +866,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
if (!iod->nents)
goto out;
goto out_free_sg;
if (is_pci_p2pdma_page(sg_page(iod->sg)))
nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg,
......@@ -850,16 +875,21 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
rq_dma_dir(req), DMA_ATTR_NO_WARN);
if (!nr_mapped)
goto out;
goto out_free_sg;
iod->use_sgl = nvme_pci_use_sgls(dev, req);
if (iod->use_sgl)
ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped);
else
ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
out:
if (ret != BLK_STS_OK)
nvme_unmap_data(dev, req);
goto out_unmap_sg;
return BLK_STS_OK;
out_unmap_sg:
nvme_unmap_sg(dev, req);
out_free_sg:
mempool_free(iod->sg, dev->iod_mempool);
return ret;
}
......@@ -1795,6 +1825,9 @@ static void nvme_map_cmb(struct nvme_dev *dev)
if (dev->cmb_size)
return;
if (NVME_CAP_CMBS(dev->ctrl.cap))
writel(NVME_CMBMSC_CRE, dev->bar + NVME_REG_CMBMSC);
dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
if (!dev->cmbsz)
return;
......@@ -1808,6 +1841,16 @@ static void nvme_map_cmb(struct nvme_dev *dev)
if (offset > bar_size)
return;
/*
* Tell the controller about the host side address mapping the CMB,
* and enable CMB decoding for the NVMe 1.4+ scheme:
*/
if (NVME_CAP_CMBS(dev->ctrl.cap)) {
hi_lo_writeq(NVME_CMBMSC_CRE | NVME_CMBMSC_CMSE |
(pci_bus_address(pdev, bar) + offset),
dev->bar + NVME_REG_CMBMSC);
}
/*
* Controllers may support a CMB size larger than their BAR,
* for example, due to being behind a bridge. Reduce the CMB to
......
......@@ -97,6 +97,7 @@ struct nvme_rdma_queue {
struct completion cm_done;
bool pi_support;
int cq_size;
struct mutex queue_lock;
};
struct nvme_rdma_ctrl {
......@@ -579,6 +580,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
int ret;
queue = &ctrl->queues[idx];
mutex_init(&queue->queue_lock);
queue->ctrl = ctrl;
if (idx && ctrl->ctrl.max_integrity_segments)
queue->pi_support = true;
......@@ -598,7 +600,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
if (IS_ERR(queue->cm_id)) {
dev_info(ctrl->ctrl.device,
"failed to create CM ID: %ld\n", PTR_ERR(queue->cm_id));
return PTR_ERR(queue->cm_id);
ret = PTR_ERR(queue->cm_id);
goto out_destroy_mutex;
}
if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
......@@ -628,6 +631,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
out_destroy_cm_id:
rdma_destroy_id(queue->cm_id);
nvme_rdma_destroy_queue_ib(queue);
out_destroy_mutex:
mutex_destroy(&queue->queue_lock);
return ret;
}
......@@ -639,9 +644,10 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
return;
mutex_lock(&queue->queue_lock);
if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
__nvme_rdma_stop_queue(queue);
mutex_unlock(&queue->queue_lock);
}
static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
......@@ -651,6 +657,7 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
nvme_rdma_destroy_queue_ib(queue);
rdma_destroy_id(queue->cm_id);
mutex_destroy(&queue->queue_lock);
}
static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
......
......@@ -76,6 +76,7 @@ struct nvme_tcp_queue {
struct work_struct io_work;
int io_cpu;
struct mutex queue_lock;
struct mutex send_mutex;
struct llist_head req_list;
struct list_head send_list;
......@@ -1219,6 +1220,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
sock_release(queue->sock);
kfree(queue->pdu);
mutex_destroy(&queue->queue_lock);
}
static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
......@@ -1380,6 +1382,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
int ret, rcv_pdu_size;
mutex_init(&queue->queue_lock);
queue->ctrl = ctrl;
init_llist_head(&queue->req_list);
INIT_LIST_HEAD(&queue->send_list);
......@@ -1398,7 +1401,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
if (ret) {
dev_err(nctrl->device,
"failed to create socket: %d\n", ret);
return ret;
goto err_destroy_mutex;
}
/* Single syn retry */
......@@ -1507,6 +1510,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
err_sock:
sock_release(queue->sock);
queue->sock = NULL;
err_destroy_mutex:
mutex_destroy(&queue->queue_lock);
return ret;
}
......@@ -1534,9 +1539,10 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
return;
mutex_lock(&queue->queue_lock);
if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
__nvme_tcp_stop_queue(queue);
mutex_unlock(&queue->queue_lock);
}
static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
......
......@@ -487,8 +487,10 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
/* return an all zeroed buffer if we can't find an active namespace */
ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid);
if (!ns)
if (!ns) {
status = NVME_SC_INVALID_NS;
goto done;
}
nvmet_ns_revalidate(ns);
......@@ -541,7 +543,9 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
id->nsattr |= (1 << 0);
nvmet_put_namespace(ns);
done:
if (!status)
status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
kfree(id);
out:
nvmet_req_complete(req, status);
......
......@@ -116,6 +116,9 @@ enum {
NVME_REG_BPMBL = 0x0048, /* Boot Partition Memory Buffer
* Location
*/
NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory
* Space Control
*/
NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */
NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */
NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */
......@@ -135,6 +138,7 @@ enum {
#define NVME_CAP_CSS(cap) (((cap) >> 37) & 0xff)
#define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf)
#define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf)
#define NVME_CAP_CMBS(cap) (((cap) >> 57) & 0x1)
#define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7)
#define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff)
......@@ -192,6 +196,8 @@ enum {
NVME_CSTS_SHST_OCCUR = 1 << 2,
NVME_CSTS_SHST_CMPLT = 2 << 2,
NVME_CSTS_SHST_MASK = 3 << 2,
NVME_CMBMSC_CRE = 1 << 0,
NVME_CMBMSC_CMSE = 1 << 1,
};
struct nvme_id_power_state {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment