Commit b409624a authored by Linus Torvalds's avatar Linus Torvalds

Merge git://git.infradead.org/users/willy/linux-nvme

Pull NVM Express driver update from Matthew Wilcox.

* git://git.infradead.org/users/willy/linux-nvme:
  NVMe: Merge issue on character device bring-up
  NVMe: Handle ioremap failure
  NVMe: Add pci suspend/resume driver callbacks
  NVMe: Use normal shutdown
  NVMe: Separate controller init from disk discovery
  NVMe: Separate queue alloc/free from create/delete
  NVMe: Group pci related actions in functions
  NVMe: Disk stats for read/write commands only
  NVMe: Bring up cdev on set feature failure
  NVMe: Fix checkpatch issues
  NVMe: Namespace IDs are unsigned
  NVMe: Update nvme_id_power_state with latest spec
  NVMe: Split header file into user-visible and kernel-visible pieces
  NVMe: Call nvme_process_cq from submission path
  NVMe: Remove "process_cq did something" message
  NVMe: Return correct value from interrupt handler
  NVMe: Disk IO statistics
  NVMe: Restructure MSI / MSI-X setup
  NVMe: Use kzalloc instead of kmalloc+memset
parents c4c17252 d82e8bfd
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <linux/moduleparam.h> #include <linux/moduleparam.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/poison.h> #include <linux/poison.h>
#include <linux/ptrace.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/types.h> #include <linux/types.h>
...@@ -79,7 +80,9 @@ struct nvme_queue { ...@@ -79,7 +80,9 @@ struct nvme_queue {
u16 sq_head; u16 sq_head;
u16 sq_tail; u16 sq_tail;
u16 cq_head; u16 cq_head;
u16 cq_phase; u8 cq_phase;
u8 cqe_seen;
u8 q_suspended;
unsigned long cmdid_data[]; unsigned long cmdid_data[];
}; };
...@@ -115,6 +118,11 @@ static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq) ...@@ -115,6 +118,11 @@ static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq)
return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)]; return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)];
} }
static unsigned nvme_queue_extra(int depth)
{
return DIV_ROUND_UP(depth, 8) + (depth * sizeof(struct nvme_cmd_info));
}
/** /**
* alloc_cmdid() - Allocate a Command ID * alloc_cmdid() - Allocate a Command ID
* @nvmeq: The queue that will be used for this command * @nvmeq: The queue that will be used for this command
...@@ -285,6 +293,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp) ...@@ -285,6 +293,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
iod->npages = -1; iod->npages = -1;
iod->length = nbytes; iod->length = nbytes;
iod->nents = 0; iod->nents = 0;
iod->start_time = jiffies;
} }
return iod; return iod;
...@@ -308,6 +317,30 @@ void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) ...@@ -308,6 +317,30 @@ void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
kfree(iod); kfree(iod);
} }
static void nvme_start_io_acct(struct bio *bio)
{
struct gendisk *disk = bio->bi_bdev->bd_disk;
const int rw = bio_data_dir(bio);
int cpu = part_stat_lock();
part_round_stats(cpu, &disk->part0);
part_stat_inc(cpu, &disk->part0, ios[rw]);
part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
part_inc_in_flight(&disk->part0, rw);
part_stat_unlock();
}
static void nvme_end_io_acct(struct bio *bio, unsigned long start_time)
{
struct gendisk *disk = bio->bi_bdev->bd_disk;
const int rw = bio_data_dir(bio);
unsigned long duration = jiffies - start_time;
int cpu = part_stat_lock();
part_stat_add(cpu, &disk->part0, ticks[rw], duration);
part_round_stats(cpu, &disk->part0);
part_dec_in_flight(&disk->part0, rw);
part_stat_unlock();
}
static void bio_completion(struct nvme_dev *dev, void *ctx, static void bio_completion(struct nvme_dev *dev, void *ctx,
struct nvme_completion *cqe) struct nvme_completion *cqe)
{ {
...@@ -315,9 +348,11 @@ static void bio_completion(struct nvme_dev *dev, void *ctx, ...@@ -315,9 +348,11 @@ static void bio_completion(struct nvme_dev *dev, void *ctx,
struct bio *bio = iod->private; struct bio *bio = iod->private;
u16 status = le16_to_cpup(&cqe->status) >> 1; u16 status = le16_to_cpup(&cqe->status) >> 1;
if (iod->nents) if (iod->nents) {
dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
nvme_end_io_acct(bio, iod->start_time);
}
nvme_free_iod(dev, iod); nvme_free_iod(dev, iod);
if (status) if (status)
bio_endio(bio, -EIO); bio_endio(bio, -EIO);
...@@ -422,10 +457,8 @@ static void nvme_bio_pair_endio(struct bio *bio, int err) ...@@ -422,10 +457,8 @@ static void nvme_bio_pair_endio(struct bio *bio, int err)
if (atomic_dec_and_test(&bp->cnt)) { if (atomic_dec_and_test(&bp->cnt)) {
bio_endio(bp->parent, bp->err); bio_endio(bp->parent, bp->err);
if (bp->bv1) kfree(bp->bv1);
kfree(bp->bv1); kfree(bp->bv2);
if (bp->bv2)
kfree(bp->bv2);
kfree(bp); kfree(bp);
} }
} }
...@@ -695,6 +728,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, ...@@ -695,6 +728,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
cmnd->rw.control = cpu_to_le16(control); cmnd->rw.control = cpu_to_le16(control);
cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
nvme_start_io_acct(bio);
if (++nvmeq->sq_tail == nvmeq->q_depth) if (++nvmeq->sq_tail == nvmeq->q_depth)
nvmeq->sq_tail = 0; nvmeq->sq_tail = 0;
writel(nvmeq->sq_tail, nvmeq->q_db); writel(nvmeq->sq_tail, nvmeq->q_db);
...@@ -709,26 +743,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, ...@@ -709,26 +743,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
return result; return result;
} }
static void nvme_make_request(struct request_queue *q, struct bio *bio) static int nvme_process_cq(struct nvme_queue *nvmeq)
{
struct nvme_ns *ns = q->queuedata;
struct nvme_queue *nvmeq = get_nvmeq(ns->dev);
int result = -EBUSY;
spin_lock_irq(&nvmeq->q_lock);
if (bio_list_empty(&nvmeq->sq_cong))
result = nvme_submit_bio_queue(nvmeq, ns, bio);
if (unlikely(result)) {
if (bio_list_empty(&nvmeq->sq_cong))
add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
bio_list_add(&nvmeq->sq_cong, bio);
}
spin_unlock_irq(&nvmeq->q_lock);
put_nvmeq(nvmeq);
}
static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
{ {
u16 head, phase; u16 head, phase;
...@@ -758,13 +773,40 @@ static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq) ...@@ -758,13 +773,40 @@ static irqreturn_t nvme_process_cq(struct nvme_queue *nvmeq)
* a big problem. * a big problem.
*/ */
if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) if (head == nvmeq->cq_head && phase == nvmeq->cq_phase)
return IRQ_NONE; return 0;
writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride)); writel(head, nvmeq->q_db + (1 << nvmeq->dev->db_stride));
nvmeq->cq_head = head; nvmeq->cq_head = head;
nvmeq->cq_phase = phase; nvmeq->cq_phase = phase;
return IRQ_HANDLED; nvmeq->cqe_seen = 1;
return 1;
}
static void nvme_make_request(struct request_queue *q, struct bio *bio)
{
struct nvme_ns *ns = q->queuedata;
struct nvme_queue *nvmeq = get_nvmeq(ns->dev);
int result = -EBUSY;
if (!nvmeq) {
put_nvmeq(NULL);
bio_endio(bio, -EIO);
return;
}
spin_lock_irq(&nvmeq->q_lock);
if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong))
result = nvme_submit_bio_queue(nvmeq, ns, bio);
if (unlikely(result)) {
if (bio_list_empty(&nvmeq->sq_cong))
add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
bio_list_add(&nvmeq->sq_cong, bio);
}
nvme_process_cq(nvmeq);
spin_unlock_irq(&nvmeq->q_lock);
put_nvmeq(nvmeq);
} }
static irqreturn_t nvme_irq(int irq, void *data) static irqreturn_t nvme_irq(int irq, void *data)
...@@ -772,7 +814,9 @@ static irqreturn_t nvme_irq(int irq, void *data) ...@@ -772,7 +814,9 @@ static irqreturn_t nvme_irq(int irq, void *data)
irqreturn_t result; irqreturn_t result;
struct nvme_queue *nvmeq = data; struct nvme_queue *nvmeq = data;
spin_lock(&nvmeq->q_lock); spin_lock(&nvmeq->q_lock);
result = nvme_process_cq(nvmeq); nvme_process_cq(nvmeq);
result = nvmeq->cqe_seen ? IRQ_HANDLED : IRQ_NONE;
nvmeq->cqe_seen = 0;
spin_unlock(&nvmeq->q_lock); spin_unlock(&nvmeq->q_lock);
return result; return result;
} }
...@@ -986,8 +1030,15 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout) ...@@ -986,8 +1030,15 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
} }
} }
static void nvme_free_queue_mem(struct nvme_queue *nvmeq) static void nvme_free_queue(struct nvme_queue *nvmeq)
{ {
spin_lock_irq(&nvmeq->q_lock);
while (bio_list_peek(&nvmeq->sq_cong)) {
struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
bio_endio(bio, -EIO);
}
spin_unlock_irq(&nvmeq->q_lock);
dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr); (void *)nvmeq->cqes, nvmeq->cq_dma_addr);
dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
...@@ -995,17 +1046,28 @@ static void nvme_free_queue_mem(struct nvme_queue *nvmeq) ...@@ -995,17 +1046,28 @@ static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
kfree(nvmeq); kfree(nvmeq);
} }
static void nvme_free_queue(struct nvme_dev *dev, int qid) static void nvme_free_queues(struct nvme_dev *dev)
{
int i;
for (i = dev->queue_count - 1; i >= 0; i--) {
nvme_free_queue(dev->queues[i]);
dev->queue_count--;
dev->queues[i] = NULL;
}
}
static void nvme_disable_queue(struct nvme_dev *dev, int qid)
{ {
struct nvme_queue *nvmeq = dev->queues[qid]; struct nvme_queue *nvmeq = dev->queues[qid];
int vector = dev->entry[nvmeq->cq_vector].vector; int vector = dev->entry[nvmeq->cq_vector].vector;
spin_lock_irq(&nvmeq->q_lock); spin_lock_irq(&nvmeq->q_lock);
nvme_cancel_ios(nvmeq, false); if (nvmeq->q_suspended) {
while (bio_list_peek(&nvmeq->sq_cong)) { spin_unlock_irq(&nvmeq->q_lock);
struct bio *bio = bio_list_pop(&nvmeq->sq_cong); return;
bio_endio(bio, -EIO);
} }
nvmeq->q_suspended = 1;
spin_unlock_irq(&nvmeq->q_lock); spin_unlock_irq(&nvmeq->q_lock);
irq_set_affinity_hint(vector, NULL); irq_set_affinity_hint(vector, NULL);
...@@ -1017,15 +1079,17 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid) ...@@ -1017,15 +1079,17 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid)
adapter_delete_cq(dev, qid); adapter_delete_cq(dev, qid);
} }
nvme_free_queue_mem(nvmeq); spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
nvme_cancel_ios(nvmeq, false);
spin_unlock_irq(&nvmeq->q_lock);
} }
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
int depth, int vector) int depth, int vector)
{ {
struct device *dmadev = &dev->pci_dev->dev; struct device *dmadev = &dev->pci_dev->dev;
unsigned extra = DIV_ROUND_UP(depth, 8) + (depth * unsigned extra = nvme_queue_extra(depth);
sizeof(struct nvme_cmd_info));
struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL); struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
if (!nvmeq) if (!nvmeq)
return NULL; return NULL;
...@@ -1052,6 +1116,8 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, ...@@ -1052,6 +1116,8 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)]; nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
nvmeq->q_depth = depth; nvmeq->q_depth = depth;
nvmeq->cq_vector = vector; nvmeq->cq_vector = vector;
nvmeq->q_suspended = 1;
dev->queue_count++;
return nvmeq; return nvmeq;
...@@ -1075,18 +1141,29 @@ static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq, ...@@ -1075,18 +1141,29 @@ static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq,
IRQF_DISABLED | IRQF_SHARED, name, nvmeq); IRQF_DISABLED | IRQF_SHARED, name, nvmeq);
} }
static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid, static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
int cq_size, int vector)
{ {
int result; struct nvme_dev *dev = nvmeq->dev;
struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector); unsigned extra = nvme_queue_extra(nvmeq->q_depth);
if (!nvmeq) nvmeq->sq_tail = 0;
return ERR_PTR(-ENOMEM); nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
memset(nvmeq->cmdid_data, 0, extra);
memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
nvme_cancel_ios(nvmeq, false);
nvmeq->q_suspended = 0;
}
static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
{
struct nvme_dev *dev = nvmeq->dev;
int result;
result = adapter_alloc_cq(dev, qid, nvmeq); result = adapter_alloc_cq(dev, qid, nvmeq);
if (result < 0) if (result < 0)
goto free_nvmeq; return result;
result = adapter_alloc_sq(dev, qid, nvmeq); result = adapter_alloc_sq(dev, qid, nvmeq);
if (result < 0) if (result < 0)
...@@ -1096,19 +1173,17 @@ static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid, ...@@ -1096,19 +1173,17 @@ static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid,
if (result < 0) if (result < 0)
goto release_sq; goto release_sq;
return nvmeq; spin_lock(&nvmeq->q_lock);
nvme_init_queue(nvmeq, qid);
spin_unlock(&nvmeq->q_lock);
return result;
release_sq: release_sq:
adapter_delete_sq(dev, qid); adapter_delete_sq(dev, qid);
release_cq: release_cq:
adapter_delete_cq(dev, qid); adapter_delete_cq(dev, qid);
free_nvmeq: return result;
dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
nvmeq->sq_cmds, nvmeq->sq_dma_addr);
kfree(nvmeq);
return ERR_PTR(result);
} }
static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
...@@ -1152,6 +1227,30 @@ static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap) ...@@ -1152,6 +1227,30 @@ static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap)
return nvme_wait_ready(dev, cap, true); return nvme_wait_ready(dev, cap, true);
} }
static int nvme_shutdown_ctrl(struct nvme_dev *dev)
{
unsigned long timeout;
u32 cc;
cc = (readl(&dev->bar->cc) & ~NVME_CC_SHN_MASK) | NVME_CC_SHN_NORMAL;
writel(cc, &dev->bar->cc);
timeout = 2 * HZ + jiffies;
while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) !=
NVME_CSTS_SHST_CMPLT) {
msleep(100);
if (fatal_signal_pending(current))
return -EINTR;
if (time_after(jiffies, timeout)) {
dev_err(&dev->pci_dev->dev,
"Device shutdown incomplete; abort shutdown\n");
return -ENODEV;
}
}
return 0;
}
static int nvme_configure_admin_queue(struct nvme_dev *dev) static int nvme_configure_admin_queue(struct nvme_dev *dev)
{ {
int result; int result;
...@@ -1159,16 +1258,17 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) ...@@ -1159,16 +1258,17 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
u64 cap = readq(&dev->bar->cap); u64 cap = readq(&dev->bar->cap);
struct nvme_queue *nvmeq; struct nvme_queue *nvmeq;
dev->dbs = ((void __iomem *)dev->bar) + 4096;
dev->db_stride = NVME_CAP_STRIDE(cap);
result = nvme_disable_ctrl(dev, cap); result = nvme_disable_ctrl(dev, cap);
if (result < 0) if (result < 0)
return result; return result;
nvmeq = nvme_alloc_queue(dev, 0, 64, 0); nvmeq = dev->queues[0];
if (!nvmeq) if (!nvmeq) {
return -ENOMEM; nvmeq = nvme_alloc_queue(dev, 0, 64, 0);
if (!nvmeq)
return -ENOMEM;
dev->queues[0] = nvmeq;
}
aqa = nvmeq->q_depth - 1; aqa = nvmeq->q_depth - 1;
aqa |= aqa << 16; aqa |= aqa << 16;
...@@ -1185,17 +1285,15 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) ...@@ -1185,17 +1285,15 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
result = nvme_enable_ctrl(dev, cap); result = nvme_enable_ctrl(dev, cap);
if (result) if (result)
goto free_q; return result;
result = queue_request_irq(dev, nvmeq, "nvme admin"); result = queue_request_irq(dev, nvmeq, "nvme admin");
if (result) if (result)
goto free_q; return result;
dev->queues[0] = nvmeq;
return result;
free_q: spin_lock(&nvmeq->q_lock);
nvme_free_queue_mem(nvmeq); nvme_init_queue(nvmeq, 0);
spin_unlock(&nvmeq->q_lock);
return result; return result;
} }
...@@ -1314,7 +1412,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) ...@@ -1314,7 +1412,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
c.rw.appmask = cpu_to_le16(io.appmask); c.rw.appmask = cpu_to_le16(io.appmask);
if (meta_len) { if (meta_len) {
meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata, meta_len); meta_iod = nvme_map_user_pages(dev, io.opcode & 1, io.metadata,
meta_len);
if (IS_ERR(meta_iod)) { if (IS_ERR(meta_iod)) {
status = PTR_ERR(meta_iod); status = PTR_ERR(meta_iod);
meta_iod = NULL; meta_iod = NULL;
...@@ -1356,6 +1455,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) ...@@ -1356,6 +1455,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
put_nvmeq(nvmeq); put_nvmeq(nvmeq);
if (length != (io.nblocks + 1) << ns->lba_shift) if (length != (io.nblocks + 1) << ns->lba_shift)
status = -ENOMEM; status = -ENOMEM;
else if (!nvmeq || nvmeq->q_suspended)
status = -EBUSY;
else else
status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT); status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);
...@@ -1453,6 +1554,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, ...@@ -1453,6 +1554,7 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
switch (cmd) { switch (cmd) {
case NVME_IOCTL_ID: case NVME_IOCTL_ID:
force_successful_syscall_return();
return ns->ns_id; return ns->ns_id;
case NVME_IOCTL_ADMIN_CMD: case NVME_IOCTL_ADMIN_CMD:
return nvme_user_admin_cmd(ns->dev, (void __user *)arg); return nvme_user_admin_cmd(ns->dev, (void __user *)arg);
...@@ -1506,10 +1608,12 @@ static int nvme_kthread(void *data) ...@@ -1506,10 +1608,12 @@ static int nvme_kthread(void *data)
if (!nvmeq) if (!nvmeq)
continue; continue;
spin_lock_irq(&nvmeq->q_lock); spin_lock_irq(&nvmeq->q_lock);
if (nvme_process_cq(nvmeq)) if (nvmeq->q_suspended)
printk("process_cq did something\n"); goto unlock;
nvme_process_cq(nvmeq);
nvme_cancel_ios(nvmeq, true); nvme_cancel_ios(nvmeq, true);
nvme_resubmit_bios(nvmeq); nvme_resubmit_bios(nvmeq);
unlock:
spin_unlock_irq(&nvmeq->q_lock); spin_unlock_irq(&nvmeq->q_lock);
} }
} }
...@@ -1556,7 +1660,7 @@ static void nvme_config_discard(struct nvme_ns *ns) ...@@ -1556,7 +1660,7 @@ static void nvme_config_discard(struct nvme_ns *ns)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
} }
static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, int nsid, static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
struct nvme_id_ns *id, struct nvme_lba_range_type *rt) struct nvme_id_ns *id, struct nvme_lba_range_type *rt)
{ {
struct nvme_ns *ns; struct nvme_ns *ns;
...@@ -1631,14 +1735,19 @@ static int set_queue_count(struct nvme_dev *dev, int count) ...@@ -1631,14 +1735,19 @@ static int set_queue_count(struct nvme_dev *dev, int count)
status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0, status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
&result); &result);
if (status) if (status)
return -EIO; return status < 0 ? -EIO : -EBUSY;
return min(result & 0xffff, result >> 16) + 1; return min(result & 0xffff, result >> 16) + 1;
} }
static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
{
return 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
}
static int nvme_setup_io_queues(struct nvme_dev *dev) static int nvme_setup_io_queues(struct nvme_dev *dev)
{ {
struct pci_dev *pdev = dev->pci_dev; struct pci_dev *pdev = dev->pci_dev;
int result, cpu, i, nr_io_queues, db_bar_size, q_depth, q_count; int result, cpu, i, vecs, nr_io_queues, size, q_depth;
nr_io_queues = num_online_cpus(); nr_io_queues = num_online_cpus();
result = set_queue_count(dev, nr_io_queues); result = set_queue_count(dev, nr_io_queues);
...@@ -1647,53 +1756,80 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) ...@@ -1647,53 +1756,80 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
if (result < nr_io_queues) if (result < nr_io_queues)
nr_io_queues = result; nr_io_queues = result;
q_count = nr_io_queues; size = db_bar_size(dev, nr_io_queues);
/* Deregister the admin queue's interrupt */ if (size > 8192) {
free_irq(dev->entry[0].vector, dev->queues[0]);
db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
if (db_bar_size > 8192) {
iounmap(dev->bar); iounmap(dev->bar);
dev->bar = ioremap(pci_resource_start(pdev, 0), db_bar_size); do {
dev->bar = ioremap(pci_resource_start(pdev, 0), size);
if (dev->bar)
break;
if (!--nr_io_queues)
return -ENOMEM;
size = db_bar_size(dev, nr_io_queues);
} while (1);
dev->dbs = ((void __iomem *)dev->bar) + 4096; dev->dbs = ((void __iomem *)dev->bar) + 4096;
dev->queues[0]->q_db = dev->dbs; dev->queues[0]->q_db = dev->dbs;
} }
for (i = 0; i < nr_io_queues; i++) /* Deregister the admin queue's interrupt */
free_irq(dev->entry[0].vector, dev->queues[0]);
vecs = nr_io_queues;
for (i = 0; i < vecs; i++)
dev->entry[i].entry = i; dev->entry[i].entry = i;
for (;;) { for (;;) {
result = pci_enable_msix(pdev, dev->entry, nr_io_queues); result = pci_enable_msix(pdev, dev->entry, vecs);
if (result == 0) { if (result <= 0)
break;
} else if (result > 0) {
nr_io_queues = result;
continue;
} else {
nr_io_queues = 0;
break; break;
} vecs = result;
} }
if (nr_io_queues == 0) { if (result < 0) {
nr_io_queues = q_count; vecs = nr_io_queues;
if (vecs > 32)
vecs = 32;
for (;;) { for (;;) {
result = pci_enable_msi_block(pdev, nr_io_queues); result = pci_enable_msi_block(pdev, vecs);
if (result == 0) { if (result == 0) {
for (i = 0; i < nr_io_queues; i++) for (i = 0; i < vecs; i++)
dev->entry[i].vector = i + pdev->irq; dev->entry[i].vector = i + pdev->irq;
break; break;
} else if (result > 0) { } else if (result < 0) {
nr_io_queues = result; vecs = 1;
continue;
} else {
nr_io_queues = 1;
break; break;
} }
vecs = result;
} }
} }
/*
* Should investigate if there's a performance win from allocating
* more queues than interrupt vectors; it might allow the submission
* path to scale better, even if the receive path is limited by the
* number of interrupts.
*/
nr_io_queues = vecs;
result = queue_request_irq(dev, dev->queues[0], "nvme admin"); result = queue_request_irq(dev, dev->queues[0], "nvme admin");
/* XXX: handle failure here */ if (result) {
dev->queues[0]->q_suspended = 1;
goto free_queues;
}
/* Free previously allocated queues that are no longer usable */
spin_lock(&dev_list_lock);
for (i = dev->queue_count - 1; i > nr_io_queues; i--) {
struct nvme_queue *nvmeq = dev->queues[i];
spin_lock(&nvmeq->q_lock);
nvme_cancel_ios(nvmeq, false);
spin_unlock(&nvmeq->q_lock);
nvme_free_queue(nvmeq);
dev->queue_count--;
dev->queues[i] = NULL;
}
spin_unlock(&dev_list_lock);
cpu = cpumask_first(cpu_online_mask); cpu = cpumask_first(cpu_online_mask);
for (i = 0; i < nr_io_queues; i++) { for (i = 0; i < nr_io_queues; i++) {
...@@ -1703,11 +1839,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) ...@@ -1703,11 +1839,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1, q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1,
NVME_Q_DEPTH); NVME_Q_DEPTH);
for (i = 0; i < nr_io_queues; i++) { for (i = dev->queue_count - 1; i < nr_io_queues; i++) {
dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i); dev->queues[i + 1] = nvme_alloc_queue(dev, i + 1, q_depth, i);
if (IS_ERR(dev->queues[i + 1])) if (!dev->queues[i + 1]) {
return PTR_ERR(dev->queues[i + 1]); result = -ENOMEM;
dev->queue_count++; goto free_queues;
}
} }
for (; i < num_possible_cpus(); i++) { for (; i < num_possible_cpus(); i++) {
...@@ -1715,15 +1852,20 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) ...@@ -1715,15 +1852,20 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
dev->queues[i + 1] = dev->queues[target + 1]; dev->queues[i + 1] = dev->queues[target + 1];
} }
return 0; for (i = 1; i < dev->queue_count; i++) {
} result = nvme_create_queue(dev->queues[i], i);
if (result) {
for (--i; i > 0; i--)
nvme_disable_queue(dev, i);
goto free_queues;
}
}
static void nvme_free_queues(struct nvme_dev *dev) return 0;
{
int i;
for (i = dev->queue_count - 1; i >= 0; i--) free_queues:
nvme_free_queue(dev, i); nvme_free_queues(dev);
return result;
} }
/* /*
...@@ -1734,7 +1876,8 @@ static void nvme_free_queues(struct nvme_dev *dev) ...@@ -1734,7 +1876,8 @@ static void nvme_free_queues(struct nvme_dev *dev)
*/ */
static int nvme_dev_add(struct nvme_dev *dev) static int nvme_dev_add(struct nvme_dev *dev)
{ {
int res, nn, i; int res;
unsigned nn, i;
struct nvme_ns *ns; struct nvme_ns *ns;
struct nvme_id_ctrl *ctrl; struct nvme_id_ctrl *ctrl;
struct nvme_id_ns *id_ns; struct nvme_id_ns *id_ns;
...@@ -1742,10 +1885,6 @@ static int nvme_dev_add(struct nvme_dev *dev) ...@@ -1742,10 +1885,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
dma_addr_t dma_addr; dma_addr_t dma_addr;
int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
res = nvme_setup_io_queues(dev);
if (res)
return res;
mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr, mem = dma_alloc_coherent(&dev->pci_dev->dev, 8192, &dma_addr,
GFP_KERNEL); GFP_KERNEL);
if (!mem) if (!mem)
...@@ -1796,23 +1935,86 @@ static int nvme_dev_add(struct nvme_dev *dev) ...@@ -1796,23 +1935,86 @@ static int nvme_dev_add(struct nvme_dev *dev)
return res; return res;
} }
static int nvme_dev_remove(struct nvme_dev *dev) static int nvme_dev_map(struct nvme_dev *dev)
{ {
struct nvme_ns *ns, *next; int bars, result = -ENOMEM;
struct pci_dev *pdev = dev->pci_dev;
if (pci_enable_device_mem(pdev))
return result;
dev->entry[0].vector = pdev->irq;
pci_set_master(pdev);
bars = pci_select_bars(pdev, IORESOURCE_MEM);
if (pci_request_selected_regions(pdev, bars, "nvme"))
goto disable_pci;
if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
else
goto disable_pci;
pci_set_drvdata(pdev, dev);
dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
if (!dev->bar)
goto disable;
dev->db_stride = NVME_CAP_STRIDE(readq(&dev->bar->cap));
dev->dbs = ((void __iomem *)dev->bar) + 4096;
return 0;
disable:
pci_release_regions(pdev);
disable_pci:
pci_disable_device(pdev);
return result;
}
static void nvme_dev_unmap(struct nvme_dev *dev)
{
if (dev->pci_dev->msi_enabled)
pci_disable_msi(dev->pci_dev);
else if (dev->pci_dev->msix_enabled)
pci_disable_msix(dev->pci_dev);
if (dev->bar) {
iounmap(dev->bar);
dev->bar = NULL;
}
pci_release_regions(dev->pci_dev);
if (pci_is_enabled(dev->pci_dev))
pci_disable_device(dev->pci_dev);
}
static void nvme_dev_shutdown(struct nvme_dev *dev)
{
int i;
for (i = dev->queue_count - 1; i >= 0; i--)
nvme_disable_queue(dev, i);
spin_lock(&dev_list_lock); spin_lock(&dev_list_lock);
list_del(&dev->node); list_del_init(&dev->node);
spin_unlock(&dev_list_lock); spin_unlock(&dev_list_lock);
if (dev->bar)
nvme_shutdown_ctrl(dev);
nvme_dev_unmap(dev);
}
static void nvme_dev_remove(struct nvme_dev *dev)
{
struct nvme_ns *ns, *next;
list_for_each_entry_safe(ns, next, &dev->namespaces, list) { list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
list_del(&ns->list); list_del(&ns->list);
del_gendisk(ns->disk); del_gendisk(ns->disk);
nvme_ns_free(ns); nvme_ns_free(ns);
} }
nvme_free_queues(dev);
return 0;
} }
static int nvme_setup_prp_pools(struct nvme_dev *dev) static int nvme_setup_prp_pools(struct nvme_dev *dev)
...@@ -1872,15 +2074,10 @@ static void nvme_free_dev(struct kref *kref) ...@@ -1872,15 +2074,10 @@ static void nvme_free_dev(struct kref *kref)
{ {
struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
nvme_dev_remove(dev); nvme_dev_remove(dev);
if (dev->pci_dev->msi_enabled) nvme_dev_shutdown(dev);
pci_disable_msi(dev->pci_dev); nvme_free_queues(dev);
else if (dev->pci_dev->msix_enabled)
pci_disable_msix(dev->pci_dev);
iounmap(dev->bar);
nvme_release_instance(dev); nvme_release_instance(dev);
nvme_release_prp_pools(dev); nvme_release_prp_pools(dev);
pci_disable_device(dev->pci_dev);
pci_release_regions(dev->pci_dev);
kfree(dev->queues); kfree(dev->queues);
kfree(dev->entry); kfree(dev->entry);
kfree(dev); kfree(dev);
...@@ -1921,9 +2118,40 @@ static const struct file_operations nvme_dev_fops = { ...@@ -1921,9 +2118,40 @@ static const struct file_operations nvme_dev_fops = {
.compat_ioctl = nvme_dev_ioctl, .compat_ioctl = nvme_dev_ioctl,
}; };
static int nvme_dev_start(struct nvme_dev *dev)
{
int result;
result = nvme_dev_map(dev);
if (result)
return result;
result = nvme_configure_admin_queue(dev);
if (result)
goto unmap;
spin_lock(&dev_list_lock);
list_add(&dev->node, &dev_list);
spin_unlock(&dev_list_lock);
result = nvme_setup_io_queues(dev);
if (result && result != -EBUSY)
goto disable;
return result;
disable:
spin_lock(&dev_list_lock);
list_del_init(&dev->node);
spin_unlock(&dev_list_lock);
unmap:
nvme_dev_unmap(dev);
return result;
}
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{ {
int bars, result = -ENOMEM; int result = -ENOMEM;
struct nvme_dev *dev; struct nvme_dev *dev;
dev = kzalloc(sizeof(*dev), GFP_KERNEL); dev = kzalloc(sizeof(*dev), GFP_KERNEL);
...@@ -1938,53 +2166,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -1938,53 +2166,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (!dev->queues) if (!dev->queues)
goto free; goto free;
if (pci_enable_device_mem(pdev))
goto free;
pci_set_master(pdev);
bars = pci_select_bars(pdev, IORESOURCE_MEM);
if (pci_request_selected_regions(pdev, bars, "nvme"))
goto disable;
INIT_LIST_HEAD(&dev->namespaces); INIT_LIST_HEAD(&dev->namespaces);
dev->pci_dev = pdev; dev->pci_dev = pdev;
pci_set_drvdata(pdev, dev);
if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
else
goto disable;
result = nvme_set_instance(dev); result = nvme_set_instance(dev);
if (result) if (result)
goto disable; goto free;
dev->entry[0].vector = pdev->irq;
result = nvme_setup_prp_pools(dev); result = nvme_setup_prp_pools(dev);
if (result) if (result)
goto disable_msix; goto release;
dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); result = nvme_dev_start(dev);
if (!dev->bar) { if (result) {
result = -ENOMEM; if (result == -EBUSY)
goto disable_msix; goto create_cdev;
goto release_pools;
} }
result = nvme_configure_admin_queue(dev);
if (result)
goto unmap;
dev->queue_count++;
spin_lock(&dev_list_lock);
list_add(&dev->node, &dev_list);
spin_unlock(&dev_list_lock);
result = nvme_dev_add(dev); result = nvme_dev_add(dev);
if (result) if (result)
goto delete; goto shutdown;
create_cdev:
scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance); scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance);
dev->miscdev.minor = MISC_DYNAMIC_MINOR; dev->miscdev.minor = MISC_DYNAMIC_MINOR;
dev->miscdev.parent = &pdev->dev; dev->miscdev.parent = &pdev->dev;
...@@ -1999,24 +2202,13 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -1999,24 +2202,13 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
remove: remove:
nvme_dev_remove(dev); nvme_dev_remove(dev);
delete: shutdown:
spin_lock(&dev_list_lock); nvme_dev_shutdown(dev);
list_del(&dev->node); release_pools:
spin_unlock(&dev_list_lock);
nvme_free_queues(dev); nvme_free_queues(dev);
unmap:
iounmap(dev->bar);
disable_msix:
if (dev->pci_dev->msi_enabled)
pci_disable_msi(dev->pci_dev);
else if (dev->pci_dev->msix_enabled)
pci_disable_msix(dev->pci_dev);
nvme_release_instance(dev);
nvme_release_prp_pools(dev); nvme_release_prp_pools(dev);
disable: release:
pci_disable_device(pdev); nvme_release_instance(dev);
pci_release_regions(pdev);
free: free:
kfree(dev->queues); kfree(dev->queues);
kfree(dev->entry); kfree(dev->entry);
...@@ -2037,8 +2229,30 @@ static void nvme_remove(struct pci_dev *pdev) ...@@ -2037,8 +2229,30 @@ static void nvme_remove(struct pci_dev *pdev)
#define nvme_link_reset NULL #define nvme_link_reset NULL
#define nvme_slot_reset NULL #define nvme_slot_reset NULL
#define nvme_error_resume NULL #define nvme_error_resume NULL
#define nvme_suspend NULL
#define nvme_resume NULL static int nvme_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
nvme_dev_shutdown(ndev);
return 0;
}
static int nvme_resume(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
int ret;
ret = nvme_dev_start(ndev);
/* XXX: should remove gendisks if resume fails */
if (ret)
nvme_free_queues(ndev);
return ret;
}
static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
static const struct pci_error_handlers nvme_err_handler = { static const struct pci_error_handlers nvme_err_handler = {
.error_detected = nvme_error_detected, .error_detected = nvme_error_detected,
...@@ -2062,8 +2276,9 @@ static struct pci_driver nvme_driver = { ...@@ -2062,8 +2276,9 @@ static struct pci_driver nvme_driver = {
.id_table = nvme_id_table, .id_table = nvme_id_table,
.probe = nvme_probe, .probe = nvme_probe,
.remove = nvme_remove, .remove = nvme_remove,
.suspend = nvme_suspend, .driver = {
.resume = nvme_resume, .pm = &nvme_dev_pm_ops,
},
.err_handler = &nvme_err_handler, .err_handler = &nvme_err_handler,
}; };
......
...@@ -933,13 +933,12 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, ...@@ -933,13 +933,12 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
int res = SNTI_TRANSLATION_SUCCESS; int res = SNTI_TRANSLATION_SUCCESS;
int xfer_len; int xfer_len;
inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL); inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
if (inq_response == NULL) { if (inq_response == NULL) {
res = -ENOMEM; res = -ENOMEM;
goto out_mem; goto out_mem;
} }
memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */ inq_response[1] = INQ_BDEV_CHARACTERISTICS_PAGE; /* Page Code */
inq_response[2] = 0x00; /* Page Length MSB */ inq_response[2] = 0x00; /* Page Length MSB */
inq_response[3] = 0x3C; /* Page Length LSB */ inq_response[3] = 0x3C; /* Page Length LSB */
...@@ -964,12 +963,11 @@ static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr, ...@@ -964,12 +963,11 @@ static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
int xfer_len; int xfer_len;
u8 *log_response; u8 *log_response;
log_response = kmalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL); log_response = kzalloc(LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH, GFP_KERNEL);
if (log_response == NULL) { if (log_response == NULL) {
res = -ENOMEM; res = -ENOMEM;
goto out_mem; goto out_mem;
} }
memset(log_response, 0, LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH);
log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE; log_response[0] = LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE;
/* Subpage=0x00, Page Length MSB=0 */ /* Subpage=0x00, Page Length MSB=0 */
...@@ -1000,12 +998,11 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns, ...@@ -1000,12 +998,11 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
u8 temp_c; u8 temp_c;
u16 temp_k; u16 temp_k;
log_response = kmalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL); log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
if (log_response == NULL) { if (log_response == NULL) {
res = -ENOMEM; res = -ENOMEM;
goto out_mem; goto out_mem;
} }
memset(log_response, 0, LOG_INFO_EXCP_PAGE_LENGTH);
mem = dma_alloc_coherent(&dev->pci_dev->dev, mem = dma_alloc_coherent(&dev->pci_dev->dev,
sizeof(struct nvme_smart_log), sizeof(struct nvme_smart_log),
...@@ -1069,12 +1066,11 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr, ...@@ -1069,12 +1066,11 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 temp_c_cur, temp_c_thresh; u8 temp_c_cur, temp_c_thresh;
u16 temp_k; u16 temp_k;
log_response = kmalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL); log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
if (log_response == NULL) { if (log_response == NULL) {
res = -ENOMEM; res = -ENOMEM;
goto out_mem; goto out_mem;
} }
memset(log_response, 0, LOG_TEMP_PAGE_LENGTH);
mem = dma_alloc_coherent(&dev->pci_dev->dev, mem = dma_alloc_coherent(&dev->pci_dev->dev,
sizeof(struct nvme_smart_log), sizeof(struct nvme_smart_log),
...@@ -1380,12 +1376,11 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns, ...@@ -1380,12 +1376,11 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
blk_desc_offset = mph_size; blk_desc_offset = mph_size;
mode_pages_offset_1 = blk_desc_offset + blk_desc_len; mode_pages_offset_1 = blk_desc_offset + blk_desc_len;
response = kmalloc(resp_size, GFP_KERNEL); response = kzalloc(resp_size, GFP_KERNEL);
if (response == NULL) { if (response == NULL) {
res = -ENOMEM; res = -ENOMEM;
goto out_mem; goto out_mem;
} }
memset(response, 0, resp_size);
res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10, res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
llbaa, mode_data_length, blk_desc_len); llbaa, mode_data_length, blk_desc_len);
...@@ -2480,12 +2475,11 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr, ...@@ -2480,12 +2475,11 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
} }
id_ns = mem; id_ns = mem;
response = kmalloc(resp_size, GFP_KERNEL); response = kzalloc(resp_size, GFP_KERNEL);
if (response == NULL) { if (response == NULL) {
res = -ENOMEM; res = -ENOMEM;
goto out_dma; goto out_dma;
} }
memset(response, 0, resp_size);
nvme_trans_fill_read_cap(response, id_ns, cdb16); nvme_trans_fill_read_cap(response, id_ns, cdb16);
xfer_len = min(alloc_len, resp_size); xfer_len = min(alloc_len, resp_size);
...@@ -2554,12 +2548,11 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr, ...@@ -2554,12 +2548,11 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
goto out_dma; goto out_dma;
} }
response = kmalloc(resp_size, GFP_KERNEL); response = kzalloc(resp_size, GFP_KERNEL);
if (response == NULL) { if (response == NULL) {
res = -ENOMEM; res = -ENOMEM;
goto out_dma; goto out_dma;
} }
memset(response, 0, resp_size);
/* The first LUN ID will always be 0 per the SAM spec */ /* The first LUN ID will always be 0 per the SAM spec */
for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) { for (lun_id = 0; lun_id < le32_to_cpu(id_ctrl->nn); lun_id++) {
...@@ -2600,12 +2593,11 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, ...@@ -2600,12 +2593,11 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) : resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
(FIXED_FMT_SENSE_DATA_SIZE)); (FIXED_FMT_SENSE_DATA_SIZE));
response = kmalloc(resp_size, GFP_KERNEL); response = kzalloc(resp_size, GFP_KERNEL);
if (response == NULL) { if (response == NULL) {
res = -ENOMEM; res = -ENOMEM;
goto out; goto out;
} }
memset(response, 0, resp_size);
if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) { if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) {
/* Descriptor Format Sense Data */ /* Descriptor Format Sense Data */
......
/* /*
* Definitions for the NVM Express interface * Definitions for the NVM Express interface
* Copyright (c) 2011, Intel Corporation. * Copyright (c) 2011-2013, Intel Corporation.
* *
* This program is free software; you can redistribute it and/or modify it * This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License, * under the terms and conditions of the GNU General Public License,
...@@ -19,7 +19,10 @@ ...@@ -19,7 +19,10 @@
#ifndef _LINUX_NVME_H #ifndef _LINUX_NVME_H
#define _LINUX_NVME_H #define _LINUX_NVME_H
#include <linux/types.h> #include <uapi/linux/nvme.h>
#include <linux/pci.h>
#include <linux/miscdevice.h>
#include <linux/kref.h>
struct nvme_bar { struct nvme_bar {
__u64 cap; /* Controller Capabilities */ __u64 cap; /* Controller Capabilities */
...@@ -50,6 +53,7 @@ enum { ...@@ -50,6 +53,7 @@ enum {
NVME_CC_SHN_NONE = 0 << 14, NVME_CC_SHN_NONE = 0 << 14,
NVME_CC_SHN_NORMAL = 1 << 14, NVME_CC_SHN_NORMAL = 1 << 14,
NVME_CC_SHN_ABRUPT = 2 << 14, NVME_CC_SHN_ABRUPT = 2 << 14,
NVME_CC_SHN_MASK = 3 << 14,
NVME_CC_IOSQES = 6 << 16, NVME_CC_IOSQES = 6 << 16,
NVME_CC_IOCQES = 4 << 20, NVME_CC_IOCQES = 4 << 20,
NVME_CSTS_RDY = 1 << 0, NVME_CSTS_RDY = 1 << 0,
...@@ -57,462 +61,11 @@ enum { ...@@ -57,462 +61,11 @@ enum {
NVME_CSTS_SHST_NORMAL = 0 << 2, NVME_CSTS_SHST_NORMAL = 0 << 2,
NVME_CSTS_SHST_OCCUR = 1 << 2, NVME_CSTS_SHST_OCCUR = 1 << 2,
NVME_CSTS_SHST_CMPLT = 2 << 2, NVME_CSTS_SHST_CMPLT = 2 << 2,
}; NVME_CSTS_SHST_MASK = 3 << 2,
struct nvme_id_power_state {
__le16 max_power; /* centiwatts */
__u16 rsvd2;
__le32 entry_lat; /* microseconds */
__le32 exit_lat; /* microseconds */
__u8 read_tput;
__u8 read_lat;
__u8 write_tput;
__u8 write_lat;
__u8 rsvd16[16];
}; };
#define NVME_VS(major, minor) (major << 16 | minor) #define NVME_VS(major, minor) (major << 16 | minor)
struct nvme_id_ctrl {
__le16 vid;
__le16 ssvid;
char sn[20];
char mn[40];
char fr[8];
__u8 rab;
__u8 ieee[3];
__u8 mic;
__u8 mdts;
__u8 rsvd78[178];
__le16 oacs;
__u8 acl;
__u8 aerl;
__u8 frmw;
__u8 lpa;
__u8 elpe;
__u8 npss;
__u8 rsvd264[248];
__u8 sqes;
__u8 cqes;
__u8 rsvd514[2];
__le32 nn;
__le16 oncs;
__le16 fuses;
__u8 fna;
__u8 vwc;
__le16 awun;
__le16 awupf;
__u8 rsvd530[1518];
struct nvme_id_power_state psd[32];
__u8 vs[1024];
};
enum {
NVME_CTRL_ONCS_COMPARE = 1 << 0,
NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1,
NVME_CTRL_ONCS_DSM = 1 << 2,
};
struct nvme_lbaf {
__le16 ms;
__u8 ds;
__u8 rp;
};
struct nvme_id_ns {
__le64 nsze;
__le64 ncap;
__le64 nuse;
__u8 nsfeat;
__u8 nlbaf;
__u8 flbas;
__u8 mc;
__u8 dpc;
__u8 dps;
__u8 rsvd30[98];
struct nvme_lbaf lbaf[16];
__u8 rsvd192[192];
__u8 vs[3712];
};
enum {
NVME_NS_FEAT_THIN = 1 << 0,
NVME_LBAF_RP_BEST = 0,
NVME_LBAF_RP_BETTER = 1,
NVME_LBAF_RP_GOOD = 2,
NVME_LBAF_RP_DEGRADED = 3,
};
struct nvme_smart_log {
__u8 critical_warning;
__u8 temperature[2];
__u8 avail_spare;
__u8 spare_thresh;
__u8 percent_used;
__u8 rsvd6[26];
__u8 data_units_read[16];
__u8 data_units_written[16];
__u8 host_reads[16];
__u8 host_writes[16];
__u8 ctrl_busy_time[16];
__u8 power_cycles[16];
__u8 power_on_hours[16];
__u8 unsafe_shutdowns[16];
__u8 media_errors[16];
__u8 num_err_log_entries[16];
__u8 rsvd192[320];
};
enum {
NVME_SMART_CRIT_SPARE = 1 << 0,
NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
NVME_SMART_CRIT_RELIABILITY = 1 << 2,
NVME_SMART_CRIT_MEDIA = 1 << 3,
NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4,
};
struct nvme_lba_range_type {
__u8 type;
__u8 attributes;
__u8 rsvd2[14];
__u64 slba;
__u64 nlb;
__u8 guid[16];
__u8 rsvd48[16];
};
enum {
NVME_LBART_TYPE_FS = 0x01,
NVME_LBART_TYPE_RAID = 0x02,
NVME_LBART_TYPE_CACHE = 0x03,
NVME_LBART_TYPE_SWAP = 0x04,
NVME_LBART_ATTRIB_TEMP = 1 << 0,
NVME_LBART_ATTRIB_HIDE = 1 << 1,
};
/* I/O commands */
enum nvme_opcode {
nvme_cmd_flush = 0x00,
nvme_cmd_write = 0x01,
nvme_cmd_read = 0x02,
nvme_cmd_write_uncor = 0x04,
nvme_cmd_compare = 0x05,
nvme_cmd_dsm = 0x09,
};
struct nvme_common_command {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__le32 cdw2[2];
__le64 metadata;
__le64 prp1;
__le64 prp2;
__le32 cdw10[6];
};
struct nvme_rw_command {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2;
__le64 metadata;
__le64 prp1;
__le64 prp2;
__le64 slba;
__le16 length;
__le16 control;
__le32 dsmgmt;
__le32 reftag;
__le16 apptag;
__le16 appmask;
};
enum {
NVME_RW_LR = 1 << 15,
NVME_RW_FUA = 1 << 14,
NVME_RW_DSM_FREQ_UNSPEC = 0,
NVME_RW_DSM_FREQ_TYPICAL = 1,
NVME_RW_DSM_FREQ_RARE = 2,
NVME_RW_DSM_FREQ_READS = 3,
NVME_RW_DSM_FREQ_WRITES = 4,
NVME_RW_DSM_FREQ_RW = 5,
NVME_RW_DSM_FREQ_ONCE = 6,
NVME_RW_DSM_FREQ_PREFETCH = 7,
NVME_RW_DSM_FREQ_TEMP = 8,
NVME_RW_DSM_LATENCY_NONE = 0 << 4,
NVME_RW_DSM_LATENCY_IDLE = 1 << 4,
NVME_RW_DSM_LATENCY_NORM = 2 << 4,
NVME_RW_DSM_LATENCY_LOW = 3 << 4,
NVME_RW_DSM_SEQ_REQ = 1 << 6,
NVME_RW_DSM_COMPRESSED = 1 << 7,
};
struct nvme_dsm_cmd {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[2];
__le64 prp1;
__le64 prp2;
__le32 nr;
__le32 attributes;
__u32 rsvd12[4];
};
enum {
NVME_DSMGMT_IDR = 1 << 0,
NVME_DSMGMT_IDW = 1 << 1,
NVME_DSMGMT_AD = 1 << 2,
};
struct nvme_dsm_range {
__le32 cattr;
__le32 nlb;
__le64 slba;
};
/* Admin commands */
enum nvme_admin_opcode {
nvme_admin_delete_sq = 0x00,
nvme_admin_create_sq = 0x01,
nvme_admin_get_log_page = 0x02,
nvme_admin_delete_cq = 0x04,
nvme_admin_create_cq = 0x05,
nvme_admin_identify = 0x06,
nvme_admin_abort_cmd = 0x08,
nvme_admin_set_features = 0x09,
nvme_admin_get_features = 0x0a,
nvme_admin_async_event = 0x0c,
nvme_admin_activate_fw = 0x10,
nvme_admin_download_fw = 0x11,
nvme_admin_format_nvm = 0x80,
nvme_admin_security_send = 0x81,
nvme_admin_security_recv = 0x82,
};
enum {
NVME_QUEUE_PHYS_CONTIG = (1 << 0),
NVME_CQ_IRQ_ENABLED = (1 << 1),
NVME_SQ_PRIO_URGENT = (0 << 1),
NVME_SQ_PRIO_HIGH = (1 << 1),
NVME_SQ_PRIO_MEDIUM = (2 << 1),
NVME_SQ_PRIO_LOW = (3 << 1),
NVME_FEAT_ARBITRATION = 0x01,
NVME_FEAT_POWER_MGMT = 0x02,
NVME_FEAT_LBA_RANGE = 0x03,
NVME_FEAT_TEMP_THRESH = 0x04,
NVME_FEAT_ERR_RECOVERY = 0x05,
NVME_FEAT_VOLATILE_WC = 0x06,
NVME_FEAT_NUM_QUEUES = 0x07,
NVME_FEAT_IRQ_COALESCE = 0x08,
NVME_FEAT_IRQ_CONFIG = 0x09,
NVME_FEAT_WRITE_ATOMIC = 0x0a,
NVME_FEAT_ASYNC_EVENT = 0x0b,
NVME_FEAT_SW_PROGRESS = 0x0c,
NVME_FWACT_REPL = (0 << 3),
NVME_FWACT_REPL_ACTV = (1 << 3),
NVME_FWACT_ACTV = (2 << 3),
};
struct nvme_identify {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[2];
__le64 prp1;
__le64 prp2;
__le32 cns;
__u32 rsvd11[5];
};
struct nvme_features {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[2];
__le64 prp1;
__le64 prp2;
__le32 fid;
__le32 dword11;
__u32 rsvd12[4];
};
struct nvme_create_cq {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[5];
__le64 prp1;
__u64 rsvd8;
__le16 cqid;
__le16 qsize;
__le16 cq_flags;
__le16 irq_vector;
__u32 rsvd12[4];
};
struct nvme_create_sq {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[5];
__le64 prp1;
__u64 rsvd8;
__le16 sqid;
__le16 qsize;
__le16 sq_flags;
__le16 cqid;
__u32 rsvd12[4];
};
struct nvme_delete_queue {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[9];
__le16 qid;
__u16 rsvd10;
__u32 rsvd11[5];
};
struct nvme_download_firmware {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[5];
__le64 prp1;
__le64 prp2;
__le32 numd;
__le32 offset;
__u32 rsvd12[4];
};
struct nvme_format_cmd {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[4];
__le32 cdw10;
__u32 rsvd11[5];
};
struct nvme_command {
union {
struct nvme_common_command common;
struct nvme_rw_command rw;
struct nvme_identify identify;
struct nvme_features features;
struct nvme_create_cq create_cq;
struct nvme_create_sq create_sq;
struct nvme_delete_queue delete_queue;
struct nvme_download_firmware dlfw;
struct nvme_format_cmd format;
struct nvme_dsm_cmd dsm;
};
};
enum {
NVME_SC_SUCCESS = 0x0,
NVME_SC_INVALID_OPCODE = 0x1,
NVME_SC_INVALID_FIELD = 0x2,
NVME_SC_CMDID_CONFLICT = 0x3,
NVME_SC_DATA_XFER_ERROR = 0x4,
NVME_SC_POWER_LOSS = 0x5,
NVME_SC_INTERNAL = 0x6,
NVME_SC_ABORT_REQ = 0x7,
NVME_SC_ABORT_QUEUE = 0x8,
NVME_SC_FUSED_FAIL = 0x9,
NVME_SC_FUSED_MISSING = 0xa,
NVME_SC_INVALID_NS = 0xb,
NVME_SC_CMD_SEQ_ERROR = 0xc,
NVME_SC_LBA_RANGE = 0x80,
NVME_SC_CAP_EXCEEDED = 0x81,
NVME_SC_NS_NOT_READY = 0x82,
NVME_SC_CQ_INVALID = 0x100,
NVME_SC_QID_INVALID = 0x101,
NVME_SC_QUEUE_SIZE = 0x102,
NVME_SC_ABORT_LIMIT = 0x103,
NVME_SC_ABORT_MISSING = 0x104,
NVME_SC_ASYNC_LIMIT = 0x105,
NVME_SC_FIRMWARE_SLOT = 0x106,
NVME_SC_FIRMWARE_IMAGE = 0x107,
NVME_SC_INVALID_VECTOR = 0x108,
NVME_SC_INVALID_LOG_PAGE = 0x109,
NVME_SC_INVALID_FORMAT = 0x10a,
NVME_SC_BAD_ATTRIBUTES = 0x180,
NVME_SC_WRITE_FAULT = 0x280,
NVME_SC_READ_ERROR = 0x281,
NVME_SC_GUARD_CHECK = 0x282,
NVME_SC_APPTAG_CHECK = 0x283,
NVME_SC_REFTAG_CHECK = 0x284,
NVME_SC_COMPARE_FAILED = 0x285,
NVME_SC_ACCESS_DENIED = 0x286,
};
struct nvme_completion {
__le32 result; /* Used by admin commands to return data */
__u32 rsvd;
__le16 sq_head; /* how much of this queue may be reclaimed */
__le16 sq_id; /* submission queue that generated this entry */
__u16 command_id; /* of the command which completed */
__le16 status; /* did the command fail, and if so, why? */
};
struct nvme_user_io {
__u8 opcode;
__u8 flags;
__u16 control;
__u16 nblocks;
__u16 rsvd;
__u64 metadata;
__u64 addr;
__u64 slba;
__u32 dsmgmt;
__u32 reftag;
__u16 apptag;
__u16 appmask;
};
struct nvme_admin_cmd {
__u8 opcode;
__u8 flags;
__u16 rsvd1;
__u32 nsid;
__u32 cdw2;
__u32 cdw3;
__u64 metadata;
__u64 addr;
__u32 metadata_len;
__u32 data_len;
__u32 cdw10;
__u32 cdw11;
__u32 cdw12;
__u32 cdw13;
__u32 cdw14;
__u32 cdw15;
__u32 timeout_ms;
__u32 result;
};
#define NVME_IOCTL_ID _IO('N', 0x40)
#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd)
#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
#ifdef __KERNEL__
#include <linux/pci.h>
#include <linux/miscdevice.h>
#include <linux/kref.h>
#define NVME_IO_TIMEOUT (5 * HZ) #define NVME_IO_TIMEOUT (5 * HZ)
/* /*
...@@ -553,7 +106,7 @@ struct nvme_ns { ...@@ -553,7 +106,7 @@ struct nvme_ns {
struct request_queue *queue; struct request_queue *queue;
struct gendisk *disk; struct gendisk *disk;
int ns_id; unsigned ns_id;
int lba_shift; int lba_shift;
int ms; int ms;
u64 mode_select_num_blocks; u64 mode_select_num_blocks;
...@@ -572,6 +125,7 @@ struct nvme_iod { ...@@ -572,6 +125,7 @@ struct nvme_iod {
int offset; /* Of PRP list */ int offset; /* Of PRP list */
int nents; /* Used in scatterlist */ int nents; /* Used in scatterlist */
int length; /* Of data, in bytes */ int length; /* Of data, in bytes */
unsigned long start_time;
dma_addr_t first_dma; dma_addr_t first_dma;
struct scatterlist sg[0]; struct scatterlist sg[0];
}; };
...@@ -613,6 +167,4 @@ struct sg_io_hdr; ...@@ -613,6 +167,4 @@ struct sg_io_hdr;
int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr);
int nvme_sg_get_version_num(int __user *ip); int nvme_sg_get_version_num(int __user *ip);
#endif
#endif /* _LINUX_NVME_H */ #endif /* _LINUX_NVME_H */
...@@ -284,6 +284,7 @@ header-y += nfs_mount.h ...@@ -284,6 +284,7 @@ header-y += nfs_mount.h
header-y += nfsacl.h header-y += nfsacl.h
header-y += nl80211.h header-y += nl80211.h
header-y += nubus.h header-y += nubus.h
header-y += nvme.h
header-y += nvram.h header-y += nvram.h
header-y += omap3isp.h header-y += omap3isp.h
header-y += omapfb.h header-y += omapfb.h
......
/*
* Definitions for the NVM Express interface
* Copyright (c) 2011-2013, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef _UAPI_LINUX_NVME_H
#define _UAPI_LINUX_NVME_H
#include <linux/types.h>
struct nvme_id_power_state {
__le16 max_power; /* centiwatts */
__u8 rsvd2;
__u8 flags;
__le32 entry_lat; /* microseconds */
__le32 exit_lat; /* microseconds */
__u8 read_tput;
__u8 read_lat;
__u8 write_tput;
__u8 write_lat;
__u8 rsvd16[16];
};
enum {
NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0,
NVME_PS_FLAGS_NON_OP_STATE = 1 << 1,
};
struct nvme_id_ctrl {
__le16 vid;
__le16 ssvid;
char sn[20];
char mn[40];
char fr[8];
__u8 rab;
__u8 ieee[3];
__u8 mic;
__u8 mdts;
__u8 rsvd78[178];
__le16 oacs;
__u8 acl;
__u8 aerl;
__u8 frmw;
__u8 lpa;
__u8 elpe;
__u8 npss;
__u8 rsvd264[248];
__u8 sqes;
__u8 cqes;
__u8 rsvd514[2];
__le32 nn;
__le16 oncs;
__le16 fuses;
__u8 fna;
__u8 vwc;
__le16 awun;
__le16 awupf;
__u8 rsvd530[1518];
struct nvme_id_power_state psd[32];
__u8 vs[1024];
};
enum {
NVME_CTRL_ONCS_COMPARE = 1 << 0,
NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1,
NVME_CTRL_ONCS_DSM = 1 << 2,
};
struct nvme_lbaf {
__le16 ms;
__u8 ds;
__u8 rp;
};
struct nvme_id_ns {
__le64 nsze;
__le64 ncap;
__le64 nuse;
__u8 nsfeat;
__u8 nlbaf;
__u8 flbas;
__u8 mc;
__u8 dpc;
__u8 dps;
__u8 rsvd30[98];
struct nvme_lbaf lbaf[16];
__u8 rsvd192[192];
__u8 vs[3712];
};
enum {
NVME_NS_FEAT_THIN = 1 << 0,
NVME_LBAF_RP_BEST = 0,
NVME_LBAF_RP_BETTER = 1,
NVME_LBAF_RP_GOOD = 2,
NVME_LBAF_RP_DEGRADED = 3,
};
struct nvme_smart_log {
__u8 critical_warning;
__u8 temperature[2];
__u8 avail_spare;
__u8 spare_thresh;
__u8 percent_used;
__u8 rsvd6[26];
__u8 data_units_read[16];
__u8 data_units_written[16];
__u8 host_reads[16];
__u8 host_writes[16];
__u8 ctrl_busy_time[16];
__u8 power_cycles[16];
__u8 power_on_hours[16];
__u8 unsafe_shutdowns[16];
__u8 media_errors[16];
__u8 num_err_log_entries[16];
__u8 rsvd192[320];
};
enum {
NVME_SMART_CRIT_SPARE = 1 << 0,
NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
NVME_SMART_CRIT_RELIABILITY = 1 << 2,
NVME_SMART_CRIT_MEDIA = 1 << 3,
NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4,
};
struct nvme_lba_range_type {
__u8 type;
__u8 attributes;
__u8 rsvd2[14];
__u64 slba;
__u64 nlb;
__u8 guid[16];
__u8 rsvd48[16];
};
enum {
NVME_LBART_TYPE_FS = 0x01,
NVME_LBART_TYPE_RAID = 0x02,
NVME_LBART_TYPE_CACHE = 0x03,
NVME_LBART_TYPE_SWAP = 0x04,
NVME_LBART_ATTRIB_TEMP = 1 << 0,
NVME_LBART_ATTRIB_HIDE = 1 << 1,
};
/* I/O commands */
enum nvme_opcode {
nvme_cmd_flush = 0x00,
nvme_cmd_write = 0x01,
nvme_cmd_read = 0x02,
nvme_cmd_write_uncor = 0x04,
nvme_cmd_compare = 0x05,
nvme_cmd_dsm = 0x09,
};
struct nvme_common_command {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__le32 cdw2[2];
__le64 metadata;
__le64 prp1;
__le64 prp2;
__le32 cdw10[6];
};
struct nvme_rw_command {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2;
__le64 metadata;
__le64 prp1;
__le64 prp2;
__le64 slba;
__le16 length;
__le16 control;
__le32 dsmgmt;
__le32 reftag;
__le16 apptag;
__le16 appmask;
};
enum {
NVME_RW_LR = 1 << 15,
NVME_RW_FUA = 1 << 14,
NVME_RW_DSM_FREQ_UNSPEC = 0,
NVME_RW_DSM_FREQ_TYPICAL = 1,
NVME_RW_DSM_FREQ_RARE = 2,
NVME_RW_DSM_FREQ_READS = 3,
NVME_RW_DSM_FREQ_WRITES = 4,
NVME_RW_DSM_FREQ_RW = 5,
NVME_RW_DSM_FREQ_ONCE = 6,
NVME_RW_DSM_FREQ_PREFETCH = 7,
NVME_RW_DSM_FREQ_TEMP = 8,
NVME_RW_DSM_LATENCY_NONE = 0 << 4,
NVME_RW_DSM_LATENCY_IDLE = 1 << 4,
NVME_RW_DSM_LATENCY_NORM = 2 << 4,
NVME_RW_DSM_LATENCY_LOW = 3 << 4,
NVME_RW_DSM_SEQ_REQ = 1 << 6,
NVME_RW_DSM_COMPRESSED = 1 << 7,
};
struct nvme_dsm_cmd {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[2];
__le64 prp1;
__le64 prp2;
__le32 nr;
__le32 attributes;
__u32 rsvd12[4];
};
enum {
NVME_DSMGMT_IDR = 1 << 0,
NVME_DSMGMT_IDW = 1 << 1,
NVME_DSMGMT_AD = 1 << 2,
};
struct nvme_dsm_range {
__le32 cattr;
__le32 nlb;
__le64 slba;
};
/* Admin commands */
enum nvme_admin_opcode {
nvme_admin_delete_sq = 0x00,
nvme_admin_create_sq = 0x01,
nvme_admin_get_log_page = 0x02,
nvme_admin_delete_cq = 0x04,
nvme_admin_create_cq = 0x05,
nvme_admin_identify = 0x06,
nvme_admin_abort_cmd = 0x08,
nvme_admin_set_features = 0x09,
nvme_admin_get_features = 0x0a,
nvme_admin_async_event = 0x0c,
nvme_admin_activate_fw = 0x10,
nvme_admin_download_fw = 0x11,
nvme_admin_format_nvm = 0x80,
nvme_admin_security_send = 0x81,
nvme_admin_security_recv = 0x82,
};
enum {
NVME_QUEUE_PHYS_CONTIG = (1 << 0),
NVME_CQ_IRQ_ENABLED = (1 << 1),
NVME_SQ_PRIO_URGENT = (0 << 1),
NVME_SQ_PRIO_HIGH = (1 << 1),
NVME_SQ_PRIO_MEDIUM = (2 << 1),
NVME_SQ_PRIO_LOW = (3 << 1),
NVME_FEAT_ARBITRATION = 0x01,
NVME_FEAT_POWER_MGMT = 0x02,
NVME_FEAT_LBA_RANGE = 0x03,
NVME_FEAT_TEMP_THRESH = 0x04,
NVME_FEAT_ERR_RECOVERY = 0x05,
NVME_FEAT_VOLATILE_WC = 0x06,
NVME_FEAT_NUM_QUEUES = 0x07,
NVME_FEAT_IRQ_COALESCE = 0x08,
NVME_FEAT_IRQ_CONFIG = 0x09,
NVME_FEAT_WRITE_ATOMIC = 0x0a,
NVME_FEAT_ASYNC_EVENT = 0x0b,
NVME_FEAT_SW_PROGRESS = 0x0c,
NVME_FWACT_REPL = (0 << 3),
NVME_FWACT_REPL_ACTV = (1 << 3),
NVME_FWACT_ACTV = (2 << 3),
};
struct nvme_identify {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[2];
__le64 prp1;
__le64 prp2;
__le32 cns;
__u32 rsvd11[5];
};
struct nvme_features {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[2];
__le64 prp1;
__le64 prp2;
__le32 fid;
__le32 dword11;
__u32 rsvd12[4];
};
struct nvme_create_cq {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[5];
__le64 prp1;
__u64 rsvd8;
__le16 cqid;
__le16 qsize;
__le16 cq_flags;
__le16 irq_vector;
__u32 rsvd12[4];
};
struct nvme_create_sq {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[5];
__le64 prp1;
__u64 rsvd8;
__le16 sqid;
__le16 qsize;
__le16 sq_flags;
__le16 cqid;
__u32 rsvd12[4];
};
struct nvme_delete_queue {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[9];
__le16 qid;
__u16 rsvd10;
__u32 rsvd11[5];
};
struct nvme_download_firmware {
__u8 opcode;
__u8 flags;
__u16 command_id;
__u32 rsvd1[5];
__le64 prp1;
__le64 prp2;
__le32 numd;
__le32 offset;
__u32 rsvd12[4];
};
struct nvme_format_cmd {
__u8 opcode;
__u8 flags;
__u16 command_id;
__le32 nsid;
__u64 rsvd2[4];
__le32 cdw10;
__u32 rsvd11[5];
};
struct nvme_command {
union {
struct nvme_common_command common;
struct nvme_rw_command rw;
struct nvme_identify identify;
struct nvme_features features;
struct nvme_create_cq create_cq;
struct nvme_create_sq create_sq;
struct nvme_delete_queue delete_queue;
struct nvme_download_firmware dlfw;
struct nvme_format_cmd format;
struct nvme_dsm_cmd dsm;
};
};
enum {
NVME_SC_SUCCESS = 0x0,
NVME_SC_INVALID_OPCODE = 0x1,
NVME_SC_INVALID_FIELD = 0x2,
NVME_SC_CMDID_CONFLICT = 0x3,
NVME_SC_DATA_XFER_ERROR = 0x4,
NVME_SC_POWER_LOSS = 0x5,
NVME_SC_INTERNAL = 0x6,
NVME_SC_ABORT_REQ = 0x7,
NVME_SC_ABORT_QUEUE = 0x8,
NVME_SC_FUSED_FAIL = 0x9,
NVME_SC_FUSED_MISSING = 0xa,
NVME_SC_INVALID_NS = 0xb,
NVME_SC_CMD_SEQ_ERROR = 0xc,
NVME_SC_LBA_RANGE = 0x80,
NVME_SC_CAP_EXCEEDED = 0x81,
NVME_SC_NS_NOT_READY = 0x82,
NVME_SC_CQ_INVALID = 0x100,
NVME_SC_QID_INVALID = 0x101,
NVME_SC_QUEUE_SIZE = 0x102,
NVME_SC_ABORT_LIMIT = 0x103,
NVME_SC_ABORT_MISSING = 0x104,
NVME_SC_ASYNC_LIMIT = 0x105,
NVME_SC_FIRMWARE_SLOT = 0x106,
NVME_SC_FIRMWARE_IMAGE = 0x107,
NVME_SC_INVALID_VECTOR = 0x108,
NVME_SC_INVALID_LOG_PAGE = 0x109,
NVME_SC_INVALID_FORMAT = 0x10a,
NVME_SC_BAD_ATTRIBUTES = 0x180,
NVME_SC_WRITE_FAULT = 0x280,
NVME_SC_READ_ERROR = 0x281,
NVME_SC_GUARD_CHECK = 0x282,
NVME_SC_APPTAG_CHECK = 0x283,
NVME_SC_REFTAG_CHECK = 0x284,
NVME_SC_COMPARE_FAILED = 0x285,
NVME_SC_ACCESS_DENIED = 0x286,
};
struct nvme_completion {
__le32 result; /* Used by admin commands to return data */
__u32 rsvd;
__le16 sq_head; /* how much of this queue may be reclaimed */
__le16 sq_id; /* submission queue that generated this entry */
__u16 command_id; /* of the command which completed */
__le16 status; /* did the command fail, and if so, why? */
};
struct nvme_user_io {
__u8 opcode;
__u8 flags;
__u16 control;
__u16 nblocks;
__u16 rsvd;
__u64 metadata;
__u64 addr;
__u64 slba;
__u32 dsmgmt;
__u32 reftag;
__u16 apptag;
__u16 appmask;
};
struct nvme_admin_cmd {
__u8 opcode;
__u8 flags;
__u16 rsvd1;
__u32 nsid;
__u32 cdw2;
__u32 cdw3;
__u64 metadata;
__u64 addr;
__u32 metadata_len;
__u32 data_len;
__u32 cdw10;
__u32 cdw11;
__u32 cdw12;
__u32 cdw13;
__u32 cdw14;
__u32 cdw15;
__u32 timeout_ms;
__u32 result;
};
#define NVME_IOCTL_ID _IO('N', 0x40)
#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd)
#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
#endif /* _UAPI_LINUX_NVME_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment