Commit 4b394a23 authored by Gary R Hook, committed by Herbert Xu

crypto: ccp - Let a v5 CCP provide the same function as v3

Enable equivalent function on a v5 CCP. Add support for a
version 5 CCP which enables AES/XTS/SHA services. Also,
more work on the data structures to virtualize
functionality.
Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent bb4e89b3
......@@ -2,6 +2,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o
ccp-objs := ccp-dev.o \
ccp-ops.o \
ccp-dev-v3.o \
ccp-dev-v5.o \
ccp-platform.o \
ccp-dmaengine.o
ccp-$(CONFIG_PCI) += ccp-pci.o
......
......@@ -4,6 +4,7 @@
* Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
*
* Author: Tom Lendacky <thomas.lendacky@amd.com>
* Author: Gary R Hook <gary.hook@amd.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
......@@ -134,7 +135,22 @@ static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes,
rctx->cmd.engine = CCP_ENGINE_SHA;
rctx->cmd.u.sha.type = rctx->type;
rctx->cmd.u.sha.ctx = &rctx->ctx_sg;
rctx->cmd.u.sha.ctx_len = sizeof(rctx->ctx);
switch (rctx->type) {
case CCP_SHA_TYPE_1:
rctx->cmd.u.sha.ctx_len = SHA1_DIGEST_SIZE;
break;
case CCP_SHA_TYPE_224:
rctx->cmd.u.sha.ctx_len = SHA224_DIGEST_SIZE;
break;
case CCP_SHA_TYPE_256:
rctx->cmd.u.sha.ctx_len = SHA256_DIGEST_SIZE;
break;
default:
/* Should never get here */
break;
}
rctx->cmd.u.sha.src = sg;
rctx->cmd.u.sha.src_len = rctx->hash_cnt;
rctx->cmd.u.sha.opad = ctx->u.sha.key_len ?
......
......@@ -405,6 +405,7 @@ static int ccp_init(struct ccp_device *ccp)
init_waitqueue_head(&ccp->sb_queue);
init_waitqueue_head(&ccp->suspend_queue);
dev_dbg(dev, "Starting threads...\n");
/* Create a kthread for each queue */
for (i = 0; i < ccp->cmd_q_count; i++) {
struct task_struct *kthread;
......@@ -424,6 +425,13 @@ static int ccp_init(struct ccp_device *ccp)
wake_up_process(kthread);
}
dev_dbg(dev, "Enabling interrupts...\n");
/* Enable interrupts */
iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);
dev_dbg(dev, "Registering device...\n");
ccp_add_device(ccp);
/* Register the RNG */
ccp->hwrng.name = ccp->rngname;
ccp->hwrng.read = ccp_trng_read;
......@@ -438,11 +446,6 @@ static int ccp_init(struct ccp_device *ccp)
if (ret)
goto e_hwrng;
ccp_add_device(ccp);
/* Enable interrupts */
iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);
return 0;
e_hwrng:
......@@ -468,7 +471,13 @@ static void ccp_destroy(struct ccp_device *ccp)
struct ccp_cmd *cmd;
unsigned int qim, i;
/* Remove this device from the list of available units first */
/* Unregister the DMA engine */
ccp_dmaengine_unregister(ccp);
/* Unregister the RNG */
hwrng_unregister(&ccp->hwrng);
/* Remove this device from the list of available units */
ccp_del_device(ccp);
/* Build queue interrupt mask (two interrupt masks per queue) */
......@@ -488,12 +497,6 @@ static void ccp_destroy(struct ccp_device *ccp)
}
iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
/* Unregister the DMA engine */
ccp_dmaengine_unregister(ccp);
/* Unregister the RNG */
hwrng_unregister(&ccp->hwrng);
/* Stop the queue kthreads */
for (i = 0; i < ccp->cmd_q_count; i++)
if (ccp->cmd_q[i].kthread)
......@@ -570,6 +573,7 @@ static const struct ccp_actions ccp3_actions = {
struct ccp_vdata ccpv3 = {
.version = CCP_VERSION(3, 0),
.setup = NULL,
.perform = &ccp3_actions,
.bar = 2,
.offset = 0x20000,
......
/*
* AMD Cryptographic Coprocessor (CCP) driver
*
* Copyright (C) 2016 Advanced Micro Devices, Inc.
*
* Author: Gary R Hook <gary.hook@amd.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/kthread.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/compiler.h>
#include <linux/ccp.h>
#include "ccp-dev.h"
/*
 * Allocate @count contiguous LSB slots on behalf of a command queue.
 *
 * The queue's private LSB map is searched first, when a private LSB has
 * been assigned.  If nothing is free there, the device-wide shared map is
 * searched under sb_mutex; when that is full as well the caller sleeps on
 * sb_queue until ccp_lsb_free() releases shared slots.
 *
 * Returns the index of the first allocated slot, or 0 if the sleep was
 * interrupted.
 */
static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
{
	struct ccp_device *ccp;
	int start;

	/* First look at the map for the queue.  "No private LSB" is stored
	 * as -1, so test the value as a signed quantity: a plain ">= 0" on
	 * an unsigned field is always true and would hand out slots from a
	 * non-existent private region.
	 */
	if ((int)cmd_q->lsb >= 0) {
		start = (u32)bitmap_find_next_zero_area(cmd_q->lsbmap,
							LSB_SIZE,
							0, count, 0);
		if (start < LSB_SIZE) {
			bitmap_set(cmd_q->lsbmap, start, count);
			return start + cmd_q->lsb * LSB_SIZE;
		}
	}

	/* No joy; try to get an entry from the shared blocks */
	ccp = cmd_q->ccp;
	for (;;) {
		mutex_lock(&ccp->sb_mutex);

		start = (u32)bitmap_find_next_zero_area(ccp->lsbmap,
							MAX_LSB_CNT * LSB_SIZE,
							0,
							count, 0);
		if (start <= MAX_LSB_CNT * LSB_SIZE) {
			bitmap_set(ccp->lsbmap, start, count);

			mutex_unlock(&ccp->sb_mutex);
			/* Hand back a slot index, exactly like the private
			 * path above.  ccp_lsb_free() clears the shared map
			 * at this same index, and users of the value scale
			 * it by LSB_ITEM_SIZE themselves.
			 */
			return start;
		}

		ccp->sb_avail = 0;

		mutex_unlock(&ccp->sb_mutex);

		/* Wait for KSB entries to become available */
		if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
			return 0;
	}
}
/*
 * Release @count LSB slots beginning at slot index @start.
 *
 * Slots that fall inside the queue's private LSB are cleared in the
 * queue's own map; anything else is cleared in the shared device map, and
 * any allocator sleeping on sb_queue is woken.
 */
static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
			 unsigned int count)
{
	struct ccp_device *ccp;
	int region;

	/* 0 is what ccp_lsb_alloc() reports on failure: nothing to release */
	if (start == 0)
		return;

	region = start / LSB_SIZE;
	if (cmd_q->lsb == region) {
		/* Slot lives in this queue's private LSB */
		bitmap_clear(cmd_q->lsbmap, start % LSB_SIZE, count);
		return;
	}

	/* Slot lives in one of the shared LSBs */
	ccp = cmd_q->ccp;

	mutex_lock(&ccp->sb_mutex);
	bitmap_clear(ccp->lsbmap, start, count);
	ccp->sb_avail = 1;
	mutex_unlock(&ccp->sb_mutex);

	wake_up_interruptible_all(&ccp->sb_queue);
}
/* CCP version 5: Union to define the function field (cmd_reg1/dword0) */
/*
 * Each engine lays its own bitfields over the same 16-bit value.  Callers
 * in this file zero "raw", fill in the engine-specific view and then copy
 * "raw" into the descriptor's function field.
 */
union ccp_function {
/* AES: 7 + 1 + 5 + 2 = 15 bits */
struct {
u16 size:7;
u16 encrypt:1;
u16 mode:5;
u16 type:2;
} aes;
/* AES-XTS: same layout as AES with the mode bits reserved */
struct {
u16 size:7;
u16 encrypt:1;
u16 rsvd:5;
u16 type:2;
} aes_xts;
/* SHA: 10 + 4 + 1 = 15 bits */
struct {
u16 rsvd1:10;
u16 type:4;
u16 rsvd2:1;
} sha;
/* RSA: 3 + 12 = 15 bits */
struct {
u16 mode:3;
u16 size:12;
} rsa;
/* Pass-through: 2 + 3 + 2 + 8 = 15 bits */
struct {
u16 byteswap:2;
u16 bitwise:3;
u16 reflect:2;
u16 rsvd:8;
} pt;
/* zlib: only reserved bits are declared */
struct {
u16 rsvd:13;
} zlib;
/* ECC: 10 + 2 + 3 = 15 bits */
struct {
u16 size:10;
u16 type:2;
u16 mode:3;
} ecc;
/* Whole value, used to clear the union and to store it in a descriptor */
u16 raw;
};
/*
 * Accessors for the engine-specific views of union ccp_function.
 * "p" is a pointer to a union ccp_function; each macro expands to the
 * named bitfield and is used as an assignment target in this file.
 */
#define CCP_AES_SIZE(p) ((p)->aes.size)
#define CCP_AES_ENCRYPT(p) ((p)->aes.encrypt)
#define CCP_AES_MODE(p) ((p)->aes.mode)
#define CCP_AES_TYPE(p) ((p)->aes.type)
#define CCP_XTS_SIZE(p) ((p)->aes_xts.size)
#define CCP_XTS_ENCRYPT(p) ((p)->aes_xts.encrypt)
#define CCP_SHA_TYPE(p) ((p)->sha.type)
#define CCP_RSA_SIZE(p) ((p)->rsa.size)
#define CCP_PT_BYTESWAP(p) ((p)->pt.byteswap)
#define CCP_PT_BITWISE(p) ((p)->pt.bitwise)
#define CCP_ECC_MODE(p) ((p)->ecc.mode)
/* NOTE(review): the ecc view declared in union ccp_function has members
 * size, type and mode only; "one" is not among them, so this macro would
 * fail to compile if it were ever used - confirm the intended field.
 */
#define CCP_ECC_AFFINE(p) ((p)->ecc.one)
/*
 * Accessors for the fields of a version 5 command descriptor.
 * "p" is a pointer to a struct ccp5_desc; each macro expands to the named
 * member and is used as an assignment target in this file.
 */
/* Word 0 */
#define CCP5_CMD_DW0(p) ((p)->dw0)
#define CCP5_CMD_SOC(p) (CCP5_CMD_DW0(p).soc)
#define CCP5_CMD_IOC(p) (CCP5_CMD_DW0(p).ioc)
#define CCP5_CMD_INIT(p) (CCP5_CMD_DW0(p).init)
#define CCP5_CMD_EOM(p) (CCP5_CMD_DW0(p).eom)
#define CCP5_CMD_FUNCTION(p) (CCP5_CMD_DW0(p).function)
#define CCP5_CMD_ENGINE(p) (CCP5_CMD_DW0(p).engine)
#define CCP5_CMD_PROT(p) (CCP5_CMD_DW0(p).prot)
/* Word 1 */
#define CCP5_CMD_DW1(p) ((p)->length)
#define CCP5_CMD_LEN(p) (CCP5_CMD_DW1(p))
/* Word 2 */
#define CCP5_CMD_DW2(p) ((p)->src_lo)
#define CCP5_CMD_SRC_LO(p) (CCP5_CMD_DW2(p))
/* Word 3 */
#define CCP5_CMD_DW3(p) ((p)->dw3)
#define CCP5_CMD_SRC_MEM(p) ((p)->dw3.src_mem)
#define CCP5_CMD_SRC_HI(p) ((p)->dw3.src_hi)
#define CCP5_CMD_LSB_ID(p) ((p)->dw3.lsb_cxt_id)
#define CCP5_CMD_FIX_SRC(p) ((p)->dw3.fixed)
/* Words 4/5 */
/* The SHA_LO/SHA_HI accessors alias the destination words (dw4/dw5) */
#define CCP5_CMD_DW4(p) ((p)->dw4)
#define CCP5_CMD_DST_LO(p) (CCP5_CMD_DW4(p).dst_lo)
#define CCP5_CMD_DW5(p) ((p)->dw5.fields.dst_hi)
#define CCP5_CMD_DST_HI(p) (CCP5_CMD_DW5(p))
#define CCP5_CMD_DST_MEM(p) ((p)->dw5.fields.dst_mem)
#define CCP5_CMD_FIX_DST(p) ((p)->dw5.fields.fixed)
#define CCP5_CMD_SHA_LO(p) ((p)->dw4.sha_len_lo)
#define CCP5_CMD_SHA_HI(p) ((p)->dw5.sha_len_hi)
/* Word 6/7 */
#define CCP5_CMD_DW6(p) ((p)->key_lo)
#define CCP5_CMD_KEY_LO(p) (CCP5_CMD_DW6(p))
#define CCP5_CMD_DW7(p) ((p)->dw7)
#define CCP5_CMD_KEY_HI(p) ((p)->dw7.key_hi)
#define CCP5_CMD_KEY_MEM(p) ((p)->dw7.key_mem)
static inline u32 low_address(unsigned long addr)
{
return (u64)addr & 0x0ffffffff;
}
static inline u32 high_address(unsigned long addr)
{
return ((u64)addr >> 32) & 0x00000ffff;
}
/*
 * Report how many descriptor slots are currently free in a queue's ring.
 *
 * The hardware head pointer is read back and converted into a slot index
 * relative to the start of the ring; the result is the distance from the
 * driver's next write index (qidx) to that head, minus one.
 */
static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q)
{
	u32 ring_start = low_address(cmd_q->qdma_tail);
	u32 hw_head = ioread32(cmd_q->reg_head_lo);
	unsigned int head_slot;
	unsigned int gap;

	head_slot = (hw_head - ring_start) / sizeof(struct ccp5_desc);
	gap = head_slot + COMMANDS_PER_QUEUE - cmd_q->qidx - 1;

	/* Always one unused spot */
	return gap % COMMANDS_PER_QUEUE;
}
/*
 * Copy a prepared descriptor into the next slot of a queue's ring and start
 * the queue.
 *
 * @desc:  descriptor to submit.  A stop-on-completion request is converted
 *         to interrupt-on-completion before the descriptor is queued.
 * @cmd_q: queue to submit on.
 *
 * When the descriptor requests an interrupt on completion, the caller
 * sleeps until the interrupt handler sets cmd_q->int_rcvd.
 *
 * Returns 0 on success, the non-zero wait_event_interruptible() result if
 * the sleep was interrupted, or -EIO if the queue recorded a command error.
 *
 * NOTE(review): cmd_q->cmd_error is not reset here; presumably the caller
 * clears it before each command - confirm, since a stale value now makes
 * this function report -EIO.
 */
static int ccp5_do_cmd(struct ccp5_desc *desc,
		       struct ccp_cmd_queue *cmd_q)
{
	u32 *mP;
	__le32 *dP;
	u32 tail;
	int i;
	int ret = 0;

	if (CCP5_CMD_SOC(desc)) {
		CCP5_CMD_IOC(desc) = 1;
		CCP5_CMD_SOC(desc) = 0;
	}
	mutex_lock(&cmd_q->q_mutex);

	/* Copy the eight descriptor words into the ring slot at qidx */
	mP = (u32 *) &cmd_q->qbase[cmd_q->qidx];
	dP = (__le32 *) desc;
	for (i = 0; i < 8; i++)
		mP[i] = cpu_to_le32(dP[i]); /* handle endianness */

	cmd_q->qidx = (cmd_q->qidx + 1) % COMMANDS_PER_QUEUE;

	/* The data used by this command must be flushed to memory */
	wmb();

	/* Write the new tail address back to the queue register */
	tail = low_address(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
	iowrite32(tail, cmd_q->reg_tail_lo);

	/* Turn the queue back on using our cached control register */
	iowrite32(cmd_q->qcontrol | CMD5_Q_RUN, cmd_q->reg_control);
	mutex_unlock(&cmd_q->q_mutex);

	if (CCP5_CMD_IOC(desc)) {
		/* Wait for the job to complete */
		ret = wait_event_interruptible(cmd_q->int_queue,
					       cmd_q->int_rcvd);
		if (ret || cmd_q->cmd_error) {
			/* A version 5 device doesn't use Job IDs... */
			if (!ret)
				ret = -EIO;
		}
		cmd_q->int_rcvd = 0;
	}

	/* Propagate the outcome; it used to be dropped by returning 0 */
	return ret;
}
static int ccp5_perform_aes(struct ccp_op *op)
{
struct ccp5_desc desc;
union ccp_function function;
u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
/* Zero out all the fields of the command desc */
memset(&desc, 0, Q_DESC_SIZE);
CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_AES;
CCP5_CMD_SOC(&desc) = op->soc;
CCP5_CMD_IOC(&desc) = 1;
CCP5_CMD_INIT(&desc) = op->init;
CCP5_CMD_EOM(&desc) = op->eom;
CCP5_CMD_PROT(&desc) = 0;
function.raw = 0;
CCP_AES_ENCRYPT(&function) = op->u.aes.action;
CCP_AES_MODE(&function) = op->u.aes.mode;
CCP_AES_TYPE(&function) = op->u.aes.type;
if (op->u.aes.mode == CCP_AES_MODE_CFB)
CCP_AES_SIZE(&function) = 0x7f;
CCP5_CMD_FUNCTION(&desc) = function.raw;
CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
CCP5_CMD_KEY_HI(&desc) = 0;
CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
return ccp5_do_cmd(&desc, op->cmd_q);
}
/*
 * Build a version 5 descriptor for an AES-XTS operation described by @op
 * and submit it on op->cmd_q.  Source and destination are system-memory
 * DMA buffers; the key is addressed inside the storage block.
 *
 * Returns the result of ccp5_do_cmd().
 */
static int ccp5_perform_xts_aes(struct ccp_op *op)
{
	union ccp_function function;
	struct ccp5_desc desc;
	u32 key_addr;

	/* Function word: unit size and direction */
	function.raw = 0;
	CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
	CCP_XTS_ENCRYPT(&function) = op->u.xts.action;

	/* Start from a fully cleared descriptor */
	memset(&desc, 0, Q_DESC_SIZE);

	/* Word 0: control bits, engine and function */
	CCP5_CMD_SOC(&desc) = op->soc;
	CCP5_CMD_IOC(&desc) = 1;
	CCP5_CMD_INIT(&desc) = op->init;
	CCP5_CMD_EOM(&desc) = op->eom;
	CCP5_CMD_PROT(&desc) = 0;
	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_XTS_AES_128;
	CCP5_CMD_FUNCTION(&desc) = function.raw;

	/* Source: length, address, memory type and context slot */
	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;

	/* Destination */
	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);

	/* Key: byte address of the key slot inside the storage block */
	key_addr = op->sb_key * LSB_ITEM_SIZE;
	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
	CCP5_CMD_KEY_HI(&desc) = 0;
	CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);

	return ccp5_do_cmd(&desc, op->cmd_q);
}
/*
 * Build a version 5 descriptor for a SHA operation described by @op and
 * submit it on op->cmd_q.  The source is a system-memory DMA buffer and
 * the context slot is passed as the LSB id.  The words that other engines
 * use for the destination carry the message length in bits on the final
 * (eom) block and zero otherwise.
 *
 * Returns the result of ccp5_do_cmd().
 */
static int ccp5_perform_sha(struct ccp_op *op)
{
	union ccp_function function;
	struct ccp5_desc desc;

	/* Function word: SHA variant */
	function.raw = 0;
	CCP_SHA_TYPE(&function) = op->u.sha.type;

	/* Start from a fully cleared descriptor */
	memset(&desc, 0, Q_DESC_SIZE);

	/* Word 0: control bits, engine and function (INIT is always set) */
	CCP5_CMD_SOC(&desc) = op->soc;
	CCP5_CMD_IOC(&desc) = 1;
	CCP5_CMD_INIT(&desc) = 1;
	CCP5_CMD_EOM(&desc) = op->eom;
	CCP5_CMD_PROT(&desc) = 0;
	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_SHA;
	CCP5_CMD_FUNCTION(&desc) = function.raw;

	/* Source: length, address, memory type and context slot */
	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;

	/* Message length is only supplied with the last block */
	CCP5_CMD_SHA_LO(&desc) = op->eom ?
				 lower_32_bits(op->u.sha.msg_bits) : 0;
	CCP5_CMD_SHA_HI(&desc) = op->eom ?
				 upper_32_bits(op->u.sha.msg_bits) : 0;

	return ccp5_do_cmd(&desc, op->cmd_q);
}
/*
 * Build a version 5 descriptor for an RSA operation described by @op and
 * submit it on op->cmd_q.  Source, destination and key (exponent) are all
 * system-memory DMA buffers.
 *
 * Returns the result of ccp5_do_cmd().
 */
static int ccp5_perform_rsa(struct ccp_op *op)
{
	union ccp_function function;
	struct ccp5_desc desc;

	/* Function word: modulus size */
	function.raw = 0;
	CCP_RSA_SIZE(&function) = op->u.rsa.mod_size;

	/* Start from a fully cleared descriptor */
	memset(&desc, 0, Q_DESC_SIZE);

	/* Word 0: control bits, engine and function */
	CCP5_CMD_SOC(&desc) = op->soc;
	CCP5_CMD_IOC(&desc) = 1;
	CCP5_CMD_INIT(&desc) = 0;
	CCP5_CMD_EOM(&desc) = 1;
	CCP5_CMD_PROT(&desc) = 0;
	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_RSA;
	CCP5_CMD_FUNCTION(&desc) = function.raw;

	CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;

	/* Source is from external memory */
	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);

	/* Destination is in external memory */
	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);

	/* Key (Exponent) is in external memory */
	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
	CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma);
	CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma);

	return ccp5_do_cmd(&desc, op->cmd_q);
}
/*
 * Build a version 5 descriptor for a pass-through (copy) operation
 * described by @op and submit it on op->cmd_q.  Either end of the copy
 * may be a system-memory DMA buffer or a storage-block slot.
 *
 * Returns the result of ccp5_do_cmd().
 */
static int ccp5_perform_passthru(struct ccp_op *op)
{
	struct ccp_dma_info *saddr = &op->src.u.dma;
	struct ccp_dma_info *daddr = &op->dst.u.dma;
	union ccp_function function;
	struct ccp5_desc desc;

	/* Function word: byte-swap and bitwise options */
	function.raw = 0;
	CCP_PT_BYTESWAP(&function) = op->u.passthru.byte_swap;
	CCP_PT_BITWISE(&function) = op->u.passthru.bit_mod;

	memset(&desc, 0, Q_DESC_SIZE);

	/* Word 0: control bits, engine and function */
	CCP5_CMD_SOC(&desc) = 0;
	CCP5_CMD_IOC(&desc) = 1;
	CCP5_CMD_INIT(&desc) = 0;
	CCP5_CMD_EOM(&desc) = op->eom;
	CCP5_CMD_PROT(&desc) = 0;
	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;
	CCP5_CMD_FUNCTION(&desc) = function.raw;

	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
		/* System-memory source: its DMA length is the copy length */
		CCP5_CMD_LEN(&desc) = saddr->length;

		CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
		CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(saddr);
		CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(saddr);

		/* A bitwise operation takes its mask from the key slot */
		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
			CCP5_CMD_LSB_ID(&desc) = op->sb_key;
	} else {
		/* Storage-block source: length comes from the destination */
		u32 src_sb_addr = op->src.u.sb * CCP_SB_BYTES;

		CCP5_CMD_LEN(&desc) = daddr->length;

		CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SB;
		CCP5_CMD_SRC_HI(&desc) = 0;
		CCP5_CMD_SRC_LO(&desc) = lower_32_bits(src_sb_addr);
	}

	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
		CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
		CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(daddr);
		CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(daddr);
	} else {
		u32 dst_sb_addr = op->dst.u.sb * CCP_SB_BYTES;

		CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SB;
		CCP5_CMD_DST_HI(&desc) = 0;
		CCP5_CMD_DST_LO(&desc) = lower_32_bits(dst_sb_addr);
	}

	return ccp5_do_cmd(&desc, op->cmd_q);
}
/*
 * Build a version 5 descriptor for an ECC operation described by @op and
 * submit it on op->cmd_q.  Source and destination are system-memory DMA
 * buffers.
 *
 * Returns the result of ccp5_do_cmd().
 */
static int ccp5_perform_ecc(struct ccp_op *op)
{
	union ccp_function function;
	struct ccp5_desc desc;

	/* Function word: requested ECC function goes in the mode bits */
	function.raw = 0;
	function.ecc.mode = op->u.ecc.function;

	/* Start from a fully cleared descriptor */
	memset(&desc, 0, Q_DESC_SIZE);

	/* Word 0: control bits, engine and function */
	CCP5_CMD_SOC(&desc) = 0;
	CCP5_CMD_IOC(&desc) = 1;
	CCP5_CMD_INIT(&desc) = 0;
	CCP5_CMD_EOM(&desc) = 1;
	CCP5_CMD_PROT(&desc) = 0;
	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_ECC;
	CCP5_CMD_FUNCTION(&desc) = function.raw;

	/* Source */
	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);

	/* Destination */
	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);

	return ccp5_do_cmd(&desc, op->cmd_q);
}
/*
 * Record in cmd_q->lsbmask which LSB regions this queue may use.
 *
 * @status is consumed LSB_REGION_WIDTH bits at a time; within each group
 * the bit at position cmd_q->id tells whether the queue has access.
 * Region 0 is never marked, as it has special privileges.
 *
 * Returns 0 if at least one region is accessible, -EINVAL otherwise.
 */
static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status)
{
	int q_mask = 1 << cmd_q->id;
	int accessible;
	int region;

	for (region = 1; region < MAX_LSB_CNT; region++) {
		if (status & q_mask)
			bitmap_set(cmd_q->lsbmask, region, 1);

		/* Move on to the next region's group of bits */
		status >>= LSB_REGION_WIDTH;
	}

	accessible = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
	dev_info(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n",
		 cmd_q->id, accessible);

	if (!accessible)
		return -EINVAL;

	return 0;
}
/*
 * Give a private LSB to every queue that can reach exactly @lsb_cnt LSBs.
 *
 * @ccp:     device whose queues are examined
 * @lsb_cnt: only queues whose access mask has this many bits are handled
 * @n_lsbs:  number of LSBs still unassigned on entry
 * @lsb_pub: mask of LSBs still available; an assigned LSB is cleared here
 *
 * Returns the number of LSBs still unassigned, or -EINVAL if a matching
 * queue has no accessible LSB left in @lsb_pub.
 */
static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
					int lsb_cnt, int n_lsbs,
					unsigned long *lsb_pub)
{
	DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
	int i;

	for (i = 0; i < ccp->cmd_q_count; i++) {
		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
		int weight = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
		int bitno;

		/* Only queues with exactly lsb_cnt candidates this round */
		if (weight != lsb_cnt)
			continue;

		/* Walk a scratch copy of the queue's candidates, dropping
		 * each one that has already been taken by another queue.
		 */
		bitmap_copy(qlsb, cmd_q->lsbmask, MAX_LSB_CNT);
		for (bitno = find_first_bit(qlsb, MAX_LSB_CNT);
		     bitno < MAX_LSB_CNT;
		     bitno = find_first_bit(qlsb, MAX_LSB_CNT)) {
			if (test_bit(bitno, lsb_pub)) {
				/* Still available: claim it for this queue */
				cmd_q->lsb = bitno;
				bitmap_clear(lsb_pub, bitno, 1);
				dev_info(ccp->dev,
					 "Queue %d gets LSB %d\n",
					 i, bitno);
				break;
			}
			bitmap_clear(qlsb, bitno, 1);
		}

		/* Candidates exhausted without finding a free LSB */
		if (bitno >= MAX_LSB_CNT)
			return -EINVAL;

		n_lsbs--;
	}

	return n_lsbs;
}
/* Hand out private LSBs to the queues, most-constrained queues first.
 *
 * The union of every queue's access mask gives the set of usable LSBs.
 * When there are at least as many usable LSBs as queues, each queue is
 * given one of its own, starting with the queues that have the fewest
 * choices; an assignment failure is reported as -EINVAL.  When there are
 * fewer LSBs than queues nothing is assigned privately.  Whatever is left
 * in the public mask afterwards is shared, and every LSB not in that mask
 * has all of its slots marked "in use" in the shared slot map.
 *
 * Returns 0 on success or -EINVAL.
 */
static int ccp_assign_lsbs(struct ccp_device *ccp)
{
	DECLARE_BITMAP(lsb_pub, MAX_LSB_CNT);
	DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
	int unassigned;
	int lsb_cnt;
	int bitno;
	int i;

	/* Aggregate the per-queue masks to count the usable LSBs */
	bitmap_zero(lsb_pub, MAX_LSB_CNT);
	for (i = 0; i < ccp->cmd_q_count; i++)
		bitmap_or(lsb_pub,
			  lsb_pub, ccp->cmd_q[i].lsbmask,
			  MAX_LSB_CNT);
	unassigned = bitmap_weight(lsb_pub, MAX_LSB_CNT);

	if (unassigned >= ccp->cmd_q_count) {
		/* Enough LSBs for one per queue.  Serve queues in order of
		 * how few LSBs they can reach (1, then 2, ...); each
		 * assignment removes the LSB from the public mask.
		 */
		for (lsb_cnt = 1;
		     unassigned && (lsb_cnt <= MAX_LSB_CNT);
		     lsb_cnt++) {
			int rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt,
							      unassigned,
							      lsb_pub);

			if (rc < 0)
				return -EINVAL;
			unassigned = rc;
		}
	}

	/* Every zero bit in the public mask is an LSB that cannot be
	 * shared; reserve all of its slots in the shared slot map.
	 */
	bitmap_copy(qlsb, lsb_pub, MAX_LSB_CNT);
	for (bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
	     bitno < MAX_LSB_CNT;
	     bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT)) {
		bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
		bitmap_set(qlsb, bitno, 1);
	}

	return 0;
}
/*
 * Bring up a version 5 CCP.
 *
 * For every hardware queue flagged in Q_MASK_REG this allocates a dma pool
 * and a descriptor ring and precomputes the queue's register addresses.
 * It then quiesces the queues, requests the IRQ, copies the private LSB
 * mask to the public mask registers, programs each queue's ring address
 * and size, distributes LSBs among the queues, pre-allocates key/context
 * slots, starts one kthread per queue, enables interrupts and finally
 * registers the device.
 *
 * Returns 0 on success or a negative errno.
 */
static int ccp5_init(struct ccp_device *ccp)
{
	struct device *dev = ccp->dev;
	struct ccp_cmd_queue *cmd_q;
	struct dma_pool *dma_pool;
	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
	unsigned int qmr, qim, i;
	u64 status;
	u32 status_lo, status_hi;
	int ret;

	/* Find available queues */
	qim = 0;
	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
	for (i = 0; i < MAX_HW_QUEUES; i++) {

		if (!(qmr & (1 << i)))
			continue;

		/* Allocate a dma pool for this queue */
		snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
			 ccp->name, i);
		dma_pool = dma_pool_create(dma_pool_name, dev,
					   CCP_DMAPOOL_MAX_SIZE,
					   CCP_DMAPOOL_ALIGN, 0);
		if (!dma_pool) {
			dev_err(dev, "unable to allocate dma pool\n");
			ret = -ENOMEM;
			/* Bail out instead of carrying on with a NULL pool;
			 * this queue has not been counted yet, so e_pool
			 * only releases the pools of earlier queues.
			 */
			goto e_pool;
		}

		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
		ccp->cmd_q_count++;

		cmd_q->ccp = ccp;
		cmd_q->id = i;
		cmd_q->dma_pool = dma_pool;
		mutex_init(&cmd_q->q_mutex);

		/* Page alignment satisfies our needs for N <= 128 */
		BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128);
		cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
		cmd_q->qbase = dma_zalloc_coherent(dev, cmd_q->qsize,
						   &cmd_q->qbase_dma,
						   GFP_KERNEL);
		if (!cmd_q->qbase) {
			dev_err(dev, "unable to allocate command queue\n");
			ret = -ENOMEM;
			goto e_pool;
		}

		cmd_q->qidx = 0;
		/* Preset some register values and masks that are queue
		 * number dependent
		 */
		cmd_q->reg_control = ccp->io_regs +
				     CMD5_Q_STATUS_INCR * (i + 1);
		cmd_q->reg_tail_lo = cmd_q->reg_control + CMD5_Q_TAIL_LO_BASE;
		cmd_q->reg_head_lo = cmd_q->reg_control + CMD5_Q_HEAD_LO_BASE;
		cmd_q->reg_int_enable = cmd_q->reg_control +
					CMD5_Q_INT_ENABLE_BASE;
		cmd_q->reg_interrupt_status = cmd_q->reg_control +
					      CMD5_Q_INTERRUPT_STATUS_BASE;
		cmd_q->reg_status = cmd_q->reg_control + CMD5_Q_STATUS_BASE;
		cmd_q->reg_int_status = cmd_q->reg_control +
					CMD5_Q_INT_STATUS_BASE;
		cmd_q->reg_dma_status = cmd_q->reg_control +
					CMD5_Q_DMA_STATUS_BASE;
		cmd_q->reg_dma_read_status = cmd_q->reg_control +
					     CMD5_Q_DMA_READ_STATUS_BASE;
		cmd_q->reg_dma_write_status = cmd_q->reg_control +
					      CMD5_Q_DMA_WRITE_STATUS_BASE;

		init_waitqueue_head(&cmd_q->int_queue);

		dev_dbg(dev, "queue #%u available\n", i);
	}
	if (ccp->cmd_q_count == 0) {
		dev_notice(dev, "no command queues available\n");
		ret = -EIO;
		goto e_pool;
	}
	dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);

	/* Turn off the queues and disable interrupts until ready */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];

		cmd_q->qcontrol = 0; /* Start with nothing */
		iowrite32(cmd_q->qcontrol, cmd_q->reg_control);

		/* Disable the interrupts */
		iowrite32(0x00, cmd_q->reg_int_enable);
		ioread32(cmd_q->reg_int_status);
		ioread32(cmd_q->reg_status);

		/* Clear the interrupts */
		iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
	}

	dev_dbg(dev, "Requesting an IRQ...\n");
	/* Request an irq */
	ret = ccp->get_irq(ccp);
	if (ret) {
		dev_err(dev, "unable to allocate an IRQ\n");
		goto e_pool;
	}

	/* Initialize the queue used to suspend */
	init_waitqueue_head(&ccp->suspend_queue);

	dev_dbg(dev, "Loading LSB map...\n");
	/* Copy the private LSB mask to the public registers */
	status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
	status_hi = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
	iowrite32(status_lo, ccp->io_regs + LSB_PUBLIC_MASK_LO_OFFSET);
	iowrite32(status_hi, ccp->io_regs + LSB_PUBLIC_MASK_HI_OFFSET);
	status = ((u64)status_hi<<30) | (u64)status_lo;

	dev_dbg(dev, "Configuring virtual queues...\n");
	/* Configure size of each virtual queue accessible to host */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		u32 dma_addr_lo;
		u32 dma_addr_hi;

		cmd_q = &ccp->cmd_q[i];

		cmd_q->qcontrol &= ~(CMD5_Q_SIZE << CMD5_Q_SHIFT);
		cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD5_Q_SHIFT;

		/* Head and tail both start at the base of the ring */
		cmd_q->qdma_tail = cmd_q->qbase_dma;
		dma_addr_lo = low_address(cmd_q->qdma_tail);
		iowrite32((u32)dma_addr_lo, cmd_q->reg_tail_lo);
		iowrite32((u32)dma_addr_lo, cmd_q->reg_head_lo);

		/* Upper address bits live in the control register */
		dma_addr_hi = high_address(cmd_q->qdma_tail);
		cmd_q->qcontrol |= (dma_addr_hi << 16);
		iowrite32(cmd_q->qcontrol, cmd_q->reg_control);

		/* Find the LSB regions accessible to the queue */
		ccp_find_lsb_regions(cmd_q, status);
		cmd_q->lsb = -1; /* Unassigned value */
	}

	dev_dbg(dev, "Assigning LSBs...\n");
	ret = ccp_assign_lsbs(ccp);
	if (ret) {
		dev_err(dev, "Unable to assign LSBs (%d)\n", ret);
		goto e_irq;
	}

	/* Optimization: pre-allocate LSB slots for each queue */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		ccp->cmd_q[i].sb_key = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
		ccp->cmd_q[i].sb_ctx = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
	}

	dev_dbg(dev, "Starting threads...\n");
	/* Create a kthread for each queue */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		struct task_struct *kthread;

		cmd_q = &ccp->cmd_q[i];

		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
					 "%s-q%u", ccp->name, cmd_q->id);
		if (IS_ERR(kthread)) {
			dev_err(dev, "error creating queue thread (%ld)\n",
				PTR_ERR(kthread));
			ret = PTR_ERR(kthread);
			goto e_kthread;
		}

		cmd_q->kthread = kthread;
		wake_up_process(kthread);
	}

	dev_dbg(dev, "Enabling interrupts...\n");
	/* Enable interrupts */
	for (i = 0; i < ccp->cmd_q_count; i++) {
		cmd_q = &ccp->cmd_q[i];
		iowrite32(ALL_INTERRUPTS, cmd_q->reg_int_enable);
	}

	dev_dbg(dev, "Registering device...\n");
	/* Put this on the unit list to make it available */
	ccp_add_device(ccp);

	return 0;

e_kthread:
	for (i = 0; i < ccp->cmd_q_count; i++)
		if (ccp->cmd_q[i].kthread)
			kthread_stop(ccp->cmd_q[i].kthread);

e_irq:
	ccp->free_irq(ccp);

e_pool:
	/* NOTE(review): the coherent descriptor rings (qbase) allocated
	 * above are not released on this path; only ccp5_destroy() frees
	 * them - confirm whether the caller handles that on init failure.
	 */
	for (i = 0; i < ccp->cmd_q_count; i++)
		dma_pool_destroy(ccp->cmd_q[i].dma_pool);

	return ret;
}
/*
 * Tear down a version 5 CCP: take the device off the list of available
 * units, stop every queue and silence its interrupts, stop the queue
 * kthreads, release the IRQ and the descriptor rings, and complete any
 * commands still on the cmd or backlog lists with -ENODEV.
 */
static void ccp5_destroy(struct ccp_device *ccp)
{
struct device *dev = ccp->dev;
struct ccp_cmd_queue *cmd_q;
struct ccp_cmd *cmd;
unsigned int i;
/* Remove this device from the list of available units first */
ccp_del_device(ccp);
/* Disable and clear interrupts */
for (i = 0; i < ccp->cmd_q_count; i++) {
cmd_q = &ccp->cmd_q[i];
/* Turn off the run bit */
iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control);
/* Clear any pending interrupt status (same write the irq handler
 * uses to acknowledge an interrupt)
 */
iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
/* Disable the interrupts */
iowrite32(0x00, cmd_q->reg_int_enable);
/* Values are discarded; presumably read for their side effect on
 * the hardware - TODO confirm
 */
ioread32(cmd_q->reg_int_status);
ioread32(cmd_q->reg_status);
}
/* Stop the queue kthreads */
for (i = 0; i < ccp->cmd_q_count; i++)
if (ccp->cmd_q[i].kthread)
kthread_stop(ccp->cmd_q[i].kthread);
ccp->free_irq(ccp);
/* Release each queue's descriptor ring */
for (i = 0; i < ccp->cmd_q_count; i++) {
cmd_q = &ccp->cmd_q[i];
dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
cmd_q->qbase_dma);
}
/* Flush the cmd and backlog queue */
while (!list_empty(&ccp->cmd)) {
/* Invoke the callback directly with an error code */
cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
list_del(&cmd->entry);
cmd->callback(cmd->data, -ENODEV);
}
while (!list_empty(&ccp->backlog)) {
/* Invoke the callback directly with an error code */
cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
list_del(&cmd->entry);
cmd->callback(cmd->data, -ENODEV);
}
}
/*
 * Interrupt handler for a version 5 CCP.
 *
 * @data is the struct device whose driver data is the ccp_device.  Every
 * queue's interrupt status register is polled; for each queue with a
 * non-zero status the status registers are captured, the first error is
 * latched, the interrupt is acknowledged and the waiter is woken.
 *
 * Always returns IRQ_HANDLED.
 */
static irqreturn_t ccp5_irq_handler(int irq, void *data)
{
	struct device *dev = data;
	struct ccp_device *ccp = dev_get_drvdata(dev);
	unsigned int q;

	for (q = 0; q < ccp->cmd_q_count; q++) {
		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[q];
		u32 pending = ioread32(cmd_q->reg_interrupt_status);

		/* Nothing raised by this queue */
		if (!pending)
			continue;

		/* Snapshot the status registers for the waiting thread */
		cmd_q->int_status = pending;
		cmd_q->q_status = ioread32(cmd_q->reg_status);
		cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);

		/* On error, only save the first error value */
		if ((pending & INT_ERROR) && !cmd_q->cmd_error)
			cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);

		cmd_q->int_rcvd = 1;

		/* Acknowledge the interrupt and wake the kthread */
		iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
		wake_up_interruptible(&cmd_q->int_queue);
	}

	return IRQ_HANDLED;
}
/*
 * Version-specific setup hook (installed as ccpv5.setup): writes a fixed
 * value to the request-ID configuration register.
 * NOTE(review): the meaning of 0x00001249 is not visible in this file -
 * confirm against the hardware documentation.
 */
static void ccp5_config(struct ccp_device *ccp)
{
/* Public side */
iowrite32(0x00001249, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET);
}
/* Operations table binding the generic CCP entry points to their
 * version 5 implementations in this file.
 */
static const struct ccp_actions ccp5_actions = {
/* Per-engine command submission */
.aes = ccp5_perform_aes,
.xts_aes = ccp5_perform_xts_aes,
.sha = ccp5_perform_sha,
.rsa = ccp5_perform_rsa,
.passthru = ccp5_perform_passthru,
.ecc = ccp5_perform_ecc,
/* LSB slot allocation and release */
.sballoc = ccp_lsb_alloc,
.sbfree = ccp_lsb_free,
/* Device lifecycle, queue accounting and interrupt handling */
.init = ccp5_init,
.destroy = ccp5_destroy,
.get_free_slots = ccp5_get_free_slots,
.irqhandler = ccp5_irq_handler,
};
/* Version data for a 5.0 CCP: setup hook, operations table, and the BAR
 * number and register offset used for this version.
 */
struct ccp_vdata ccpv5 = {
.version = CCP_VERSION(5, 0),
.setup = ccp5_config,
.perform = &ccp5_actions,
.bar = 2,
.offset = 0x0,
};
......@@ -61,7 +61,62 @@
#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f)
#define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f)
/****** REQ0 Related Values ******/
/* ------------------------ CCP Version 5 Specifics ------------------------ */
/* Device-wide register offsets */
#define CMD5_QUEUE_MASK_OFFSET		0x00
#define CMD5_REQID_CONFIG_OFFSET	0x08
#define LSB_PUBLIC_MASK_LO_OFFSET	0x18
#define LSB_PUBLIC_MASK_HI_OFFSET	0x1C
#define LSB_PRIVATE_MASK_LO_OFFSET	0x20
#define LSB_PRIVATE_MASK_HI_OFFSET	0x24

/* Per-queue register offsets, relative to the queue's control register */
#define CMD5_Q_CONTROL_BASE		0x0000
#define CMD5_Q_TAIL_LO_BASE		0x0004
#define CMD5_Q_HEAD_LO_BASE		0x0008
#define CMD5_Q_INT_ENABLE_BASE		0x000C
#define CMD5_Q_INTERRUPT_STATUS_BASE	0x0010

#define CMD5_Q_STATUS_BASE		0x0100
#define CMD5_Q_INT_STATUS_BASE		0x0104
#define CMD5_Q_DMA_STATUS_BASE		0x0108
#define CMD5_Q_DMA_READ_STATUS_BASE	0x010C
#define CMD5_Q_DMA_WRITE_STATUS_BASE	0x0110
#define CMD5_Q_ABORT_BASE		0x0114
#define CMD5_Q_AX_CACHE_BASE		0x0118

/* Address offset between two virtual queue registers */
#define CMD5_Q_STATUS_INCR		0x1000

/* Bit masks */
#define CMD5_Q_RUN			0x1
#define CMD5_Q_HALT			0x2
#define CMD5_Q_MEM_LOCATION		0x4
#define CMD5_Q_SIZE			0x1F
#define CMD5_Q_SHIFT			3
#define COMMANDS_PER_QUEUE		16
#define QUEUE_SIZE_VAL			((ffs(COMMANDS_PER_QUEUE) - 2) & \
					  CMD5_Q_SIZE)
/* Mask covering a byte offset into the descriptor ring.  The shift must be
 * parenthesized: "-" binds tighter than "<<", so without the inner
 * parentheses this evaluated to 2 << (n - 1) rather than (2 << n) - 1.
 */
#define Q_PTR_MASK			((2 << (QUEUE_SIZE_VAL + 5)) - 1)
#define Q_DESC_SIZE			sizeof(struct ccp5_desc)
#define Q_SIZE(n)			(COMMANDS_PER_QUEUE*(n))

/* Per-queue interrupt bits */
#define INT_COMPLETION			0x1
#define INT_ERROR			0x2
#define INT_QUEUE_STOPPED		0x4
#define ALL_INTERRUPTS			(INT_COMPLETION| \
					 INT_ERROR| \
					 INT_QUEUE_STOPPED)

/* LSB (local storage block) geometry */
#define LSB_REGION_WIDTH		5
#define MAX_LSB_CNT			8

#define LSB_SIZE			16
#define LSB_ITEM_SIZE			32
#define PLSB_MAP_SIZE			(LSB_SIZE)
#define SLSB_MAP_SIZE			(MAX_LSB_CNT * LSB_SIZE)

/* Argument is parenthesized so that an expression such as "a + b" is
 * divided as a whole.
 */
#define LSB_ENTRY_NUMBER(LSB_ADDR)	((LSB_ADDR) / LSB_ITEM_SIZE)
/* ------------------------ CCP Version 3 Specifics ------------------------ */
#define REQ0_WAIT_FOR_WRITE 0x00000004
#define REQ0_INT_ON_COMPLETE 0x00000002
#define REQ0_STOP_ON_COMPLETE 0x00000001
......@@ -115,6 +170,8 @@
#define CCP_JOBID_MASK 0x0000003f
/* ------------------------ General CCP Defines ------------------------ */
#define CCP_DMAPOOL_MAX_SIZE 64
#define CCP_DMAPOOL_ALIGN BIT(5)
......@@ -149,6 +206,7 @@
struct ccp_op;
struct ccp_device;
struct ccp_cmd;
struct ccp_fns;
struct ccp_dma_cmd {
struct list_head entry;
......@@ -192,10 +250,30 @@ struct ccp_cmd_queue {
/* Queue dma pool */
struct dma_pool *dma_pool;
/* Queue base address (not necessarily aligned) */
struct ccp5_desc *qbase;
/* Aligned queue start address (per requirement) */
struct mutex q_mutex ____cacheline_aligned;
unsigned int qidx;
/* Version 5 has different requirements for queue memory */
unsigned int qsize;
dma_addr_t qbase_dma;
dma_addr_t qdma_tail;
/* Per-queue reserved storage block(s) */
u32 sb_key;
u32 sb_ctx;
/* Bitmap of LSBs that can be accessed by this queue */
DECLARE_BITMAP(lsbmask, MAX_LSB_CNT);
/* Private LSB that is assigned to this queue, or -1 if none.
* Bitmap for my private LSB, unused otherwise
*/
unsigned int lsb;
DECLARE_BITMAP(lsbmap, PLSB_MAP_SIZE);
/* Queue processing thread */
struct task_struct *kthread;
unsigned int active;
......@@ -209,8 +287,17 @@ struct ccp_cmd_queue {
u32 int_err;
/* Register addresses for queue */
void __iomem *reg_control;
void __iomem *reg_tail_lo;
void __iomem *reg_head_lo;
void __iomem *reg_int_enable;
void __iomem *reg_interrupt_status;
void __iomem *reg_status;
void __iomem *reg_int_status;
void __iomem *reg_dma_status;
void __iomem *reg_dma_read_status;
void __iomem *reg_dma_write_status;
u32 qcontrol; /* Cached control register */
/* Status values from job */
u32 int_status;
......@@ -306,6 +393,9 @@ struct ccp_device {
unsigned int sb_count;
u32 sb_start;
/* Bitmap of shared LSBs, if any */
DECLARE_BITMAP(lsbmap, SLSB_MAP_SIZE);
/* Suspend support */
unsigned int suspending;
wait_queue_head_t suspend_queue;
......@@ -320,6 +410,7 @@ enum ccp_memtype {
CCP_MEMTYPE_LOCAL,
CCP_MEMTYPE__LAST,
};
#define CCP_MEMTYPE_LSB CCP_MEMTYPE_KSB
struct ccp_dma_info {
dma_addr_t address;
......@@ -407,6 +498,7 @@ struct ccp_op {
struct ccp_mem src;
struct ccp_mem dst;
struct ccp_mem exp;
union {
struct ccp_aes_op aes;
......@@ -416,6 +508,7 @@ struct ccp_op {
struct ccp_passthru_op passthru;
struct ccp_ecc_op ecc;
} u;
struct ccp_mem key;
};
static inline u32 ccp_addr_lo(struct ccp_dma_info *info)
......@@ -428,6 +521,70 @@ static inline u32 ccp_addr_hi(struct ccp_dma_info *info)
return upper_32_bits(info->address + info->offset) & 0x0000ffff;
}
/**
* descriptor for version 5 CCP commands
* 8 32-bit words:
* word 0: function; engine; control bits
* word 1: length of source data
* word 2: low 32 bits of source pointer
* word 3: upper 16 bits of source pointer; source memory type
* word 4: low 32 bits of destination pointer
* word 5: upper 16 bits of destination pointer; destination memory type
* word 6: low 32 bits of key pointer
* word 7: upper 16 bits of key pointer; key memory type
*/
/*
 * Word 0 of a version 5 descriptor: function, engine and control bits.
 * The bit-field widths add up to exactly 32 bits (1+1+1+1+1+15+4+1+7).
 * NOTE(review): bit-field placement inside the 32-bit unit is decided by
 * the compiler/ABI; confirm the resulting layout matches the hardware on
 * every supported target (big-endian in particular).
 */
struct dword0 {
__le32 soc:1; /* presumably "stop on completion" - TODO confirm */
__le32 ioc:1; /* presumably "interrupt on completion" - TODO confirm */
__le32 rsvd1:1; /* reserved */
__le32 init:1;
__le32 eom:1; /* AES/SHA only */
__le32 function:15; /* function selector for the chosen engine */
__le32 engine:4; /* engine selector */
__le32 prot:1;
__le32 rsvd2:7; /* reserved */
};
/*
 * Word 3 of a version 5 descriptor: upper 16 bits of the source pointer
 * plus the source memory type. Widths add up to 32 bits (16+2+8+5+1).
 */
struct dword3 {
__le32 src_hi:16; /* upper 16 bits of the source address */
__le32 src_mem:2; /* source memory type */
__le32 lsb_cxt_id:8; /* presumably the LSB slot holding the context - TODO confirm */
__le32 rsvd1:5; /* reserved */
__le32 fixed:1;
};
/*
 * Word 4 of a version 5 descriptor. The same 32 bits are interpreted
 * differently depending on whether the command is a SHA operation.
 */
union dword4 {
__le32 dst_lo; /* NON-SHA */
__le32 sha_len_lo; /* SHA */
};
/*
 * Word 5 of a version 5 descriptor. Either the bit-field view (upper 16
 * bits of the destination pointer plus destination memory type; widths
 * add up to 32 bits: 16+2+13+1) or a single 32-bit SHA length value.
 * NOTE(review): by analogy with union dword4 the "fields" view is
 * presumably the non-SHA form and sha_len_hi the SHA form - confirm.
 */
union dword5 {
struct {
__le32 dst_hi:16; /* upper 16 bits of the destination address */
__le32 dst_mem:2; /* destination memory type */
__le32 rsvd1:13; /* reserved */
__le32 fixed:1;
} fields;
__le32 sha_len_hi;
};
/*
 * Word 7 of a version 5 descriptor: upper 16 bits of the key pointer
 * plus the key memory type. Widths add up to 32 bits (16+2+14).
 */
struct dword7 {
__le32 key_hi:16; /* upper 16 bits of the key address */
__le32 key_mem:2; /* key memory type */
__le32 rsvd1:14; /* reserved */
};
/*
 * One version 5 command descriptor: eight consecutive 32-bit words.
 * Member order is the word order (word 0 first), so members must not be
 * reordered.
 */
struct ccp5_desc {
struct dword0 dw0; /* word 0: function; engine; control bits */
__le32 length; /* word 1: length of source data */
__le32 src_lo; /* word 2: low 32 bits of source pointer */
struct dword3 dw3; /* word 3: source pointer high bits; memory type */
union dword4 dw4; /* word 4: destination pointer low bits, or SHA length */
union dword5 dw5; /* word 5: destination pointer high bits, or SHA length */
__le32 key_lo; /* word 6: low 32 bits of key pointer */
struct dword7 dw7; /* word 7: key pointer high bits; memory type */
};
int ccp_pci_init(void);
void ccp_pci_exit(void);
......@@ -466,13 +623,14 @@ struct ccp_actions {
/* Structure to hold CCP version-specific values */
struct ccp_vdata {
unsigned int version;
int (*init)(struct ccp_device *);
const unsigned int version;
void (*setup)(struct ccp_device *);
const struct ccp_actions *perform;
const unsigned int bar;
const unsigned int offset;
};
extern struct ccp_vdata ccpv3;
extern struct ccp_vdata ccpv5;
#endif
......@@ -21,26 +21,29 @@
#include "ccp-dev.h"
/* SHA initial context values */
static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = {
cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
cpu_to_be32(SHA1_H4), 0, 0, 0,
cpu_to_be32(SHA1_H4),
};
static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
};
static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
};
/*
 * CCP_NEW_JOBID - pick the job ID for a new operation on @ccp.
 *
 * A version 3.0 device gets a fresh ID from ccp_gen_jobid(); every other
 * device version gets 0.
 *
 * @ccp is parenthesized so that any pointer expression (e.g. "base + i")
 * expands correctly. Note that @ccp is evaluated twice on the version 3
 * path, so do not pass an expression with side effects.
 */
#define CCP_NEW_JOBID(ccp)	(((ccp)->vdata->version == CCP_VERSION(3, 0)) ? \
				 ccp_gen_jobid(ccp) : 0)
static u32 ccp_gen_jobid(struct ccp_device *ccp)
{
return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
......@@ -487,7 +490,7 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
ret = -EIO;
memset(&op, 0, sizeof(op));
op.cmd_q = cmd_q;
op.jobid = ccp_gen_jobid(cmd_q->ccp);
op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
op.sb_key = cmd_q->sb_key;
op.sb_ctx = cmd_q->sb_ctx;
op.init = 1;
......@@ -640,7 +643,7 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
ret = -EIO;
memset(&op, 0, sizeof(op));
op.cmd_q = cmd_q;
op.jobid = ccp_gen_jobid(cmd_q->ccp);
op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
op.sb_key = cmd_q->sb_key;
op.sb_ctx = cmd_q->sb_ctx;
op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
......@@ -679,7 +682,7 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
goto e_key;
if (aes->mode != CCP_AES_MODE_ECB) {
/* Load the AES context - conver to LE */
/* Load the AES context - convert to LE */
dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
......@@ -817,7 +820,7 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
ret = -EIO;
memset(&op, 0, sizeof(op));
op.cmd_q = cmd_q;
op.jobid = ccp_gen_jobid(cmd_q->ccp);
op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
op.sb_key = cmd_q->sb_key;
op.sb_ctx = cmd_q->sb_ctx;
op.init = 1;
......@@ -936,98 +939,154 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
struct ccp_dm_workarea ctx;
struct ccp_data src;
struct ccp_op op;
unsigned int ioffset, ooffset;
unsigned int digest_size;
int sb_count;
const void *init;
u64 block_size;
int ctx_size;
int ret;
if (sha->ctx_len != CCP_SHA_CTXSIZE)
switch (sha->type) {
case CCP_SHA_TYPE_1:
if (sha->ctx_len < SHA1_DIGEST_SIZE)
return -EINVAL;
block_size = SHA1_BLOCK_SIZE;
break;
case CCP_SHA_TYPE_224:
if (sha->ctx_len < SHA224_DIGEST_SIZE)
return -EINVAL;
block_size = SHA224_BLOCK_SIZE;
break;
case CCP_SHA_TYPE_256:
if (sha->ctx_len < SHA256_DIGEST_SIZE)
return -EINVAL;
block_size = SHA256_BLOCK_SIZE;
break;
default:
return -EINVAL;
}
if (!sha->ctx)
return -EINVAL;
if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
if (!sha->final && (sha->src_len & (block_size - 1)))
return -EINVAL;
if (!sha->src_len) {
const u8 *sha_zero;
/* The version 3 device can't handle zero-length input */
if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
/* Not final, just return */
if (!sha->final)
return 0;
if (!sha->src_len) {
unsigned int digest_len;
const u8 *sha_zero;
/* CCP can't do a zero length sha operation so the caller
* must buffer the data.
*/
if (sha->msg_bits)
return -EINVAL;
/* Not final, just return */
if (!sha->final)
return 0;
/* The CCP cannot perform zero-length sha operations so the
* caller is required to buffer data for the final operation.
* However, a sha operation for a message with a total length
* of zero is valid so known values are required to supply
* the result.
*/
switch (sha->type) {
case CCP_SHA_TYPE_1:
sha_zero = sha1_zero_message_hash;
break;
case CCP_SHA_TYPE_224:
sha_zero = sha224_zero_message_hash;
break;
case CCP_SHA_TYPE_256:
sha_zero = sha256_zero_message_hash;
break;
default:
return -EINVAL;
}
/* CCP can't do a zero length sha operation so the
* caller must buffer the data.
*/
if (sha->msg_bits)
return -EINVAL;
scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
sha->ctx_len, 1);
/* The CCP cannot perform zero-length sha operations
* so the caller is required to buffer data for the
* final operation. However, a sha operation for a
* message with a total length of zero is valid so
* known values are required to supply the result.
*/
switch (sha->type) {
case CCP_SHA_TYPE_1:
sha_zero = sha1_zero_message_hash;
digest_len = SHA1_DIGEST_SIZE;
break;
case CCP_SHA_TYPE_224:
sha_zero = sha224_zero_message_hash;
digest_len = SHA224_DIGEST_SIZE;
break;
case CCP_SHA_TYPE_256:
sha_zero = sha256_zero_message_hash;
digest_len = SHA256_DIGEST_SIZE;
break;
default:
return -EINVAL;
}
return 0;
scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
digest_len, 1);
return 0;
}
}
if (!sha->src)
return -EINVAL;
/* Set variables used throughout */
switch (sha->type) {
case CCP_SHA_TYPE_1:
digest_size = SHA1_DIGEST_SIZE;
init = (void *) ccp_sha1_init;
ctx_size = SHA1_DIGEST_SIZE;
sb_count = 1;
if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE;
else
ooffset = ioffset = 0;
break;
case CCP_SHA_TYPE_224:
digest_size = SHA224_DIGEST_SIZE;
init = (void *) ccp_sha224_init;
ctx_size = SHA256_DIGEST_SIZE;
sb_count = 1;
ioffset = 0;
if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE;
else
ooffset = 0;
break;
case CCP_SHA_TYPE_256:
digest_size = SHA256_DIGEST_SIZE;
init = (void *) ccp_sha256_init;
ctx_size = SHA256_DIGEST_SIZE;
sb_count = 1;
ooffset = ioffset = 0;
break;
default:
ret = -EINVAL;
goto e_data;
}
BUILD_BUG_ON(CCP_SHA_SB_COUNT != 1);
/* For zero-length plaintext the src pointer is ignored;
* otherwise both parts must be valid
*/
if (sha->src_len && !sha->src)
return -EINVAL;
memset(&op, 0, sizeof(op));
op.cmd_q = cmd_q;
op.jobid = ccp_gen_jobid(cmd_q->ccp);
op.sb_ctx = cmd_q->sb_ctx;
op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
op.u.sha.type = sha->type;
op.u.sha.msg_bits = sha->msg_bits;
/* The SHA context fits in a single (32-byte) SB entry and
* must be in little endian format. Use the 256-bit byte swap
* passthru option to convert from big endian to little endian.
*/
ret = ccp_init_dm_workarea(&ctx, cmd_q,
CCP_SHA_SB_COUNT * CCP_SB_BYTES,
ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES,
DMA_BIDIRECTIONAL);
if (ret)
return ret;
if (sha->first) {
const __be32 *init;
switch (sha->type) {
case CCP_SHA_TYPE_1:
init = ccp_sha1_init;
break;
case CCP_SHA_TYPE_224:
init = ccp_sha224_init;
break;
case CCP_SHA_TYPE_256:
init = ccp_sha256_init;
memcpy(ctx.address + ioffset, init, ctx_size);
break;
default:
ret = -EINVAL;
goto e_ctx;
}
memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
} else {
ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
/* Restore the context */
ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
sb_count * CCP_SB_BYTES);
}
ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
......@@ -1037,24 +1096,33 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
goto e_ctx;
}
/* Send data to the CCP SHA engine */
ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
if (ret)
goto e_ctx;
if (sha->src) {
/* Send data to the CCP SHA engine; block_size is set above */
ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
block_size, DMA_TO_DEVICE);
if (ret)
goto e_ctx;
while (src.sg_wa.bytes_left) {
ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
if (sha->final && !src.sg_wa.bytes_left)
op.eom = 1;
while (src.sg_wa.bytes_left) {
ccp_prepare_data(&src, NULL, &op, block_size, false);
if (sha->final && !src.sg_wa.bytes_left)
op.eom = 1;
ret = cmd_q->ccp->vdata->perform->sha(&op);
if (ret) {
cmd->engine_error = cmd_q->cmd_error;
goto e_data;
}
ccp_process_data(&src, NULL, &op);
}
} else {
op.eom = 1;
ret = cmd_q->ccp->vdata->perform->sha(&op);
if (ret) {
cmd->engine_error = cmd_q->cmd_error;
goto e_data;
}
ccp_process_data(&src, NULL, &op);
}
/* Retrieve the SHA context - convert from LE to BE using
......@@ -1067,32 +1135,31 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
goto e_data;
}
ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
if (sha->final && sha->opad) {
/* HMAC operation, recursively perform final SHA */
struct ccp_cmd hmac_cmd;
struct scatterlist sg;
u64 block_size, digest_size;
u8 *hmac_buf;
if (sha->final) {
/* Finishing up, so get the digest */
switch (sha->type) {
case CCP_SHA_TYPE_1:
block_size = SHA1_BLOCK_SIZE;
digest_size = SHA1_DIGEST_SIZE;
break;
case CCP_SHA_TYPE_224:
block_size = SHA224_BLOCK_SIZE;
digest_size = SHA224_DIGEST_SIZE;
break;
case CCP_SHA_TYPE_256:
block_size = SHA256_BLOCK_SIZE;
digest_size = SHA256_DIGEST_SIZE;
ccp_get_dm_area(&ctx, ooffset,
sha->ctx, 0,
digest_size);
break;
default:
ret = -EINVAL;
goto e_data;
goto e_ctx;
}
} else {
/* Stash the context */
ccp_get_dm_area(&ctx, 0, sha->ctx, 0,
sb_count * CCP_SB_BYTES);
}
if (sha->final && sha->opad) {
/* HMAC operation, recursively perform final SHA */
struct ccp_cmd hmac_cmd;
struct scatterlist sg;
u8 *hmac_buf;
if (sha->opad_len != block_size) {
ret = -EINVAL;
......@@ -1107,7 +1174,18 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
sg_init_one(&sg, hmac_buf, block_size + digest_size);
scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
memcpy(hmac_buf + block_size, ctx.address, digest_size);
switch (sha->type) {
case CCP_SHA_TYPE_1:
case CCP_SHA_TYPE_224:
case CCP_SHA_TYPE_256:
memcpy(hmac_buf + block_size,
ctx.address + ooffset,
digest_size);
break;
default:
ret = -EINVAL;
goto e_ctx;
}
memset(&hmac_cmd, 0, sizeof(hmac_cmd));
hmac_cmd.engine = CCP_ENGINE_SHA;
......@@ -1130,7 +1208,8 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
}
e_data:
ccp_free_data(&src, cmd_q);
if (sha->src)
ccp_free_data(&src, cmd_q);
e_ctx:
ccp_dm_free(&ctx);
......@@ -1261,7 +1340,7 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
struct ccp_op op;
bool in_place = false;
unsigned int i;
int ret;
int ret = 0;
if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
return -EINVAL;
......@@ -1280,7 +1359,7 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
memset(&op, 0, sizeof(op));
op.cmd_q = cmd_q;
op.jobid = ccp_gen_jobid(cmd_q->ccp);
op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
/* Load the mask */
......@@ -1469,7 +1548,7 @@ static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
memset(&op, 0, sizeof(op));
op.cmd_q = cmd_q;
op.jobid = ccp_gen_jobid(cmd_q->ccp);
op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
/* Concatenate the modulus and the operands. Both the modulus and
* the operands must be in little endian format. Since the input
......@@ -1594,7 +1673,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
memset(&op, 0, sizeof(op));
op.cmd_q = cmd_q;
op.jobid = ccp_gen_jobid(cmd_q->ccp);
op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
/* Concatenate the modulus and the operands. Both the modulus and
* the operands must be in little endian format. Since the input
......@@ -1632,7 +1711,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
goto e_src;
src.address += CCP_ECC_OPERAND_SIZE;
/* Set the first point Z coordianate to 1 */
/* Set the first point Z coordinate to 1 */
*src.address = 0x01;
src.address += CCP_ECC_OPERAND_SIZE;
......@@ -1651,7 +1730,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
goto e_src;
src.address += CCP_ECC_OPERAND_SIZE;
/* Set the second point Z coordianate to 1 */
/* Set the second point Z coordinate to 1 */
*src.address = 0x01;
src.address += CCP_ECC_OPERAND_SIZE;
} else {
......
......@@ -141,10 +141,11 @@ static void ccp_free_irqs(struct ccp_device *ccp)
free_irq(ccp_pci->msix[ccp_pci->msix_count].vector,
dev);
pci_disable_msix(pdev);
} else {
} else if (ccp->irq) {
free_irq(ccp->irq, dev);
pci_disable_msi(pdev);
}
ccp->irq = 0;
}
static int ccp_find_mmio_area(struct ccp_device *ccp)
......@@ -229,6 +230,8 @@ static int ccp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev_set_drvdata(dev, ccp);
if (ccp->vdata->setup)
ccp->vdata->setup(ccp);
ret = ccp->vdata->perform->init(ccp);
if (ret)
goto e_iomap;
......@@ -321,6 +324,7 @@ static int ccp_pci_resume(struct pci_dev *pdev)
/*
 * PCI IDs handled by this driver. Each entry pairs an AMD device ID with
 * a pointer to the matching version-specific data (ccpv3 / ccpv5), stored
 * as a kernel_ulong_t.
 * NOTE(review): presumably the probe routine reads this value back from
 * the matched entry to set ccp->vdata - confirm in ccp_pci_probe().
 */
static const struct pci_device_id ccp_pci_table[] = {
{ PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&ccpv3 },
{ PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&ccpv5 },
/* Last entry must be zero */
{ 0, }
};
......
......@@ -238,9 +238,6 @@ struct ccp_xts_aes_engine {
};
/***** SHA engine *****/
#define CCP_SHA_BLOCKSIZE SHA256_BLOCK_SIZE
#define CCP_SHA_CTXSIZE SHA256_DIGEST_SIZE
/**
* ccp_sha_type - type of SHA operation
*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment