Commit b2f46fd8 authored by Dan Williams

async_tx: add support for asynchronous GF multiplication

[ Based on an original patch by Yuri Tikhonov ]

This adds support for doing asynchronous GF multiplication by adding
two additional functions to the async_tx API:

 async_gen_syndrome() does simultaneous XOR and Galois field
    multiplication of sources.

 async_syndrome_val() validates the given source buffers against known P
    and Q values.
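
As a usage sketch (not code from this patch): a raid6-style caller with
four data blocks, following the md convention that P and Q occupy the
last two slots of the block array.  The 'scribble' and 'spare' pages are
assumed to be provided by the caller:

	struct page *blocks[6];	/* [0..3] data, [4] P, [5] Q */
	struct dma_async_tx_descriptor *tx;
	struct async_submit_ctl submit;
	enum sum_check_flags pqres = 0;

	/* generate P and Q from the four data blocks */
	init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, scribble);
	tx = async_gen_syndrome(blocks, 0, 6, PAGE_SIZE, &submit);
	async_tx_quiesce(&tx);

	/* later: confirm P and Q still match the data */
	init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, scribble);
	tx = async_syndrome_val(blocks, 0, 6, PAGE_SIZE, &pqres, spare,
				&submit);
	async_tx_quiesce(&tx);
	if (pqres & (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT))
		/* stripe is inconsistent */;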

When a request is made to run async_pq against more than the hardware
maximum number of supported sources, we need to reuse the previously
generated P and Q values as sources for the next operation.  Care must
be taken to remove Q from P' and P from Q'.  For example, to perform a
5-source pq operation with hardware that only supports 4 sources at a
time, the following approach is taken:

p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08}))
p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10}))

p' = p + q + q + src4 = p + src4
q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10}*src4
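
(The q terms cancel because addition in GF(2^8) is XOR, and a {00}
coefficient zeroes a term's contribution to Q'.)  A small standalone
check of this identity; gf_mul() is a throwaway helper written for this
sketch, not an API from the patch:

	#include <stdint.h>
	#include <stdio.h>

	/* GF(2^8) multiply over the raid6 polynomial 0x11d */
	static uint8_t gf_mul(uint8_t a, uint8_t b)
	{
		uint8_t p = 0;

		while (b) {
			if (b & 1)
				p ^= a;
			b >>= 1;
			a = (a << 1) ^ ((a & 0x80) ? 0x1d : 0);
		}
		return p;
	}

	int main(void)
	{
		uint8_t src[5] = { 0xde, 0xad, 0xbe, 0xef, 0x42 };
		uint8_t p = 0, q = 0, p2, q2;
		int i;

		/* first pass: COEF({01}, {02}, {04}, {08}) */
		for (i = 0; i < 4; i++) {
			p ^= src[i];
			q ^= gf_mul(1 << i, src[i]);
		}

		/* continuation: PQ(p, q, q, src4, COEF({00},{01},{00},{10})) */
		p2 = p ^ q ^ q ^ src[4];
		q2 = gf_mul(0x00, p) ^ gf_mul(0x01, q) ^
		     gf_mul(0x00, q) ^ gf_mul(0x10, src[4]);

		/* must match the direct 5-source result */
		printf("p'=%02x (want %02x)  q'=%02x (want %02x)\n",
		       p2, p ^ src[4], q2, q ^ gf_mul(0x10, src[4]));
		return 0;
	}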

Note: 4 is the minimum acceptable maxpq; otherwise we punt to the
synchronous software path.

The DMA_PREP_CONTINUE flag tells the driver to reuse p and q as
sources (in the above manner) and fill the remaining slots up to maxpq
with the new sources/coefficients.
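
Concretely, a continuation pass on an engine without native support lays
out the source slots like this (an illustrative sketch; the array names
are invented here, and raid6_gfexp[] stands in for whatever coefficient
table the caller uses):

	/* three slots reserved for the prior results ... */
	dma_src[0] = P;   coefs[0] = 0x00;  /* cancels P's term in Q',
					     * still xors P into P' */
	dma_src[1] = Q;   coefs[1] = 0x01;  /* carries Q into Q' */
	dma_src[2] = Q;   coefs[2] = 0x00;  /* cancels Q's term in P' */

	/* ... the rest take the new sources and their coefficients */
	dma_src[3] = new_src;  coefs[3] = raid6_gfexp[src_idx];

	tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 4, coefs,
				     len, dma_flags | DMA_PREP_CONTINUE);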

Note1: Some devices have native support for P+Q continuation and can skip
this extra work.  Devices with this capability can advertise it with
dma_set_maxpq.  It is up to each driver how to handle the
DMA_PREP_CONTINUE flag.

Note2: The API supports disabling the generation of P when generating Q;
this is ignored by the synchronous path, but some dma devices implement
it to save unnecessary writes.  In this case the continuation algorithm
is simplified to only reuse Q as a source.
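
A Q-only continuation then degenerates to a single reused slot (again a
sketch with invented array names, not code from the patch):

	dma_flags |= DMA_PREP_PQ_DISABLE_P | DMA_PREP_CONTINUE;

	dma_src[0] = Q;        coefs[0] = 0x01;  /* carry Q forward */
	dma_src[1] = new_src;  coefs[1] = raid6_gfexp[src_idx];

	tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coefs,
				     len, dma_flags);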

Cc: H. Peter Anvin <hpa@zytor.com>
Cc: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Yuri Tikhonov <yur@emcraft.com>
Signed-off-by: Ilya Yanok <yanok@emcraft.com>
Reviewed-by: Andre Noll <maan@systemlinux.org>
Acked-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent 95475e57
diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt
@@ -64,6 +64,9 @@ xor - xor a series of source buffers and write the result to a
 xor_val - xor a series of source buffers and set a flag if the
      result is zero.  The implementation attempts to prevent
      writes to memory
+pq - generate the p+q (raid6 syndrome) from a series of source buffers
+pq_val - validate that a p and/or q buffer are in sync with a given series of
+     sources
 
 3.3 Descriptor management:
 The return value is non-NULL and points to a 'descriptor' when the operation
......
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
@@ -506,7 +506,7 @@ void __init iop13xx_platform_init(void)
 		dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
 		dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
 		dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
-		dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask);
+		dma_cap_set(DMA_PQ, plat_data->cap_mask);
 		dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
 		dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask);
 		break;
......
diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig
@@ -14,3 +14,7 @@ config ASYNC_MEMSET
 	tristate
 	select ASYNC_CORE
 
+config ASYNC_PQ
+	tristate
+	select ASYNC_CORE
+
diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o
 obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
 obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
 obj-$(CONFIG_ASYNC_XOR) += async_xor.o
+obj-$(CONFIG_ASYNC_PQ) += async_pq.o
[diff of the new file crypto/async_tx/async_pq.c is collapsed in this view]
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
@@ -62,7 +62,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 	while (src_cnt) {
 		submit->flags = flags_orig;
 		dma_flags = 0;
-		xor_src_cnt = min(src_cnt, dma->max_xor);
+		xor_src_cnt = min(src_cnt, (int)dma->max_xor);
 		/* if we are submitting additional xors, leave the chain open,
 		 * clear the callback parameters, and leave the destination
 		 * buffer mapped
......
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
@@ -646,6 +646,10 @@ int dma_async_device_register(struct dma_device *device)
 		!device->device_prep_dma_xor);
 	BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
 		!device->device_prep_dma_xor_val);
+	BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
+		!device->device_prep_dma_pq);
+	BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
+		!device->device_prep_dma_pq_val);
 	BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
 		!device->device_prep_dma_memset);
 	BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
......
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
@@ -1257,7 +1257,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
 	dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
 		"( %s%s%s%s%s%s%s%s%s%s)\n",
-		dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
+		dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
 		dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
 		dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
 		dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
......
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
@@ -185,5 +185,14 @@ async_memset(struct page *dest, int val, unsigned int offset,
 struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
 
+struct dma_async_tx_descriptor *
+async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt,
+		   size_t len, struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
+		   size_t len, enum sum_check_flags *pqres, struct page *spare,
+		   struct async_submit_ctl *submit);
+
 void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
 
 #endif /* _ASYNC_TX_H_ */
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
@@ -52,7 +52,7 @@ enum dma_status {
 enum dma_transaction_type {
 	DMA_MEMCPY,
 	DMA_XOR,
-	DMA_PQ_XOR,
+	DMA_PQ,
 	DMA_DUAL_XOR,
 	DMA_PQ_UPDATE,
 	DMA_XOR_VAL,
@@ -70,20 +70,28 @@ enum dma_transaction_type {
 /**
  * enum dma_ctrl_flags - DMA flags to augment operation preparation,
  *  control completion, and communicate status.
  * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
  *  this transaction
  * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
- *  acknowledges receipt, i.e. has has a chance to establish any
- *  dependency chains
+ *  acknowledges receipt, i.e. has a chance to establish any dependency
+ *  chains
  * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
  * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
+ * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
+ * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
+ * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
+ *  sources that were the result of a previous operation, in the case of a PQ
+ *  operation it continues the calculation with new sources
  */
 enum dma_ctrl_flags {
 	DMA_PREP_INTERRUPT = (1 << 0),
 	DMA_CTRL_ACK = (1 << 1),
 	DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
 	DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
+	DMA_PREP_PQ_DISABLE_P = (1 << 4),
+	DMA_PREP_PQ_DISABLE_Q = (1 << 5),
+	DMA_PREP_CONTINUE = (1 << 6),
 };
 
 /**
@@ -226,6 +234,7 @@ struct dma_async_tx_descriptor {
  * @global_node: list_head for global dma_device_list
  * @cap_mask: one or more dma_capability flags
  * @max_xor: maximum number of xor sources, 0 if no capability
+ * @max_pq: maximum number of PQ sources and PQ-continue capability
  * @dev_id: unique device ID
  * @dev: struct device reference for dma mapping api
  * @device_alloc_chan_resources: allocate resources and return the
@@ -234,6 +243,8 @@ struct dma_async_tx_descriptor {
  * @device_prep_dma_memcpy: prepares a memcpy operation
  * @device_prep_dma_xor: prepares a xor operation
  * @device_prep_dma_xor_val: prepares a xor validation operation
+ * @device_prep_dma_pq: prepares a pq operation
+ * @device_prep_dma_pq_val: prepares a pqzero_sum operation
  * @device_prep_dma_memset: prepares a memset operation
  * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
  * @device_prep_slave_sg: prepares a slave dma operation
@@ -248,7 +259,9 @@ struct dma_device {
 	struct list_head channels;
 	struct list_head global_node;
 	dma_cap_mask_t cap_mask;
-	int max_xor;
+	unsigned short max_xor;
+	unsigned short max_pq;
+	#define DMA_HAS_PQ_CONTINUE (1 << 15)
 
 	int dev_id;
 	struct device *dev;
@@ -265,6 +278,14 @@ struct dma_device {
 	struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
 		struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
 		size_t len, enum sum_check_flags *result, unsigned long flags);
+	struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
+		struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+		unsigned int src_cnt, const unsigned char *scf,
+		size_t len, unsigned long flags);
+	struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
+		struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+		unsigned int src_cnt, const unsigned char *scf, size_t len,
+		enum sum_check_flags *pqres, unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
 		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
 		unsigned long flags);
@@ -283,6 +304,60 @@ struct dma_device {
 	void (*device_issue_pending)(struct dma_chan *chan);
 };
 
+static inline void
+dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
+{
+	dma->max_pq = maxpq;
+	if (has_pq_continue)
+		dma->max_pq |= DMA_HAS_PQ_CONTINUE;
+}
+
+static inline bool dmaf_continue(enum dma_ctrl_flags flags)
+{
+	return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
+}
+
+static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
+{
+	enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
+
+	return (flags & mask) == mask;
+}
+
+static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
+{
+	return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
+}
+
+static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma)
+{
+	return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
+}
+
+/* dma_maxpq - reduce maxpq in the face of continued operations
+ * @dma - dma device with PQ capability
+ * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
+ *
+ * When an engine does not support native continuation we need 3 extra
+ * source slots to reuse P and Q with the following coefficients:
+ * 1/ {00} * P : remove P from Q', but use it as a source for P'
+ * 2/ {01} * Q : use Q to continue Q' calculation
+ * 3/ {00} * Q : subtract Q from P' to cancel (2)
+ *
+ * In the case where P is disabled we only need 1 extra source:
+ * 1/ {01} * Q : use Q to continue Q' calculation
+ */
+static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
+{
+	if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
+		return dma_dev_to_maxpq(dma);
+	else if (dmaf_p_disabled_continue(flags))
+		return dma_dev_to_maxpq(dma) - 1;
+	else if (dmaf_continue(flags))
+		return dma_dev_to_maxpq(dma) - 3;
+	BUG();
+}
+
 /* --- public DMA engine API --- */
 
 #ifdef CONFIG_DMA_ENGINE
......
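
To see how the maxpq helpers above combine, a hypothetical driver with
16 PQ source slots might size things as follows (sketch only; the
variable names are illustrative):

	/* probe: advertise 16 sources with native P+Q continuation */
	dma_set_maxpq(dma_dev, 16, 1);

	/* prep: dma_maxpq() stays 16 even when DMA_PREP_CONTINUE is set */
	int max = dma_maxpq(dma_dev, flags);

Had the device been registered with dma_set_maxpq(dma_dev, 16, 0),
dma_maxpq() would return 13 for a continuation (three slots reserved
for {00}*P, {01}*Q and {00}*Q) and 15 when P generation is also
disabled (one slot for {01}*Q).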