Commit 5554b359 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx: (24 commits)
  I/OAT: I/OAT version 3.0 support
  I/OAT: tcp_dma_copybreak default value dependent on I/OAT version
  I/OAT: Add watchdog/reset functionality to ioatdma
  iop_adma: cleanup iop_chan_xor_slot_count
  iop_adma: document how to calculate the minimum descriptor pool size
  iop_adma: directly reclaim descriptors on allocation failure
  async_tx: make async_tx_test_ack a boolean routine
  async_tx: remove depend_tx from async_tx_sync_epilog
  async_tx: export async_tx_quiesce
  async_tx: fix handling of the "out of descriptor" condition in async_xor
  async_tx: ensure the xor destination buffer remains dma-mapped
  async_tx: list_for_each_entry_rcu() cleanup
  dmaengine: Driver for the Synopsys DesignWare DMA controller
  dmaengine: Add slave DMA interface
  dmaengine: add DMA_COMPL_SKIP_{SRC,DEST}_UNMAP flags to control dma unmap
  dmaengine: Add dma_client parameter to device_alloc_chan_resources
  dmatest: Simple DMA memcpy test client
  dmaengine: DMA engine driver for Marvell XOR engine
  iop-adma: fix platform driver hotplug/coldplug
  dmaengine: track the number of clients using a channel
  ...

Fixed up conflict in drivers/dca/dca-sysfs.c manually
parents 0f6e38a6 7f1b358a
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
*/ */
#include <linux/clk.h> #include <linux/clk.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/dw_dmac.h>
#include <linux/fb.h> #include <linux/fb.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/platform_device.h> #include <linux/platform_device.h>
...@@ -594,6 +595,17 @@ static void __init genclk_init_parent(struct clk *clk) ...@@ -594,6 +595,17 @@ static void __init genclk_init_parent(struct clk *clk)
clk->parent = parent; clk->parent = parent;
} }
/*
 * Definitions for the dw_dmac device with id 0.  struct dw_dma_platform_data
 * comes from <linux/dw_dmac.h>; PBMEM(), IRQ(), DEFINE_DEV_DATA() and
 * DEV_CLK() are macros defined outside this view.
 */
/* platform data: this controller instance is described as having 3 channels */
static struct dw_dma_platform_data dw_dmac0_data = {
.nr_channels = 3,
};
/* NOTE(review): presumably a memory window at 0xff200000 plus interrupt 2 - confirm against PBMEM()/IRQ() */
static struct resource dw_dmac0_resource[] = {
PBMEM(0xff200000),
IRQ(2),
};
/* NOTE(review): presumably emits dw_dmac0_device, which at32_add_system_devices() registers - confirm */
DEFINE_DEV_DATA(dw_dmac, 0);
/* NOTE(review): presumably emits dw_dmac0_hclk, which is listed in at32_clock_list[] - confirm */
DEV_CLK(hclk, dw_dmac0, hsb, 10);
/* -------------------------------------------------------------------- /* --------------------------------------------------------------------
* System peripherals * System peripherals
* -------------------------------------------------------------------- */ * -------------------------------------------------------------------- */
...@@ -708,17 +720,6 @@ static struct clk pico_clk = { ...@@ -708,17 +720,6 @@ static struct clk pico_clk = {
.users = 1, .users = 1,
}; };
static struct resource dmaca0_resource[] = {
{
.start = 0xff200000,
.end = 0xff20ffff,
.flags = IORESOURCE_MEM,
},
IRQ(2),
};
DEFINE_DEV(dmaca, 0);
DEV_CLK(hclk, dmaca0, hsb, 10);
/* -------------------------------------------------------------------- /* --------------------------------------------------------------------
* HMATRIX * HMATRIX
* -------------------------------------------------------------------- */ * -------------------------------------------------------------------- */
...@@ -831,7 +832,7 @@ void __init at32_add_system_devices(void) ...@@ -831,7 +832,7 @@ void __init at32_add_system_devices(void)
platform_device_register(&at32_eic0_device); platform_device_register(&at32_eic0_device);
platform_device_register(&smc0_device); platform_device_register(&smc0_device);
platform_device_register(&pdc_device); platform_device_register(&pdc_device);
platform_device_register(&dmaca0_device); platform_device_register(&dw_dmac0_device);
platform_device_register(&at32_tcb0_device); platform_device_register(&at32_tcb0_device);
platform_device_register(&at32_tcb1_device); platform_device_register(&at32_tcb1_device);
...@@ -2032,7 +2033,7 @@ struct clk *at32_clock_list[] = { ...@@ -2032,7 +2033,7 @@ struct clk *at32_clock_list[] = {
&smc0_mck, &smc0_mck,
&pdc_hclk, &pdc_hclk,
&pdc_pclk, &pdc_pclk,
&dmaca0_hclk, &dw_dmac0_hclk,
&pico_clk, &pico_clk,
&pio0_mck, &pio0_mck,
&pio1_mck, &pio1_mck,
......
...@@ -73,15 +73,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, ...@@ -73,15 +73,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
pr_debug("%s: (sync) len: %zu\n", __func__, len); pr_debug("%s: (sync) len: %zu\n", __func__, len);
/* wait for any prerequisite operations */ /* wait for any prerequisite operations */
if (depend_tx) { async_tx_quiesce(&depend_tx);
/* if ack is already set then we cannot be sure
* we are referring to the correct operation
*/
BUG_ON(async_tx_test_ack(depend_tx));
if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
panic("%s: DMA_ERROR waiting for depend_tx\n",
__func__);
}
dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
src_buf = kmap_atomic(src, KM_USER1) + src_offset; src_buf = kmap_atomic(src, KM_USER1) + src_offset;
...@@ -91,7 +83,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, ...@@ -91,7 +83,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
kunmap_atomic(dest_buf, KM_USER0); kunmap_atomic(dest_buf, KM_USER0);
kunmap_atomic(src_buf, KM_USER1); kunmap_atomic(src_buf, KM_USER1);
async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); async_tx_sync_epilog(cb_fn, cb_param);
} }
return tx; return tx;
......
...@@ -72,19 +72,11 @@ async_memset(struct page *dest, int val, unsigned int offset, ...@@ -72,19 +72,11 @@ async_memset(struct page *dest, int val, unsigned int offset,
dest_buf = (void *) (((char *) page_address(dest)) + offset); dest_buf = (void *) (((char *) page_address(dest)) + offset);
/* wait for any prerequisite operations */ /* wait for any prerequisite operations */
if (depend_tx) { async_tx_quiesce(&depend_tx);
/* if ack is already set then we cannot be sure
* we are referring to the correct operation
*/
BUG_ON(depend_tx->ack);
if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
panic("%s: DMA_ERROR waiting for depend_tx\n",
__func__);
}
memset(dest_buf, val, len); memset(dest_buf, val, len);
async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); async_tx_sync_epilog(cb_fn, cb_param);
} }
return tx; return tx;
......
...@@ -295,7 +295,7 @@ dma_channel_add_remove(struct dma_client *client, ...@@ -295,7 +295,7 @@ dma_channel_add_remove(struct dma_client *client,
case DMA_RESOURCE_REMOVED: case DMA_RESOURCE_REMOVED:
found = 0; found = 0;
spin_lock_irqsave(&async_tx_lock, flags); spin_lock_irqsave(&async_tx_lock, flags);
list_for_each_entry_rcu(ref, &async_tx_master_list, node) list_for_each_entry(ref, &async_tx_master_list, node)
if (ref->chan == chan) { if (ref->chan == chan) {
/* permit backing devices to go away */ /* permit backing devices to go away */
dma_chan_put(ref->chan); dma_chan_put(ref->chan);
...@@ -608,23 +608,34 @@ async_trigger_callback(enum async_tx_flags flags, ...@@ -608,23 +608,34 @@ async_trigger_callback(enum async_tx_flags flags,
pr_debug("%s: (sync)\n", __func__); pr_debug("%s: (sync)\n", __func__);
/* wait for any prerequisite operations */ /* wait for any prerequisite operations */
if (depend_tx) { async_tx_quiesce(&depend_tx);
/* if ack is already set then we cannot be sure
* we are referring to the correct operation
*/
BUG_ON(async_tx_test_ack(depend_tx));
if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
panic("%s: DMA_ERROR waiting for depend_tx\n",
__func__);
}
async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); async_tx_sync_epilog(cb_fn, cb_param);
} }
return tx; return tx;
} }
EXPORT_SYMBOL_GPL(async_trigger_callback); EXPORT_SYMBOL_GPL(async_trigger_callback);
/**
 * async_tx_quiesce - ensure tx is complete and freeable upon return
 * @tx: address of the caller's descriptor pointer; when it holds NULL
 *	nothing is done
 *
 * Waits on the transaction with dma_wait_for_async_tx() and panics if that
 * wait reports DMA_ERROR.  Afterwards the descriptor is acked and the
 * caller's pointer is cleared to NULL.
 */
void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
{
	struct dma_async_tx_descriptor *pending = *tx;

	if (!pending)
		return;

	/* once ack is set we can no longer be sure this pointer still
	 * refers to the operation the caller meant
	 */
	BUG_ON(async_tx_test_ack(pending));

	if (dma_wait_for_async_tx(pending) == DMA_ERROR)
		panic("DMA_ERROR waiting for transaction\n");

	async_tx_ack(pending);
	*tx = NULL;
}
EXPORT_SYMBOL_GPL(async_tx_quiesce);
module_init(async_tx_init); module_init(async_tx_init);
module_exit(async_tx_exit); module_exit(async_tx_exit);
......
...@@ -35,74 +35,121 @@ ...@@ -35,74 +35,121 @@
* when CONFIG_DMA_ENGINE=n * when CONFIG_DMA_ENGINE=n
*/ */
static __always_inline struct dma_async_tx_descriptor * static __always_inline struct dma_async_tx_descriptor *
do_async_xor(struct dma_device *device, do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
struct dma_chan *chan, struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len,
unsigned int offset, unsigned int src_cnt, size_t len, enum async_tx_flags flags,
enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_param) dma_async_tx_callback cb_fn, void *cb_param)
{ {
dma_addr_t dma_dest; struct dma_device *dma = chan->device;
dma_addr_t *dma_src = (dma_addr_t *) src_list; dma_addr_t *dma_src = (dma_addr_t *) src_list;
struct dma_async_tx_descriptor *tx; struct dma_async_tx_descriptor *tx = NULL;
int src_off = 0;
int i; int i;
unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; dma_async_tx_callback _cb_fn;
void *_cb_param;
pr_debug("%s: len: %zu\n", __func__, len); enum async_tx_flags async_flags;
enum dma_ctrl_flags dma_flags;
dma_dest = dma_map_page(device->dev, dest, offset, len, int xor_src_cnt;
DMA_FROM_DEVICE); dma_addr_t dma_dest;
dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_FROM_DEVICE);
for (i = 0; i < src_cnt; i++) for (i = 0; i < src_cnt; i++)
dma_src[i] = dma_map_page(device->dev, src_list[i], offset, dma_src[i] = dma_map_page(dma->dev, src_list[i], offset,
len, DMA_TO_DEVICE); len, DMA_TO_DEVICE);
/* Since we have clobbered the src_list we are committed while (src_cnt) {
* to doing this asynchronously. Drivers force forward progress async_flags = flags;
* in case they can not provide a descriptor dma_flags = 0;
*/ xor_src_cnt = min(src_cnt, dma->max_xor);
tx = device->device_prep_dma_xor(chan, dma_dest, dma_src, src_cnt, len, /* if we are submitting additional xors, leave the chain open,
dma_prep_flags); * clear the callback parameters, and leave the destination
if (!tx) { * buffer mapped
if (depend_tx) */
dma_wait_for_async_tx(depend_tx); if (src_cnt > xor_src_cnt) {
async_flags &= ~ASYNC_TX_ACK;
while (!tx) dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
tx = device->device_prep_dma_xor(chan, dma_dest, _cb_fn = NULL;
dma_src, src_cnt, len, _cb_param = NULL;
dma_prep_flags); } else {
} _cb_fn = cb_fn;
_cb_param = cb_param;
}
if (_cb_fn)
dma_flags |= DMA_PREP_INTERRUPT;
async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); /* Since we have clobbered the src_list we are committed
* to doing this asynchronously. Drivers force forward progress
* in case they can not provide a descriptor
*/
tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off],
xor_src_cnt, len, dma_flags);
if (unlikely(!tx))
async_tx_quiesce(&depend_tx);
/* spin wait for the preceding transactions to complete */
while (unlikely(!tx)) {
dma_async_issue_pending(chan);
tx = dma->device_prep_dma_xor(chan, dma_dest,
&dma_src[src_off],
xor_src_cnt, len,
dma_flags);
}
async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn,
_cb_param);
depend_tx = tx;
flags |= ASYNC_TX_DEP_ACK;
if (src_cnt > xor_src_cnt) {
/* drop completed sources */
src_cnt -= xor_src_cnt;
src_off += xor_src_cnt;
/* use the intermediate result as a source */
dma_src[--src_off] = dma_dest;
src_cnt++;
} else
break;
}
return tx; return tx;
} }
static void static void
do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
unsigned int src_cnt, size_t len, enum async_tx_flags flags, int src_cnt, size_t len, enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx, dma_async_tx_callback cb_fn, void *cb_param)
dma_async_tx_callback cb_fn, void *cb_param)
{ {
void *_dest;
int i; int i;
int xor_src_cnt;
pr_debug("%s: len: %zu\n", __func__, len); int src_off = 0;
void *dest_buf;
void **srcs = (void **) src_list;
/* reuse the 'src_list' array to convert to buffer pointers */ /* reuse the 'src_list' array to convert to buffer pointers */
for (i = 0; i < src_cnt; i++) for (i = 0; i < src_cnt; i++)
src_list[i] = (struct page *) srcs[i] = page_address(src_list[i]) + offset;
(page_address(src_list[i]) + offset);
/* set destination address */ /* set destination address */
_dest = page_address(dest) + offset; dest_buf = page_address(dest) + offset;
if (flags & ASYNC_TX_XOR_ZERO_DST) if (flags & ASYNC_TX_XOR_ZERO_DST)
memset(_dest, 0, len); memset(dest_buf, 0, len);
xor_blocks(src_cnt, len, _dest, while (src_cnt > 0) {
(void **) src_list); /* process up to 'MAX_XOR_BLOCKS' sources */
xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
xor_blocks(xor_src_cnt, len, dest_buf, &srcs[src_off]);
async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); /* drop completed sources */
src_cnt -= xor_src_cnt;
src_off += xor_src_cnt;
}
async_tx_sync_epilog(cb_fn, cb_param);
} }
/** /**
...@@ -132,106 +179,34 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, ...@@ -132,106 +179,34 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset,
struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
&dest, 1, src_list, &dest, 1, src_list,
src_cnt, len); src_cnt, len);
struct dma_device *device = chan ? chan->device : NULL;
struct dma_async_tx_descriptor *tx = NULL;
dma_async_tx_callback _cb_fn;
void *_cb_param;
unsigned long local_flags;
int xor_src_cnt;
int i = 0, src_off = 0;
BUG_ON(src_cnt <= 1); BUG_ON(src_cnt <= 1);
while (src_cnt) { if (chan) {
local_flags = flags; /* run the xor asynchronously */
if (device) { /* run the xor asynchronously */ pr_debug("%s (async): len: %zu\n", __func__, len);
xor_src_cnt = min(src_cnt, device->max_xor);
/* if we are submitting additional xors
* only set the callback on the last transaction
*/
if (src_cnt > xor_src_cnt) {
local_flags &= ~ASYNC_TX_ACK;
_cb_fn = NULL;
_cb_param = NULL;
} else {
_cb_fn = cb_fn;
_cb_param = cb_param;
}
tx = do_async_xor(device, chan, dest,
&src_list[src_off], offset,
xor_src_cnt, len, local_flags,
depend_tx, _cb_fn, _cb_param);
} else { /* run the xor synchronously */
/* in the sync case the dest is an implied source
* (assumes the dest is at the src_off index)
*/
if (flags & ASYNC_TX_XOR_DROP_DST) {
src_cnt--;
src_off++;
}
/* process up to 'MAX_XOR_BLOCKS' sources */
xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
/* if we are submitting additional xors
* only set the callback on the last transaction
*/
if (src_cnt > xor_src_cnt) {
local_flags &= ~ASYNC_TX_ACK;
_cb_fn = NULL;
_cb_param = NULL;
} else {
_cb_fn = cb_fn;
_cb_param = cb_param;
}
/* wait for any prerequisite operations */
if (depend_tx) {
/* if ack is already set then we cannot be sure
* we are referring to the correct operation
*/
BUG_ON(async_tx_test_ack(depend_tx));
if (dma_wait_for_async_tx(depend_tx) ==
DMA_ERROR)
panic("%s: DMA_ERROR waiting for "
"depend_tx\n",
__func__);
}
do_sync_xor(dest, &src_list[src_off], offset,
xor_src_cnt, len, local_flags, depend_tx,
_cb_fn, _cb_param);
}
/* the previous tx is hidden from the client, return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
* so ack it flags, depend_tx, cb_fn, cb_param);
*/ } else {
if (i && depend_tx) /* run the xor synchronously */
async_tx_ack(depend_tx); pr_debug("%s (sync): len: %zu\n", __func__, len);
depend_tx = tx; /* in the sync case the dest is an implied source
* (assumes the dest is the first source)
*/
if (flags & ASYNC_TX_XOR_DROP_DST) {
src_cnt--;
src_list++;
}
if (src_cnt > xor_src_cnt) { /* wait for any prerequisite operations */
/* drop completed sources */ async_tx_quiesce(&depend_tx);
src_cnt -= xor_src_cnt;
src_off += xor_src_cnt;
/* unconditionally preserve the destination */ do_sync_xor(dest, src_list, offset, src_cnt, len,
flags &= ~ASYNC_TX_XOR_ZERO_DST; flags, cb_fn, cb_param);
/* use the intermediate result a source, but remember return NULL;
* it's dropped, because it's implied, in the sync case
*/
src_list[--src_off] = dest;
src_cnt++;
flags |= ASYNC_TX_XOR_DROP_DST;
} else
src_cnt = 0;
i++;
} }
return tx;
} }
EXPORT_SYMBOL_GPL(async_xor); EXPORT_SYMBOL_GPL(async_xor);
...@@ -285,11 +260,11 @@ async_xor_zero_sum(struct page *dest, struct page **src_list, ...@@ -285,11 +260,11 @@ async_xor_zero_sum(struct page *dest, struct page **src_list,
tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt, tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt,
len, result, len, result,
dma_prep_flags); dma_prep_flags);
if (!tx) { if (unlikely(!tx)) {
if (depend_tx) async_tx_quiesce(&depend_tx);
dma_wait_for_async_tx(depend_tx);
while (!tx) while (!tx)
dma_async_issue_pending(chan);
tx = device->device_prep_dma_zero_sum(chan, tx = device->device_prep_dma_zero_sum(chan,
dma_src, src_cnt, len, result, dma_src, src_cnt, len, result,
dma_prep_flags); dma_prep_flags);
...@@ -307,18 +282,11 @@ async_xor_zero_sum(struct page *dest, struct page **src_list, ...@@ -307,18 +282,11 @@ async_xor_zero_sum(struct page *dest, struct page **src_list,
tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
depend_tx, NULL, NULL); depend_tx, NULL, NULL);
if (tx) { async_tx_quiesce(&tx);
if (dma_wait_for_async_tx(tx) == DMA_ERROR)
panic("%s: DMA_ERROR waiting for tx\n",
__func__);
async_tx_ack(tx);
}
*result = page_is_zero(dest, offset, len) ? 0 : 1; *result = page_is_zero(dest, offset, len) ? 0 : 1;
tx = NULL; async_tx_sync_epilog(cb_fn, cb_param);
async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
} }
return tx; return tx;
......
...@@ -28,13 +28,29 @@ ...@@ -28,13 +28,29 @@
#include <linux/device.h> #include <linux/device.h>
#include <linux/dca.h> #include <linux/dca.h>
MODULE_LICENSE("GPL"); #define DCA_VERSION "1.4"
/* For now we're assuming a single, global, DCA provider for the system. */ MODULE_VERSION(DCA_VERSION);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Intel Corporation");
static DEFINE_SPINLOCK(dca_lock); static DEFINE_SPINLOCK(dca_lock);
static struct dca_provider *global_dca = NULL; static LIST_HEAD(dca_providers);
/*
 * Walk dca_providers and return the first provider whose ->dev_managed()
 * op reports a non-zero result for @dev.  A NULL @dev matches the first
 * provider on the list.  Returns NULL when nothing matches (including an
 * empty list).
 *
 * The callers visible in this file take dca_lock before calling.
 */
static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
{
	struct dca_provider *dca;

	list_for_each_entry(dca, &dca_providers, node) {
		/* no device given: any provider will do */
		if (!dev)
			return dca;
		if (dca->ops->dev_managed(dca, dev))
			return dca;
	}

	return NULL;
}
/** /**
* dca_add_requester - add a dca client to the list * dca_add_requester - add a dca client to the list
...@@ -42,25 +58,39 @@ static struct dca_provider *global_dca = NULL; ...@@ -42,25 +58,39 @@ static struct dca_provider *global_dca = NULL;
*/ */
int dca_add_requester(struct device *dev) int dca_add_requester(struct device *dev)
{ {
int err, slot; struct dca_provider *dca;
int err, slot = -ENODEV;
if (!global_dca) if (!dev)
return -ENODEV; return -EFAULT;
spin_lock(&dca_lock); spin_lock(&dca_lock);
slot = global_dca->ops->add_requester(global_dca, dev);
spin_unlock(&dca_lock); /* check if the requester has not been added already */
if (slot < 0) dca = dca_find_provider_by_dev(dev);
if (dca) {
spin_unlock(&dca_lock);
return -EEXIST;
}
list_for_each_entry(dca, &dca_providers, node) {
slot = dca->ops->add_requester(dca, dev);
if (slot >= 0)
break;
}
if (slot < 0) {
spin_unlock(&dca_lock);
return slot; return slot;
}
err = dca_sysfs_add_req(global_dca, dev, slot); err = dca_sysfs_add_req(dca, dev, slot);
if (err) { if (err) {
spin_lock(&dca_lock); dca->ops->remove_requester(dca, dev);
global_dca->ops->remove_requester(global_dca, dev);
spin_unlock(&dca_lock); spin_unlock(&dca_lock);
return err; return err;
} }
spin_unlock(&dca_lock);
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(dca_add_requester); EXPORT_SYMBOL_GPL(dca_add_requester);
...@@ -71,30 +101,78 @@ EXPORT_SYMBOL_GPL(dca_add_requester); ...@@ -71,30 +101,78 @@ EXPORT_SYMBOL_GPL(dca_add_requester);
*/ */
int dca_remove_requester(struct device *dev) int dca_remove_requester(struct device *dev)
{ {
struct dca_provider *dca;
int slot; int slot;
if (!global_dca)
return -ENODEV; if (!dev)
return -EFAULT;
spin_lock(&dca_lock); spin_lock(&dca_lock);
slot = global_dca->ops->remove_requester(global_dca, dev); dca = dca_find_provider_by_dev(dev);
spin_unlock(&dca_lock); if (!dca) {
if (slot < 0) spin_unlock(&dca_lock);
return -ENODEV;
}
slot = dca->ops->remove_requester(dca, dev);
if (slot < 0) {
spin_unlock(&dca_lock);
return slot; return slot;
}
dca_sysfs_remove_req(global_dca, slot); dca_sysfs_remove_req(dca, slot);
spin_unlock(&dca_lock);
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(dca_remove_requester); EXPORT_SYMBOL_GPL(dca_remove_requester);
/** /**
* dca_get_tag - return the dca tag for the given cpu * dca_common_get_tag - return the dca tag (serves both new and old api)
* @dev - the device that wants dca service
* @cpu - the cpuid as returned by get_cpu() * @cpu - the cpuid as returned by get_cpu()
*/ */
u8 dca_get_tag(int cpu) u8 dca_common_get_tag(struct device *dev, int cpu)
{ {
if (!global_dca) struct dca_provider *dca;
u8 tag;
spin_lock(&dca_lock);
dca = dca_find_provider_by_dev(dev);
if (!dca) {
spin_unlock(&dca_lock);
return -ENODEV; return -ENODEV;
return global_dca->ops->get_tag(global_dca, cpu); }
tag = dca->ops->get_tag(dca, dev, cpu);
spin_unlock(&dca_lock);
return tag;
}
/**
 * dca3_get_tag - return the dca tag to the requester device
 *                for the given cpu (new api)
 * @dev - the device that wants dca service
 * @cpu - the cpuid as returned by get_cpu()
 *
 * A non-NULL @dev is forwarded to dca_common_get_tag().
 *
 * NOTE(review): the return type is u8, so the -EFAULT used for a NULL @dev
 * is converted to an unsigned 8-bit value and reaches the caller as a
 * positive number rather than a negative errno.
 */
u8 dca3_get_tag(struct device *dev, int cpu)
{
	if (dev)
		return dca_common_get_tag(dev, cpu);

	return -EFAULT;
}
EXPORT_SYMBOL_GPL(dca3_get_tag);
/**
* dca_get_tag - return the dca tag for the given cpu (old api)
* @cpu - the cpuid as returned by get_cpu()
*/
u8 dca_get_tag(int cpu)
{
struct device *dev = NULL;
return dca_common_get_tag(dev, cpu);
} }
EXPORT_SYMBOL_GPL(dca_get_tag); EXPORT_SYMBOL_GPL(dca_get_tag);
...@@ -140,12 +218,10 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev) ...@@ -140,12 +218,10 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev)
{ {
int err; int err;
if (global_dca)
return -EEXIST;
err = dca_sysfs_add_provider(dca, dev); err = dca_sysfs_add_provider(dca, dev);
if (err) if (err)
return err; return err;
global_dca = dca; list_add(&dca->node, &dca_providers);
blocking_notifier_call_chain(&dca_provider_chain, blocking_notifier_call_chain(&dca_provider_chain,
DCA_PROVIDER_ADD, NULL); DCA_PROVIDER_ADD, NULL);
return 0; return 0;
...@@ -158,11 +234,9 @@ EXPORT_SYMBOL_GPL(register_dca_provider); ...@@ -158,11 +234,9 @@ EXPORT_SYMBOL_GPL(register_dca_provider);
*/ */
void unregister_dca_provider(struct dca_provider *dca) void unregister_dca_provider(struct dca_provider *dca)
{ {
if (!global_dca)
return;
blocking_notifier_call_chain(&dca_provider_chain, blocking_notifier_call_chain(&dca_provider_chain,
DCA_PROVIDER_REMOVE, NULL); DCA_PROVIDER_REMOVE, NULL);
global_dca = NULL; list_del(&dca->node);
dca_sysfs_remove_provider(dca); dca_sysfs_remove_provider(dca);
} }
EXPORT_SYMBOL_GPL(unregister_dca_provider); EXPORT_SYMBOL_GPL(unregister_dca_provider);
...@@ -187,6 +261,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify); ...@@ -187,6 +261,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify);
static int __init dca_init(void) static int __init dca_init(void)
{ {
printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION);
return dca_sysfs_init(); return dca_sysfs_init();
} }
......
...@@ -13,10 +13,11 @@ static spinlock_t dca_idr_lock; ...@@ -13,10 +13,11 @@ static spinlock_t dca_idr_lock;
int dca_sysfs_add_req(struct dca_provider *dca, struct device *dev, int slot) int dca_sysfs_add_req(struct dca_provider *dca, struct device *dev, int slot)
{ {
struct device *cd; struct device *cd;
static int req_count;
cd = device_create_drvdata(dca_class, dca->cd, cd = device_create_drvdata(dca_class, dca->cd,
MKDEV(0, slot + 1), NULL, MKDEV(0, slot + 1), NULL,
"requester%d", slot); "requester%d", req_count++);
if (IS_ERR(cd)) if (IS_ERR(cd))
return PTR_ERR(cd); return PTR_ERR(cd);
return 0; return 0;
......
...@@ -4,13 +4,14 @@ ...@@ -4,13 +4,14 @@
menuconfig DMADEVICES menuconfig DMADEVICES
bool "DMA Engine support" bool "DMA Engine support"
depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX || PPC depends on !HIGHMEM64G && HAS_DMA
depends on !HIGHMEM64G
help help
DMA engines can do asynchronous data transfers without DMA engines can do asynchronous data transfers without
involving the host CPU. Currently, this framework can be involving the host CPU. Currently, this framework can be
used to offload memory copies in the network stack and used to offload memory copies in the network stack and
RAID operations in the MD driver. RAID operations in the MD driver. This menu only presents
DMA Device drivers supported by the configured arch, it may
be empty in some cases.
if DMADEVICES if DMADEVICES
...@@ -37,6 +38,15 @@ config INTEL_IOP_ADMA ...@@ -37,6 +38,15 @@ config INTEL_IOP_ADMA
help help
Enable support for the Intel(R) IOP Series RAID engines. Enable support for the Intel(R) IOP Series RAID engines.
config DW_DMAC
tristate "Synopsys DesignWare AHB DMA support"
depends on AVR32
select DMA_ENGINE
default y if CPU_AT32AP7000
help
Support the Synopsys DesignWare AHB DMA controller. This
can be integrated in chips such as the Atmel AT32ap7000.
config FSL_DMA config FSL_DMA
bool "Freescale MPC85xx/MPC83xx DMA support" bool "Freescale MPC85xx/MPC83xx DMA support"
depends on PPC depends on PPC
...@@ -46,6 +56,14 @@ config FSL_DMA ...@@ -46,6 +56,14 @@ config FSL_DMA
MPC8560/40, MPC8555, MPC8548 and MPC8641 processors. MPC8560/40, MPC8555, MPC8548 and MPC8641 processors.
The MPC8349, MPC8360 is also supported. The MPC8349, MPC8360 is also supported.
config MV_XOR
bool "Marvell XOR engine support"
depends on PLAT_ORION
select ASYNC_CORE
select DMA_ENGINE
---help---
Enable support for the Marvell XOR engine.
config DMA_ENGINE config DMA_ENGINE
bool bool
...@@ -55,10 +73,19 @@ comment "DMA Clients" ...@@ -55,10 +73,19 @@ comment "DMA Clients"
config NET_DMA config NET_DMA
bool "Network: TCP receive copy offload" bool "Network: TCP receive copy offload"
depends on DMA_ENGINE && NET depends on DMA_ENGINE && NET
default (INTEL_IOATDMA || FSL_DMA)
help help
This enables the use of DMA engines in the network stack to This enables the use of DMA engines in the network stack to
offload receive copy-to-user operations, freeing CPU cycles. offload receive copy-to-user operations, freeing CPU cycles.
Since this is the main user of the DMA engine, it should be enabled;
say Y here. Say Y here if you enabled INTEL_IOATDMA or FSL_DMA, otherwise
say N.
config DMATEST
tristate "DMA Test client"
depends on DMA_ENGINE
help
Simple DMA test client. Say N unless you're debugging a
DMA Device driver.
endif endif
obj-$(CONFIG_DMA_ENGINE) += dmaengine.o obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
obj-$(CONFIG_NET_DMA) += iovlock.o obj-$(CONFIG_NET_DMA) += iovlock.o
obj-$(CONFIG_DMATEST) += dmatest.o
obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
obj-$(CONFIG_FSL_DMA) += fsldma.o obj-$(CONFIG_FSL_DMA) += fsldma.o
obj-$(CONFIG_MV_XOR) += mv_xor.o
obj-$(CONFIG_DW_DMAC) += dw_dmac.o
...@@ -169,12 +169,18 @@ static void dma_client_chan_alloc(struct dma_client *client) ...@@ -169,12 +169,18 @@ static void dma_client_chan_alloc(struct dma_client *client)
enum dma_state_client ack; enum dma_state_client ack;
/* Find a channel */ /* Find a channel */
list_for_each_entry(device, &dma_device_list, global_node) list_for_each_entry(device, &dma_device_list, global_node) {
/* Does the client require a specific DMA controller? */
if (client->slave && client->slave->dma_dev
&& client->slave->dma_dev != device->dev)
continue;
list_for_each_entry(chan, &device->channels, device_node) { list_for_each_entry(chan, &device->channels, device_node) {
if (!dma_chan_satisfies_mask(chan, client->cap_mask)) if (!dma_chan_satisfies_mask(chan, client->cap_mask))
continue; continue;
desc = chan->device->device_alloc_chan_resources(chan); desc = chan->device->device_alloc_chan_resources(
chan, client);
if (desc >= 0) { if (desc >= 0) {
ack = client->event_callback(client, ack = client->event_callback(client,
chan, chan,
...@@ -183,12 +189,14 @@ static void dma_client_chan_alloc(struct dma_client *client) ...@@ -183,12 +189,14 @@ static void dma_client_chan_alloc(struct dma_client *client)
/* we are done once this client rejects /* we are done once this client rejects
* an available resource * an available resource
*/ */
if (ack == DMA_ACK) if (ack == DMA_ACK) {
dma_chan_get(chan); dma_chan_get(chan);
else if (ack == DMA_NAK) chan->client_count++;
} else if (ack == DMA_NAK)
return; return;
} }
} }
}
} }
enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
...@@ -272,8 +280,10 @@ static void dma_clients_notify_removed(struct dma_chan *chan) ...@@ -272,8 +280,10 @@ static void dma_clients_notify_removed(struct dma_chan *chan)
/* client was holding resources for this channel so /* client was holding resources for this channel so
* free it * free it
*/ */
if (ack == DMA_ACK) if (ack == DMA_ACK) {
dma_chan_put(chan); dma_chan_put(chan);
chan->client_count--;
}
} }
mutex_unlock(&dma_list_mutex); mutex_unlock(&dma_list_mutex);
...@@ -285,6 +295,10 @@ static void dma_clients_notify_removed(struct dma_chan *chan) ...@@ -285,6 +295,10 @@ static void dma_clients_notify_removed(struct dma_chan *chan)
*/ */
void dma_async_client_register(struct dma_client *client) void dma_async_client_register(struct dma_client *client)
{ {
/* validate client data */
BUG_ON(dma_has_cap(DMA_SLAVE, client->cap_mask) &&
!client->slave);
mutex_lock(&dma_list_mutex); mutex_lock(&dma_list_mutex);
list_add_tail(&client->global_node, &dma_client_list); list_add_tail(&client->global_node, &dma_client_list);
mutex_unlock(&dma_list_mutex); mutex_unlock(&dma_list_mutex);
...@@ -313,8 +327,10 @@ void dma_async_client_unregister(struct dma_client *client) ...@@ -313,8 +327,10 @@ void dma_async_client_unregister(struct dma_client *client)
ack = client->event_callback(client, chan, ack = client->event_callback(client, chan,
DMA_RESOURCE_REMOVED); DMA_RESOURCE_REMOVED);
if (ack == DMA_ACK) if (ack == DMA_ACK) {
dma_chan_put(chan); dma_chan_put(chan);
chan->client_count--;
}
} }
list_del(&client->global_node); list_del(&client->global_node);
...@@ -359,6 +375,10 @@ int dma_async_device_register(struct dma_device *device) ...@@ -359,6 +375,10 @@ int dma_async_device_register(struct dma_device *device)
!device->device_prep_dma_memset); !device->device_prep_dma_memset);
BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
!device->device_prep_dma_interrupt); !device->device_prep_dma_interrupt);
BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
!device->device_prep_slave_sg);
BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
!device->device_terminate_all);
BUG_ON(!device->device_alloc_chan_resources); BUG_ON(!device->device_alloc_chan_resources);
BUG_ON(!device->device_free_chan_resources); BUG_ON(!device->device_free_chan_resources);
...@@ -378,7 +398,7 @@ int dma_async_device_register(struct dma_device *device) ...@@ -378,7 +398,7 @@ int dma_async_device_register(struct dma_device *device)
chan->chan_id = chancnt++; chan->chan_id = chancnt++;
chan->dev.class = &dma_devclass; chan->dev.class = &dma_devclass;
chan->dev.parent = NULL; chan->dev.parent = device->dev;
snprintf(chan->dev.bus_id, BUS_ID_SIZE, "dma%dchan%d", snprintf(chan->dev.bus_id, BUS_ID_SIZE, "dma%dchan%d",
device->dev_id, chan->chan_id); device->dev_id, chan->chan_id);
...@@ -394,6 +414,7 @@ int dma_async_device_register(struct dma_device *device) ...@@ -394,6 +414,7 @@ int dma_async_device_register(struct dma_device *device)
kref_get(&device->refcount); kref_get(&device->refcount);
kref_get(&device->refcount); kref_get(&device->refcount);
kref_init(&chan->refcount); kref_init(&chan->refcount);
chan->client_count = 0;
chan->slow_ref = 0; chan->slow_ref = 0;
INIT_RCU_HEAD(&chan->rcu); INIT_RCU_HEAD(&chan->rcu);
} }
......
/*
* DMA Engine test module
*
* Copyright (C) 2007 Atmel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/wait.h>
/* Size in bytes of the source and destination buffer each test thread allocates. */
static unsigned int test_buf_size = 16384;
module_param(test_buf_size, uint, S_IRUGO);
MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer");
/* Optional channel filter ("channel" parameter); an empty string matches any channel. */
static char test_channel[BUS_ID_SIZE];
module_param_string(channel, test_channel, sizeof(test_channel), S_IRUGO);
MODULE_PARM_DESC(channel, "Bus ID of the channel to test (default: any)");
/* Optional device filter ("device" parameter); an empty string matches any device. */
static char test_device[BUS_ID_SIZE];
module_param_string(device, test_device, sizeof(test_device), S_IRUGO);
MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)");
/* Number of test threads started for every channel that is accepted. */
static unsigned int threads_per_chan = 1;
module_param(threads_per_chan, uint, S_IRUGO);
MODULE_PARM_DESC(threads_per_chan,
"Number of threads to start per channel (default: 1)");
/*
 * Upper bound on the number of channels to test; zero means no limit.
 * The description is registered under the parameter's real name so that
 * it is attached to "max_channels" rather than to a non-existent
 * "nr_channels" parameter.
 */
static unsigned int max_channels;
module_param(max_channels, uint, S_IRUGO);
MODULE_PARM_DESC(max_channels,
		"Maximum number of channels to use (default: all)");
/*
 * Initialization patterns. All bytes in the source buffer have bit 7
 * set, all bytes in the destination buffer have bit 7 cleared.
 *
 * Bit 6 is set for all bytes which are to be copied by the DMA
 * engine. Bit 5 is set for all bytes which are to be overwritten by
 * the DMA engine.
 *
 * The remaining bits are the inverse of a counter which increments by
 * one for each byte address.
 */
/* Bit 7: marks a byte that belongs to the source buffer. */
#define PATTERN_SRC 0x80
/* Bit 7 clear: marks a byte that belongs to the destination buffer. */
#define PATTERN_DST 0x00
/* Bit 6: source byte lies inside the region handed to the DMA engine. */
#define PATTERN_COPY 0x40
/* Bit 5: destination byte lies inside the region the DMA engine should overwrite. */
#define PATTERN_OVERWRITE 0x20
/* Low five bits: inverted per-byte counter. */
#define PATTERN_COUNT_MASK 0x1f
/* State of a single test thread; allocated by dmatest_add_channel(). */
struct dmatest_thread {
/* Link in the owning dmatest_chan's "threads" list. */
struct list_head node;
/* Kernel thread running dmatest_func(); set from kthread_run()'s result. */
struct task_struct *task;
/* Channel this thread submits its copies to. */
struct dma_chan *chan;
/* Source buffer, allocated by the thread itself in dmatest_func(). */
u8 *srcbuf;
/* Destination buffer, allocated by the thread itself in dmatest_func(). */
u8 *dstbuf;
};
/* Bookkeeping for one DMA channel that is being tested. */
struct dmatest_chan {
/* Link in the global dmatest_channels list. */
struct list_head node;
/* The DMA channel under test. */
struct dma_chan *chan;
/* List of struct dmatest_thread instances running against this channel. */
struct list_head threads;
};
/*
 * These are protected by dma_list_mutex since they're only used by
 * the DMA client event callback
 */
/* Every channel currently being tested (list of struct dmatest_chan). */
static LIST_HEAD(dmatest_channels);
/* Count of channels added by dmatest_add_channel(); checked against max_channels. */
static unsigned int nr_channels;
/*
 * Check @chan against the "channel" module parameter.
 *
 * An empty filter accepts every channel; otherwise the channel's bus ID
 * must be exactly equal to the filter string.
 */
static bool dmatest_match_channel(struct dma_chan *chan)
{
	bool no_filter = (test_channel[0] == '\0');

	return no_filter || !strcmp(chan->dev.bus_id, test_channel);
}
/*
 * Check @device against the "device" module parameter.
 *
 * An empty filter accepts every device; otherwise the bus ID of the
 * underlying struct device must be exactly equal to the filter string.
 */
static bool dmatest_match_device(struct dma_device *device)
{
	bool no_filter = (test_device[0] == '\0');

	return no_filter || !strcmp(device->dev->bus_id, test_device);
}
/* Return an unsigned long filled with bytes from get_random_bytes(). */
static unsigned long dmatest_random(void)
{
	unsigned long value;

	get_random_bytes(&value, sizeof(value));

	return value;
}
/*
 * Fill the source buffer with its reference pattern.
 *
 * Every byte gets PATTERN_SRC plus the inverted low bits of its index;
 * bytes in [start, start + len) additionally carry PATTERN_COPY to mark
 * the region that is handed to the DMA engine.
 */
static void dmatest_init_srcbuf(u8 *buf, unsigned int start, unsigned int len)
{
	const unsigned int copy_end = start + len;
	unsigned int pos = 0;

	/* Bytes in front of the copied region */
	while (pos < start) {
		buf[pos] = PATTERN_SRC | (~pos & PATTERN_COUNT_MASK);
		pos++;
	}

	/* The region to be copied */
	while (pos < copy_end) {
		buf[pos] = PATTERN_SRC | PATTERN_COPY
			| (~pos & PATTERN_COUNT_MASK);
		pos++;
	}

	/* Bytes behind the copied region */
	while (pos < test_buf_size) {
		buf[pos] = PATTERN_SRC | (~pos & PATTERN_COUNT_MASK);
		pos++;
	}
}
/*
 * Fill the destination buffer with its reference pattern.
 *
 * Every byte gets PATTERN_DST plus the inverted low bits of its index;
 * bytes in [start, start + len) additionally carry PATTERN_OVERWRITE to
 * mark the region the DMA engine is expected to overwrite.
 */
static void dmatest_init_dstbuf(u8 *buf, unsigned int start, unsigned int len)
{
	const unsigned int overwrite_end = start + len;
	unsigned int pos = 0;

	/* Bytes in front of the overwritten region */
	while (pos < start) {
		buf[pos] = PATTERN_DST | (~pos & PATTERN_COUNT_MASK);
		pos++;
	}

	/* The region to be overwritten */
	while (pos < overwrite_end) {
		buf[pos] = PATTERN_DST | PATTERN_OVERWRITE
			| (~pos & PATTERN_COUNT_MASK);
		pos++;
	}

	/* Bytes behind the overwritten region */
	while (pos < test_buf_size) {
		buf[pos] = PATTERN_DST | (~pos & PATTERN_COUNT_MASK);
		pos++;
	}
}
/*
 * Log one mismatching byte.
 *
 * @actual:    byte found in the buffer
 * @pattern:   pattern flag bits expected at this position
 * @index:     offset of the byte within its buffer
 * @counter:   counter value whose inverted low bits complete the
 *             expected byte
 * @is_srcbuf: true when the byte comes from the source buffer
 *
 * The wording of the message is chosen from the flag bits that differ
 * between @actual and @pattern.
 */
static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
		unsigned int counter, bool is_srcbuf)
{
	const char *name = current->comm;
	u8 changed_bits = actual ^ pattern;
	u8 want = pattern | (~counter & PATTERN_COUNT_MASK);

	if (is_srcbuf) {
		pr_warning("%s: srcbuf[0x%x] overwritten!"
				" Expected %02x, got %02x\n",
				name, index, want, actual);
		return;
	}

	if ((pattern & PATTERN_COPY)
			&& (changed_bits & (PATTERN_COPY | PATTERN_OVERWRITE))) {
		pr_warning("%s: dstbuf[0x%x] not copied!"
				" Expected %02x, got %02x\n",
				name, index, want, actual);
		return;
	}

	if (changed_bits & PATTERN_SRC) {
		pr_warning("%s: dstbuf[0x%x] was copied!"
				" Expected %02x, got %02x\n",
				name, index, want, actual);
		return;
	}

	pr_warning("%s: dstbuf[0x%x] mismatch!"
			" Expected %02x, got %02x\n",
			name, index, want, actual);
}
/*
 * Compare buf[start..end) against the expected pattern.
 *
 * @counter is the counter value expected at buf[start]; it advances by
 * one per byte. Only the first 32 mismatches are logged individually;
 * any further ones are summarised in a single "errors suppressed" line.
 *
 * Returns the total number of mismatching bytes.
 */
static unsigned int dmatest_verify(u8 *buf, unsigned int start,
		unsigned int end, unsigned int counter, u8 pattern,
		bool is_srcbuf)
{
	unsigned int pos;
	unsigned int mismatches = 0;

	for (pos = start; pos < end; pos++, counter++) {
		u8 want = pattern | (~counter & PATTERN_COUNT_MASK);
		u8 got = buf[pos];

		if (got == want)
			continue;

		if (mismatches < 32)
			dmatest_mismatch(got, pattern, pos, counter,
					is_srcbuf);
		mismatches++;
	}

	if (mismatches > 32)
		pr_warning("%s: %u errors suppressed\n",
				current->comm, mismatches - 32);

	return mismatches;
}
/*
* This function repeatedly tests DMA transfers of various lengths and
* offsets until it is told to exit by kthread_stop(). There may be
* multiple threads running this function in parallel for a single
* channel, and there may be multiple channels being tested in
* parallel.
*
* Before each test, the source and destination buffer is initialized
* with a known pattern. This pattern is different depending on
* whether it's in an area which is supposed to be copied or
* overwritten, and different in the source and destination buffers.
* So if the DMA engine doesn't copy exactly what we tell it to copy,
* we'll notice.
*/
static int dmatest_func(void *data)
{
struct dmatest_thread *thread = data;
struct dma_chan *chan;
const char *thread_name;
unsigned int src_off, dst_off, len;
unsigned int error_count;
unsigned int failed_tests = 0;
unsigned int total_tests = 0;
dma_cookie_t cookie;
enum dma_status status;
int ret;
thread_name = current->comm;
ret = -ENOMEM;
thread->srcbuf = kmalloc(test_buf_size, GFP_KERNEL);
if (!thread->srcbuf)
goto err_srcbuf;
thread->dstbuf = kmalloc(test_buf_size, GFP_KERNEL);
if (!thread->dstbuf)
goto err_dstbuf;
smp_rmb();
chan = thread->chan;
dma_chan_get(chan);
while (!kthread_should_stop()) {
total_tests++;
len = dmatest_random() % test_buf_size + 1;
src_off = dmatest_random() % (test_buf_size - len + 1);
dst_off = dmatest_random() % (test_buf_size - len + 1);
dmatest_init_srcbuf(thread->srcbuf, src_off, len);
dmatest_init_dstbuf(thread->dstbuf, dst_off, len);
cookie = dma_async_memcpy_buf_to_buf(chan,
thread->dstbuf + dst_off,
thread->srcbuf + src_off,
len);
if (dma_submit_error(cookie)) {
pr_warning("%s: #%u: submit error %d with src_off=0x%x "
"dst_off=0x%x len=0x%x\n",
thread_name, total_tests - 1, cookie,
src_off, dst_off, len);
msleep(100);
failed_tests++;
continue;
}
dma_async_memcpy_issue_pending(chan);
do {
msleep(1);
status = dma_async_memcpy_complete(
chan, cookie, NULL, NULL);
} while (status == DMA_IN_PROGRESS);
if (status == DMA_ERROR) {
pr_warning("%s: #%u: error during copy\n",
thread_name, total_tests - 1);
failed_tests++;
continue;
}
error_count = 0;
pr_debug("%s: verifying source buffer...\n", thread_name);
error_count += dmatest_verify(thread->srcbuf, 0, src_off,
0, PATTERN_SRC, true);
error_count += dmatest_verify(thread->srcbuf, src_off,
src_off + len, src_off,
PATTERN_SRC | PATTERN_COPY, true);
error_count += dmatest_verify(thread->srcbuf, src_off + len,
test_buf_size, src_off + len,
PATTERN_SRC, true);
pr_debug("%s: verifying dest buffer...\n",
thread->task->comm);
error_count += dmatest_verify(thread->dstbuf, 0, dst_off,
0, PATTERN_DST, false);
error_count += dmatest_verify(thread->dstbuf, dst_off,
dst_off + len, src_off,
PATTERN_SRC | PATTERN_COPY, false);
error_count += dmatest_verify(thread->dstbuf, dst_off + len,
test_buf_size, dst_off + len,
PATTERN_DST, false);
if (error_count) {
pr_warning("%s: #%u: %u errors with "
"src_off=0x%x dst_off=0x%x len=0x%x\n",
thread_name, total_tests - 1, error_count,
src_off, dst_off, len);
failed_tests++;
} else {
pr_debug("%s: #%u: No errors with "
"src_off=0x%x dst_off=0x%x len=0x%x\n",
thread_name, total_tests - 1,
src_off, dst_off, len);
}
}
ret = 0;
dma_chan_put(chan);
kfree(thread->dstbuf);
err_dstbuf:
kfree(thread->srcbuf);
err_srcbuf:
pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
thread_name, total_tests, failed_tests, ret);
return ret;
}
/*
 * Stop and free every test thread of @dtc, then free @dtc itself.
 * The caller must already have unlinked @dtc from dmatest_channels.
 */
static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
{
	struct dmatest_thread *thread;
	struct dmatest_thread *_thread;
	int ret;

	list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
		struct task_struct *task = thread->task;

		/*
		 * Pin the task across kthread_stop() so that task->comm is
		 * still safe to read for the debug message; without the
		 * extra reference the task may be gone once the thread has
		 * exited.
		 */
		get_task_struct(task);
		ret = kthread_stop(task);
		pr_debug("dmatest: thread %s exited with status %d\n",
				task->comm, ret);
		put_task_struct(task);

		list_del(&thread->node);
		kfree(thread);
	}
	kfree(dtc);
}
static enum dma_state_client dmatest_add_channel(struct dma_chan *chan)
{
struct dmatest_chan *dtc;
struct dmatest_thread *thread;
unsigned int i;
dtc = kmalloc(sizeof(struct dmatest_chan), GFP_ATOMIC);
if (!dtc) {
pr_warning("dmatest: No memory for %s\n", chan->dev.bus_id);
return DMA_NAK;
}
dtc->chan = chan;
INIT_LIST_HEAD(&dtc->threads);
for (i = 0; i < threads_per_chan; i++) {
thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
if (!thread) {
pr_warning("dmatest: No memory for %s-test%u\n",
chan->dev.bus_id, i);
break;
}
thread->chan = dtc->chan;
smp_wmb();
thread->task = kthread_run(dmatest_func, thread, "%s-test%u",
chan->dev.bus_id, i);
if (IS_ERR(thread->task)) {
pr_warning("dmatest: Failed to run thread %s-test%u\n",
chan->dev.bus_id, i);
kfree(thread);
break;
}
/* srcbuf and dstbuf are allocated by the thread itself */
list_add_tail(&thread->node, &dtc->threads);
}
pr_info("dmatest: Started %u threads using %s\n", i, chan->dev.bus_id);
list_add_tail(&dtc->node, &dmatest_channels);
nr_channels++;
return DMA_ACK;
}
/*
 * Stop testing @chan: unlink its entry from dmatest_channels, stop its
 * threads and free all associated state.
 *
 * Returns DMA_ACK if the channel was being tested, DMA_DUP if it was
 * not found in dmatest_channels.
 */
static enum dma_state_client dmatest_remove_channel(struct dma_chan *chan)
{
	struct dmatest_chan *dtc, *_dtc;

	list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) {
		if (dtc->chan == chan) {
			list_del(&dtc->node);
			dmatest_cleanup_channel(dtc);
			/*
			 * Undo the increment done by dmatest_add_channel()
			 * so that the max_channels limit reflects the
			 * channels actually in use.
			 */
			nr_channels--;
			pr_debug("dmatest: lost channel %s\n",
					chan->dev.bus_id);
			return DMA_ACK;
		}
	}

	return DMA_DUP;
}
/*
 * DMA client event callback.
 *
 * Test threads are started when a matching channel becomes available
 * and stopped when the channel is removed. Unregistering the client
 * removes all channels, so this also takes care of cleaning up on
 * module unload.
 *
 * Returns DMA_DUP for channels rejected by the channel/device filters,
 * DMA_NAK when the max_channels limit has been reached or the event is
 * not handled, and otherwise whatever the add/remove helper returns.
 */
static enum dma_state_client
dmatest_event(struct dma_client *client, struct dma_chan *chan,
		enum dma_state state)
{
	if (state == DMA_RESOURCE_AVAILABLE) {
		if (!dmatest_match_channel(chan)
				|| !dmatest_match_device(chan->device))
			return DMA_DUP;

		if (max_channels && nr_channels >= max_channels)
			return DMA_NAK;

		return dmatest_add_channel(chan);
	}

	if (state == DMA_RESOURCE_REMOVED)
		return dmatest_remove_channel(chan);

	pr_info("dmatest: Unhandled event %u (%s)\n",
			state, chan->dev.bus_id);

	return DMA_NAK;
}
/*
 * The DMA client registered by this module; channel events are delivered
 * to dmatest_event(). The capability mask is filled in by dmatest_init().
 */
static struct dma_client dmatest_client = {
.event_callback = dmatest_event,
};
/*
 * Module entry point: mark the client as wanting DMA_MEMCPY capable
 * channels, register it and request channels. Always returns 0.
 */
static int __init dmatest_init(void)
{
	dma_cap_set(DMA_MEMCPY, dmatest_client.cap_mask);

	dma_async_client_register(&dmatest_client);
	dma_async_client_chan_request(&dmatest_client);

	return 0;
}
module_init(dmatest_init);
/* Module exit point: unregister the DMA client. */
static void __exit dmatest_exit(void)
{
	dma_async_client_unregister(&dmatest_client);
}
module_exit(dmatest_exit);

MODULE_AUTHOR("Haavard Skinnemoen <hskinnemoen@atmel.com>");
MODULE_LICENSE("GPL v2");
/*
* Driver for the Synopsys DesignWare DMA Controller (aka DMACA on
* AVR32 systems.)
*
* Copyright (C) 2007-2008 Atmel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include "dw_dmac_regs.h"
/*
* This supports the Synopsys "DesignWare AHB Central DMA Controller",
* (DW_ahb_dmac) which is used with various AMBA 2.0 systems (not all
* of which use ARM any more). See the "Databook" from Synopsys for
* information beyond what licensees probably provide.
*
* The driver has currently been tested only with the Atmel AT32AP7000,
* which does not support descriptor writeback.
*/
/*
 * Control-register (CTL_LO) bits shared by every descriptor this driver
 * builds; the prep functions OR in width, increment and flow-control
 * bits on top of this.
 *
 * NOTE: DMS+SMS is system-specific. We should get this information
 * from the platform code somehow.
 */
#define DWC_DEFAULT_CTLLO (DWC_CTLL_DST_MSIZE(0) \
| DWC_CTLL_SRC_MSIZE(0) \
| DWC_CTLL_DMS(0) \
| DWC_CTLL_SMS(1) \
| DWC_CTLL_LLP_D_EN \
| DWC_CTLL_LLP_S_EN)
/*
 * Maximum transfer count per descriptor.
 *
 * This is configuration-dependent and usually a funny size like 4095.
 * Let's round it down to the nearest power of two.
 *
 * Note that this is a transfer count, i.e. if we transfer 32-bit
 * words, we can do 8192 bytes per descriptor.
 *
 * This parameter is also system-specific.
 */
#define DWC_MAX_COUNT 2048U
/*
 * Number of descriptors to allocate for each channel. This should be
 * made configurable somehow; preferably, the clients (at least the
 * ones using slave transfers) should be able to give us a hint.
 */
#define NR_DESCS_PER_CHANNEL 64
/*----------------------------------------------------------------------*/
/*
* Because we're not relying on writeback from the controller (it may not
* even be configured into the core!) we don't need to use dma_pool. These
* descriptors -- and associated data -- are cacheable. We do need to make
* sure their dcache entries are written back before handing them off to
* the controller, though.
*/
/*
 * Return the descriptor at the head of the channel's active list.
 * The result is only meaningful when the list is not empty.
 */
static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
{
	struct list_head *head = dwc->active_list.next;

	return list_entry(head, struct dw_desc, desc_node);
}
/*
 * Return the descriptor at the head of the channel's queue.
 * The result is only meaningful when the queue is not empty.
 */
static struct dw_desc *dwc_first_queued(struct dw_dma_chan *dwc)
{
	struct list_head *head = dwc->queue.next;

	return list_entry(head, struct dw_desc, desc_node);
}
/*
 * Take the first ACKed descriptor off the channel's free list.
 *
 * Descriptors that have not been ACKed yet are skipped and left on the
 * list. Returns NULL when no ACKed descriptor is available.
 */
static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
{
	struct dw_desc *cur, *next;
	struct dw_desc *found = NULL;
	unsigned int skipped = 0;

	spin_lock_bh(&dwc->lock);
	list_for_each_entry_safe(cur, next, &dwc->free_list, desc_node) {
		if (!async_tx_test_ack(&cur->txd)) {
			dev_dbg(&dwc->chan.dev, "desc %p not ACKed\n", cur);
			skipped++;
			continue;
		}

		list_del(&cur->desc_node);
		found = cur;
		break;
	}
	spin_unlock_bh(&dwc->lock);

	dev_vdbg(&dwc->chan.dev, "scanned %u descriptors on freelist\n",
			skipped);

	return found;
}
static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc)
{
struct dw_desc *child;
list_for_each_entry(child, &desc->txd.tx_list, desc_node)
dma_sync_single_for_cpu(dwc->chan.dev.parent,
child->txd.phys, sizeof(child->lli),
DMA_TO_DEVICE);
dma_sync_single_for_cpu(dwc->chan.dev.parent,
desc->txd.phys, sizeof(desc->lli),
DMA_TO_DEVICE);
}
/*
 * Return a descriptor, together with all of its children, to the
 * channel's free list. `desc' must not be on any lists. A NULL `desc'
 * is silently ignored.
 */
static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
{
	struct dw_desc *child;

	if (!desc)
		return;

	dwc_sync_desc_for_cpu(dwc, desc);

	spin_lock_bh(&dwc->lock);

	list_for_each_entry(child, &desc->txd.tx_list, desc_node)
		dev_vdbg(&dwc->chan.dev,
				"moving child desc %p to freelist\n",
				child);
	list_splice_init(&desc->txd.tx_list, &dwc->free_list);

	dev_vdbg(&dwc->chan.dev, "moving desc %p to freelist\n", desc);
	list_add(&desc->desc_node, &dwc->free_list);

	spin_unlock_bh(&dwc->lock);
}
/*
 * Hand out the next cookie of the channel and store it in both the
 * channel and the descriptor. A cookie that would become negative
 * restarts at 1.
 *
 * Called with dwc->lock held and bh disabled.
 */
static dma_cookie_t
dwc_assign_cookie(struct dw_dma_chan *dwc, struct dw_desc *desc)
{
	dma_cookie_t next = dwc->chan.cookie;

	next++;
	if (next < 0)
		next = 1;

	dwc->chan.cookie = next;
	desc->txd.cookie = next;

	return next;
}
/*----------------------------------------------------------------------*/
/*
 * Start the channel on the descriptor chain beginning at @first.
 *
 * If the channel's bit is still set in CH_EN, an error and a dump of
 * the channel registers are logged and nothing is started. Otherwise
 * the physical address of @first is written to LLP, CTL_LO/CTL_HI are
 * initialised and the channel's CH_EN bit is set.
 *
 * Called with dwc->lock held and bh disabled
 */
static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
{
struct dw_dma *dw = to_dw_dma(dwc->chan.device);
/* ASSERT: channel is idle */
if (dma_readl(dw, CH_EN) & dwc->mask) {
dev_err(&dwc->chan.dev,
"BUG: Attempted to start non-idle channel\n");
dev_err(&dwc->chan.dev,
" SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
channel_readl(dwc, SAR),
channel_readl(dwc, DAR),
channel_readl(dwc, LLP),
channel_readl(dwc, CTL_HI),
channel_readl(dwc, CTL_LO));
/* The tasklet will hopefully advance the queue... */
return;
}
/* Point the channel at the first descriptor, then enable it last */
channel_writel(dwc, LLP, first->txd.phys);
channel_writel(dwc, CTL_LO,
DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
channel_writel(dwc, CTL_HI, 0);
channel_set_bit(dw, CH_EN, dwc->mask);
}
/*----------------------------------------------------------------------*/
/*
 * Finish one transaction: record its cookie as the last completed one,
 * move the descriptor and its children to the free list, unmap the
 * data buffers (unless the SKIP_*_UNMAP flags are set) and finally run
 * the client's completion callback, if any.
 *
 * NOTE(review): this modifies dwc->free_list without taking dwc->lock
 * itself, so callers are presumably expected to hold it - confirm.
 */
static void
dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
{
dma_async_tx_callback callback;
void *param;
struct dma_async_tx_descriptor *txd = &desc->txd;
dev_vdbg(&dwc->chan.dev, "descriptor %u complete\n", txd->cookie);
dwc->completed = txd->cookie;
/* Fetch the callback before the descriptor goes back to the free list */
callback = txd->callback;
param = txd->callback_param;
dwc_sync_desc_for_cpu(dwc, desc);
list_splice_init(&txd->tx_list, &dwc->free_list);
list_move(&desc->desc_node, &dwc->free_list);
/*
* We use dma_unmap_page() regardless of how the buffers were
* mapped before they were submitted...
*/
if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP))
dma_unmap_page(dwc->chan.dev.parent, desc->lli.dar, desc->len,
DMA_FROM_DEVICE);
if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
dma_unmap_page(dwc->chan.dev.parent, desc->lli.sar, desc->len,
DMA_TO_DEVICE);
/*
* The API requires that no submissions are done from a
* callback, so we don't need to drop the lock here
*/
if (callback)
callback(param);
}
/*
 * Complete every descriptor on the active list.
 *
 * If the channel is unexpectedly still enabled it is disabled first.
 * Queued descriptors are started and promoted to the active list before
 * the completed ones are processed.
 */
static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc)
{
struct dw_desc *desc, *_desc;
LIST_HEAD(list);
if (dma_readl(dw, CH_EN) & dwc->mask) {
dev_err(&dwc->chan.dev,
"BUG: XFER bit set, but channel not idle!\n");
/* Try to continue after resetting the channel... */
channel_clear_bit(dw, CH_EN, dwc->mask);
/* Busy-wait until the enable bit reads back as cleared */
while (dma_readl(dw, CH_EN) & dwc->mask)
cpu_relax();
}
/*
* Submit queued descriptors ASAP, i.e. before we go through
* the completed ones.
*/
if (!list_empty(&dwc->queue))
dwc_dostart(dwc, dwc_first_queued(dwc));
/* Move the finished descriptors to a private list, then promote the queue */
list_splice_init(&dwc->active_list, &list);
list_splice_init(&dwc->queue, &dwc->active_list);
list_for_each_entry_safe(desc, _desc, &list, desc_node)
dwc_descriptor_complete(dwc, desc);
}
/*
 * Work out how far the channel has progressed and complete every
 * descriptor that is done.
 *
 * If the channel's RAW.XFER bit is set, everything on the active list
 * is completed via dwc_complete_all(). Otherwise the active list is
 * walked and descriptors are completed until one is found whose llp
 * field (or that of one of its children) equals the channel's current
 * LLP register value. If no such descriptor exists, the channel is
 * reset and any queued descriptors are started.
 *
 * NOTE(review): callers visible in this file take dwc->lock around this
 * call, except dwc_is_tx_complete() - confirm the intended locking.
 */
static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
{
dma_addr_t llp;
struct dw_desc *desc, *_desc;
struct dw_desc *child;
u32 status_xfer;
/*
* Clear block interrupt flag before scanning so that we don't
* miss any, and read LLP before RAW_XFER to ensure it is
* valid if we decide to scan the list.
*/
dma_writel(dw, CLEAR.BLOCK, dwc->mask);
llp = channel_readl(dwc, LLP);
status_xfer = dma_readl(dw, RAW.XFER);
if (status_xfer & dwc->mask) {
/* Everything we've submitted is done */
dma_writel(dw, CLEAR.XFER, dwc->mask);
dwc_complete_all(dw, dwc);
return;
}
dev_vdbg(&dwc->chan.dev, "scan_descriptors: llp=0x%x\n", llp);
list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
if (desc->lli.llp == llp)
/* This one is currently in progress */
return;
list_for_each_entry(child, &desc->txd.tx_list, desc_node)
if (child->lli.llp == llp)
/* Currently in progress */
return;
/*
* No descriptors so far seem to be in progress, i.e.
* this one must be done.
*/
dwc_descriptor_complete(dwc, desc);
}
dev_err(&dwc->chan.dev,
"BUG: All descriptors done, but channel not idle!\n");
/* Try to continue after resetting the channel... */
channel_clear_bit(dw, CH_EN, dwc->mask);
while (dma_readl(dw, CH_EN) & dwc->mask)
cpu_relax();
if (!list_empty(&dwc->queue)) {
dwc_dostart(dwc, dwc_first_queued(dwc));
list_splice_init(&dwc->queue, &dwc->active_list);
}
}
/*
 * Print the fields of one hardware descriptor (source, destination,
 * next-descriptor pointer and both control words) at KERN_CRIT level.
 */
static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
{
	struct device *dev = &dwc->chan.dev;

	dev_printk(KERN_CRIT, dev,
			" desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
			lli->sar, lli->dar, lli->llp,
			lli->ctlhi, lli->ctllo);
}
/*
 * Recover from an error interrupt on a channel.
 *
 * Descriptors that have already finished are completed first. The
 * descriptor then at the head of the active list is treated as the
 * offending one: it is unlinked, the remaining work is restarted, its
 * contents are dumped to the log and it is completed as if it had
 * succeeded.
 *
 * NOTE(review): the active list is assumed to be non-empty after the
 * scan; dwc_first_active() is not meaningful otherwise - confirm.
 */
static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
{
struct dw_desc *bad_desc;
struct dw_desc *child;
dwc_scan_descriptors(dw, dwc);
/*
* The descriptor currently at the head of the active list is
* borked. Since we don't have any way to report errors, we'll
* just have to scream loudly and try to carry on.
*/
bad_desc = dwc_first_active(dwc);
list_del_init(&bad_desc->desc_node);
/* Append everything that was queued to the tail of the active list */
list_splice_init(&dwc->queue, dwc->active_list.prev);
/* Clear the error flag and try to restart the controller */
dma_writel(dw, CLEAR.ERROR, dwc->mask);
if (!list_empty(&dwc->active_list))
dwc_dostart(dwc, dwc_first_active(dwc));
/*
* KERN_CRITICAL may seem harsh, but since this only happens
* when someone submits a bad physical address in a
* descriptor, we should consider ourselves lucky that the
* controller flagged an error instead of scribbling over
* random memory locations.
*/
dev_printk(KERN_CRIT, &dwc->chan.dev,
"Bad descriptor submitted for DMA!\n");
dev_printk(KERN_CRIT, &dwc->chan.dev,
" cookie: %d\n", bad_desc->txd.cookie);
dwc_dump_lli(dwc, &bad_desc->lli);
list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
dwc_dump_lli(dwc, &child->lli);
/* Pretend the descriptor completed successfully */
dwc_descriptor_complete(dwc, bad_desc);
}
/*
 * Bottom half of the interrupt handler.
 *
 * Reads the raw block, transfer and error status once, then for each
 * channel (under its lock) either runs error recovery or scans its
 * descriptors for completed work. Finally the interrupts that were
 * masked by dw_dma_interrupt() are enabled again.
 *
 * @data is the struct dw_dma the tasklet belongs to.
 */
static void dw_dma_tasklet(unsigned long data)
{
	struct dw_dma *dw = (struct dw_dma *)data;
	struct dw_dma_chan *dwc;
	u32 status_block;
	u32 status_xfer;
	u32 status_err;
	int i;

	status_block = dma_readl(dw, RAW.BLOCK);
	/* Transfer-complete status lives in RAW.XFER, not RAW.BLOCK */
	status_xfer = dma_readl(dw, RAW.XFER);
	status_err = dma_readl(dw, RAW.ERROR);

	dev_vdbg(dw->dma.dev, "tasklet: status_block=%x status_err=%x\n",
			status_block, status_err);

	for (i = 0; i < dw->dma.chancnt; i++) {
		dwc = &dw->chan[i];
		spin_lock(&dwc->lock);
		if (status_err & (1 << i))
			dwc_handle_error(dw, dwc);
		else if ((status_block | status_xfer) & (1 << i))
			dwc_scan_descriptors(dw, dwc);
		spin_unlock(&dwc->lock);
	}

	/*
	 * Re-enable interrupts. Block Complete interrupts are only
	 * enabled if the INT_EN bit in the descriptor is set. This
	 * will trigger a scan before the whole list is done.
	 */
	channel_set_bit(dw, MASK.XFER, dw->all_chan_mask);
	channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask);
	channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask);
}
/*
 * Hard interrupt handler.
 *
 * Masks the XFER, BLOCK and ERROR interrupts of all channels and
 * schedules the tasklet, which does the real work and unmasks them
 * again. If STATUS_INT still reports pending interrupts after masking,
 * that is logged and all five interrupt mask registers are cleared for
 * the low eight channel bits. Always returns IRQ_HANDLED.
 *
 * @dev_id is the struct dw_dma registered with the interrupt.
 */
static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
{
struct dw_dma *dw = dev_id;
u32 status;
dev_vdbg(dw->dma.dev, "interrupt: status=0x%x\n",
dma_readl(dw, STATUS_INT));
/*
* Just disable the interrupts. We'll turn them back on in the
* softirq handler.
*/
channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
/* Nothing should be pending any more now that everything is masked */
status = dma_readl(dw, STATUS_INT);
if (status) {
dev_err(dw->dma.dev,
"BUG: Unexpected interrupts pending: 0x%x\n",
status);
/* Try to recover */
channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1);
channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1);
channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1);
channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1);
channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1);
}
tasklet_schedule(&dw->tasklet);
return IRQ_HANDLED;
}
/*----------------------------------------------------------------------*/
/*
 * tx_submit hook: assign a cookie to the transaction and either start
 * it right away (when nothing is active on the channel) or append it to
 * the channel's queue.
 *
 * Returns the cookie assigned to the transaction.
 */
static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
{
	struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan);
	struct dw_desc *desc = txd_to_dw_desc(tx);
	dma_cookie_t cookie;

	spin_lock_bh(&dwc->lock);

	cookie = dwc_assign_cookie(dwc, desc);

	/*
	 * REVISIT: We should attempt to chain as many descriptors as
	 * possible, perhaps even appending to those already submitted
	 * for DMA. But this is hard to do in a race-free manner.
	 */
	if (!list_empty(&dwc->active_list)) {
		/* Something is already running: wait in the queue */
		dev_vdbg(&tx->chan->dev, "tx_submit: queued %u\n",
				desc->txd.cookie);

		list_add_tail(&desc->desc_node, &dwc->queue);
	} else {
		dev_vdbg(&tx->chan->dev, "tx_submit: started %u\n",
				desc->txd.cookie);

		dwc_dostart(dwc, desc);
		list_add_tail(&desc->desc_node, &dwc->active_list);
	}

	spin_unlock_bh(&dwc->lock);

	return cookie;
}
/*
 * Build a descriptor chain for a memory-to-memory copy of @len bytes
 * from bus address @src to bus address @dest.
 *
 * The copy is split into blocks of at most DWC_MAX_COUNT transfers; one
 * descriptor is used per block. The first descriptor represents the
 * whole transaction and the others are linked on its txd.tx_list.
 *
 * Returns the transaction descriptor, or NULL if @len is zero or the
 * channel ran out of free descriptors (descriptors taken so far are
 * put back).
 */
static struct dma_async_tx_descriptor *
dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
size_t len, unsigned long flags)
{
struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
struct dw_desc *desc;
struct dw_desc *first;
struct dw_desc *prev;
size_t xfer_count;
size_t offset;
unsigned int src_width;
unsigned int dst_width;
u32 ctllo;
dev_vdbg(&chan->dev, "prep_dma_memcpy d0x%x s0x%x l0x%zx f0x%lx\n",
dest, src, len, flags);
if (unlikely(!len)) {
dev_dbg(&chan->dev, "prep_dma_memcpy: length is zero!\n");
return NULL;
}
/*
* We can be a lot more clever here, but this should take care
* of the most common optimization.
*/
/*
 * The width is the log2 of the transfer size in bytes: 4-byte
 * transfers if addresses and length are all multiples of 4, 2-byte
 * transfers if they are all even, single bytes otherwise.
 */
if (!((src | dest | len) & 3))
src_width = dst_width = 2;
else if (!((src | dest | len) & 1))
src_width = dst_width = 1;
else
src_width = dst_width = 0;
ctllo = DWC_DEFAULT_CTLLO
| DWC_CTLL_DST_WIDTH(dst_width)
| DWC_CTLL_SRC_WIDTH(src_width)
| DWC_CTLL_DST_INC
| DWC_CTLL_SRC_INC
| DWC_CTLL_FC_M2M;
prev = first = NULL;
/* One descriptor per block; offset advances by the bytes each block covers */
for (offset = 0; offset < len; offset += xfer_count << src_width) {
xfer_count = min_t(size_t, (len - offset) >> src_width,
DWC_MAX_COUNT);
desc = dwc_desc_get(dwc);
if (!desc)
goto err_desc_get;
desc->lli.sar = src + offset;
desc->lli.dar = dest + offset;
desc->lli.ctllo = ctllo;
desc->lli.ctlhi = xfer_count;
if (!first) {
first = desc;
} else {
/* Link the previous block to this one and flush it for the device */
prev->lli.llp = desc->txd.phys;
dma_sync_single_for_device(chan->dev.parent,
prev->txd.phys, sizeof(prev->lli),
DMA_TO_DEVICE);
list_add_tail(&desc->desc_node,
&first->txd.tx_list);
}
prev = desc;
}
if (flags & DMA_PREP_INTERRUPT)
/* Trigger interrupt after last block */
prev->lli.ctllo |= DWC_CTLL_INT_EN;
/* A zero next-pointer terminates the chain */
prev->lli.llp = 0;
dma_sync_single_for_device(chan->dev.parent,
prev->txd.phys, sizeof(prev->lli),
DMA_TO_DEVICE);
first->txd.flags = flags;
first->len = len;
return &first->txd;
err_desc_get:
dwc_desc_put(dwc, first);
return NULL;
}
/*
 * Build a descriptor chain for a slave (peripheral) transfer described
 * by the scatterlist @sgl.
 *
 * The scatterlist is mapped for DMA here and one descriptor is used per
 * mapped entry. For DMA_TO_DEVICE the data goes from memory to the
 * slave's tx_reg, for DMA_FROM_DEVICE from the slave's rx_reg to
 * memory. The first descriptor represents the whole transaction and
 * the others are linked on its txd.tx_list.
 *
 * Returns the transaction descriptor, or NULL if the channel has no
 * slave configuration, @sg_len is zero, @direction is not one of the
 * two supported values, or the channel ran out of free descriptors.
 *
 * NOTE(review): if dma_map_sg() returns 0 neither loop body runs and
 * `prev' is still NULL when it is dereferenced after the switch -
 * confirm whether that can happen.
 * NOTE(review): the NULL-returning paths after dma_map_sg() do not
 * unmap the scatterlist here - confirm who is responsible for that.
 */
static struct dma_async_tx_descriptor *
dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
unsigned int sg_len, enum dma_data_direction direction,
unsigned long flags)
{
struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
struct dw_dma_slave *dws = dwc->dws;
struct dw_desc *prev;
struct dw_desc *first;
u32 ctllo;
dma_addr_t reg;
unsigned int reg_width;
unsigned int mem_width;
unsigned int i;
struct scatterlist *sg;
size_t total_len = 0;
dev_vdbg(&chan->dev, "prep_dma_slave\n");
if (unlikely(!dws || !sg_len))
return NULL;
reg_width = dws->slave.reg_width;
prev = first = NULL;
/* From here on sg_len is the number of mapped entries */
sg_len = dma_map_sg(chan->dev.parent, sgl, sg_len, direction);
switch (direction) {
case DMA_TO_DEVICE:
/* Memory -> peripheral: fixed destination register, incrementing source */
ctllo = (DWC_DEFAULT_CTLLO
| DWC_CTLL_DST_WIDTH(reg_width)
| DWC_CTLL_DST_FIX
| DWC_CTLL_SRC_INC
| DWC_CTLL_FC_M2P);
reg = dws->slave.tx_reg;
for_each_sg(sgl, sg, sg_len, i) {
struct dw_desc *desc;
u32 len;
u32 mem;
desc = dwc_desc_get(dwc);
if (!desc) {
dev_err(&chan->dev,
"not enough descriptors available\n");
goto err_desc_get;
}
mem = sg_phys(sg);
len = sg_dma_len(sg);
/* Use 4-byte memory accesses unless address or length is unaligned */
mem_width = 2;
if (unlikely(mem & 3 || len & 3))
mem_width = 0;
desc->lli.sar = mem;
desc->lli.dar = reg;
desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width);
/* Count expressed in source (memory) width units */
desc->lli.ctlhi = len >> mem_width;
if (!first) {
first = desc;
} else {
prev->lli.llp = desc->txd.phys;
dma_sync_single_for_device(chan->dev.parent,
prev->txd.phys,
sizeof(prev->lli),
DMA_TO_DEVICE);
list_add_tail(&desc->desc_node,
&first->txd.tx_list);
}
prev = desc;
total_len += len;
}
break;
case DMA_FROM_DEVICE:
/* Peripheral -> memory: fixed source register, incrementing destination */
ctllo = (DWC_DEFAULT_CTLLO
| DWC_CTLL_SRC_WIDTH(reg_width)
| DWC_CTLL_DST_INC
| DWC_CTLL_SRC_FIX
| DWC_CTLL_FC_P2M);
reg = dws->slave.rx_reg;
for_each_sg(sgl, sg, sg_len, i) {
struct dw_desc *desc;
u32 len;
u32 mem;
desc = dwc_desc_get(dwc);
if (!desc) {
dev_err(&chan->dev,
"not enough descriptors available\n");
goto err_desc_get;
}
mem = sg_phys(sg);
len = sg_dma_len(sg);
/* Use 4-byte memory accesses unless address or length is unaligned */
mem_width = 2;
if (unlikely(mem & 3 || len & 3))
mem_width = 0;
desc->lli.sar = reg;
desc->lli.dar = mem;
desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width);
/* Count expressed in source (register) width units */
desc->lli.ctlhi = len >> reg_width;
if (!first) {
first = desc;
} else {
prev->lli.llp = desc->txd.phys;
dma_sync_single_for_device(chan->dev.parent,
prev->txd.phys,
sizeof(prev->lli),
DMA_TO_DEVICE);
list_add_tail(&desc->desc_node,
&first->txd.tx_list);
}
prev = desc;
total_len += len;
}
break;
default:
return NULL;
}
if (flags & DMA_PREP_INTERRUPT)
/* Trigger interrupt after last block */
prev->lli.ctllo |= DWC_CTLL_INT_EN;
/* A zero next-pointer terminates the chain */
prev->lli.llp = 0;
dma_sync_single_for_device(chan->dev.parent,
prev->txd.phys, sizeof(prev->lli),
DMA_TO_DEVICE);
first->len = total_len;
return &first->txd;
err_desc_get:
dwc_desc_put(dwc, first);
return NULL;
}
/*
 * Abort everything on the channel: disable it, wait until the hardware
 * reports it disabled, and complete all active and queued descriptors
 * (which also runs their callbacks).
 *
 * NOTE(review): dwc_descriptor_complete() is called here after
 * dwc->lock has been dropped although it modifies dwc->free_list;
 * the tasklet calls it with the lock held - confirm this is safe.
 */
static void dwc_terminate_all(struct dma_chan *chan)
{
struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
struct dw_dma *dw = to_dw_dma(chan->device);
struct dw_desc *desc, *_desc;
LIST_HEAD(list);
/*
* This is only called when something went wrong elsewhere, so
* we don't really care about the data. Just disable the
* channel. We still have to poll the channel enable bit due
* to AHB/HSB limitations.
*/
spin_lock_bh(&dwc->lock);
channel_clear_bit(dw, CH_EN, dwc->mask);
while (dma_readl(dw, CH_EN) & dwc->mask)
cpu_relax();
/* active_list entries will end up before queued entries */
list_splice_init(&dwc->queue, &list);
list_splice_init(&dwc->active_list, &list);
spin_unlock_bh(&dwc->lock);
/* Flush all pending and queued descriptors */
list_for_each_entry_safe(desc, _desc, &list, desc_node)
dwc_descriptor_complete(dwc, desc);
}
/*
 * Report the status of the transaction identified by @cookie.
 *
 * If the transaction does not look complete from the cached state, the
 * channel's descriptors are scanned once and the check is repeated.
 *
 * @done and @used, when not NULL, receive the last completed and the
 * last issued cookie of the channel.
 *
 * Returns the result of dma_async_is_complete() for @cookie.
 */
static enum dma_status
dwc_is_tx_complete(struct dma_chan *chan,
		dma_cookie_t cookie,
		dma_cookie_t *done, dma_cookie_t *used)
{
	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
	dma_cookie_t last_used;
	dma_cookie_t last_complete;
	int ret;

	last_complete = dwc->completed;
	last_used = chan->cookie;

	ret = dma_async_is_complete(cookie, last_complete, last_used);
	if (ret != DMA_SUCCESS) {
		/*
		 * The scan walks and modifies the channel's descriptor
		 * lists, so it has to run under dwc->lock just like it
		 * does in the tasklet and in dwc_issue_pending().
		 */
		spin_lock_bh(&dwc->lock);
		dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
		spin_unlock_bh(&dwc->lock);

		last_complete = dwc->completed;
		last_used = chan->cookie;

		ret = dma_async_is_complete(cookie, last_complete, last_used);
	}

	if (done)
		*done = last_complete;
	if (used)
		*used = last_used;

	return ret;
}
/*
 * issue_pending hook: if descriptors are waiting in the queue, scan the
 * channel so that finished work is completed and queued work can start.
 */
static void dwc_issue_pending(struct dma_chan *chan)
{
	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
	struct dw_dma *dw = to_dw_dma(chan->device);

	spin_lock_bh(&dwc->lock);

	if (!list_empty(&dwc->queue))
		dwc_scan_descriptors(dw, dwc);

	spin_unlock_bh(&dwc->lock);
}
/*
 * Prepare a channel for use by @client.
 *
 * Configures the channel (using the slave's cfg_hi/cfg_lo values when
 * the client is a slave client), tops the descriptor pool up to
 * NR_DESCS_PER_CHANNEL entries and enables the channel's interrupts.
 *
 * Returns the number of descriptors allocated for the channel,
 * -EBUSY if a slave channel or slave client would have to share the
 * channel with another client, or -EIO if the channel is not idle.
 */
static int dwc_alloc_chan_resources(struct dma_chan *chan,
struct dma_client *client)
{
struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
struct dw_dma *dw = to_dw_dma(chan->device);
struct dw_desc *desc;
struct dma_slave *slave;
struct dw_dma_slave *dws;
int i;
u32 cfghi;
u32 cfglo;
dev_vdbg(&chan->dev, "alloc_chan_resources\n");
/* Channels doing slave DMA can only handle one client. */
if (dwc->dws || client->slave) {
if (chan->client_count)
return -EBUSY;
}
/* ASSERT: channel is idle */
if (dma_readl(dw, CH_EN) & dwc->mask) {
dev_dbg(&chan->dev, "DMA channel not idle?\n");
return -EIO;
}
dwc->completed = chan->cookie = 1;
/* Defaults for non-slave use; overridden below for slave clients */
cfghi = DWC_CFGH_FIFO_MODE;
cfglo = 0;
slave = client->slave;
if (slave) {
/*
* We need controller-specific data to set up slave
* transfers.
*/
BUG_ON(!slave->dma_dev || slave->dma_dev != dw->dma.dev);
dws = container_of(slave, struct dw_dma_slave, slave);
dwc->dws = dws;
cfghi = dws->cfg_hi;
cfglo = dws->cfg_lo;
} else {
dwc->dws = NULL;
}
channel_writel(dwc, CFG_LO, cfglo);
channel_writel(dwc, CFG_HI, cfghi);
/*
* NOTE: some controllers may have additional features that we
* need to initialize here, like "scatter-gather" (which
* doesn't mean what you think it means), and status writeback.
*/
spin_lock_bh(&dwc->lock);
i = dwc->descs_allocated;
while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) {
/* Drop the lock around the GFP_KERNEL allocation and dwc_desc_put() */
spin_unlock_bh(&dwc->lock);
desc = kzalloc(sizeof(struct dw_desc), GFP_KERNEL);
if (!desc) {
dev_info(&chan->dev,
"only allocated %d descriptors\n", i);
spin_lock_bh(&dwc->lock);
break;
}
dma_async_tx_descriptor_init(&desc->txd, chan);
desc->txd.tx_submit = dwc_tx_submit;
/* Start out ACKed so that dwc_desc_get() can hand it out */
desc->txd.flags = DMA_CTRL_ACK;
INIT_LIST_HEAD(&desc->txd.tx_list);
desc->txd.phys = dma_map_single(chan->dev.parent, &desc->lli,
sizeof(desc->lli), DMA_TO_DEVICE);
dwc_desc_put(dwc, desc);
spin_lock_bh(&dwc->lock);
i = ++dwc->descs_allocated;
}
/* Enable interrupts */
channel_set_bit(dw, MASK.XFER, dwc->mask);
channel_set_bit(dw, MASK.BLOCK, dwc->mask);
channel_set_bit(dw, MASK.ERROR, dwc->mask);
spin_unlock_bh(&dwc->lock);
dev_dbg(&chan->dev,
"alloc_chan_resources allocated %d descriptors\n", i);
return i;
}
/*
 * Release everything dwc_alloc_chan_resources() set up: disable the
 * channel's interrupts, forget the slave configuration, and unmap and
 * free every descriptor on the free list.
 *
 * The channel must be idle: the BUG_ON()s below trigger if anything is
 * still active or queued, or if the hardware channel is still enabled.
 */
static void dwc_free_chan_resources(struct dma_chan *chan)
{
struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
struct dw_dma *dw = to_dw_dma(chan->device);
struct dw_desc *desc, *_desc;
LIST_HEAD(list);
dev_dbg(&chan->dev, "free_chan_resources (descs allocated=%u)\n",
dwc->descs_allocated);
/* ASSERT: channel is idle */
BUG_ON(!list_empty(&dwc->active_list));
BUG_ON(!list_empty(&dwc->queue));
BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask);
spin_lock_bh(&dwc->lock);
/* Detach the whole pool so it can be freed after the lock is dropped */
list_splice_init(&dwc->free_list, &list);
dwc->descs_allocated = 0;
dwc->dws = NULL;
/* Disable interrupts */
channel_clear_bit(dw, MASK.XFER, dwc->mask);
channel_clear_bit(dw, MASK.BLOCK, dwc->mask);
channel_clear_bit(dw, MASK.ERROR, dwc->mask);
spin_unlock_bh(&dwc->lock);
list_for_each_entry_safe(desc, _desc, &list, desc_node) {
dev_vdbg(&chan->dev, " freeing descriptor %p\n", desc);
dma_unmap_single(chan->dev.parent, desc->txd.phys,
sizeof(desc->lli), DMA_TO_DEVICE);
kfree(desc);
}
dev_vdbg(&chan->dev, "free_chan_resources done\n");
}
/*----------------------------------------------------------------------*/
/*
 * Switch the whole controller off: write 0 to CFG, mask every interrupt
 * source for the channels in dw->all_chan_mask, then busy-wait until the
 * DW_CFG_DMA_EN bit reads back as clear.
 */
static void dw_dma_off(struct dw_dma *dw)
{
	dma_writel(dw, CFG, 0);

	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
	channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
	channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);

	/* Poll until the enable bit drops. */
	for (;;) {
		if (!(dma_readl(dw, CFG) & DW_CFG_DMA_EN))
			break;
		cpu_relax();
	}
}
/*
 * Probe one DesignWare DMA controller platform device.
 *
 * Validates the platform data (channel count at most
 * DW_DMA_MAX_NR_CHANNELS), claims and maps the register window, enables
 * the "hclk" clock, installs the interrupt handler, initializes every
 * channel and finally registers the controller with the dmaengine core.
 *
 * Returns 0 on success or a negative errno; on failure everything
 * acquired so far is released again.
 */
static int __init dw_probe(struct platform_device *pdev)
{
	struct dw_dma_platform_data *pdata;
	struct resource *io;
	struct dw_dma *dw;
	size_t size;
	int irq;
	int err;
	int i;

	pdata = pdev->dev.platform_data;
	if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS)
		return -EINVAL;

	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!io)
		return -EINVAL;

	irq = platform_get_irq(pdev, 0);
	if (irq < 0)
		return irq;

	/* The channel array lives directly behind struct dw_dma. */
	size = sizeof(struct dw_dma);
	size += pdata->nr_channels * sizeof(struct dw_dma_chan);
	dw = kzalloc(size, GFP_KERNEL);
	if (!dw)
		return -ENOMEM;

	if (!request_mem_region(io->start, DW_REGLEN, pdev->dev.driver->name)) {
		err = -EBUSY;
		goto err_kfree;
	}

	/* kzalloc() already zeroed *dw, no extra memset() needed. */

	dw->regs = ioremap(io->start, DW_REGLEN);
	if (!dw->regs) {
		err = -ENOMEM;
		goto err_release_r;
	}

	dw->clk = clk_get(&pdev->dev, "hclk");
	if (IS_ERR(dw->clk)) {
		err = PTR_ERR(dw->clk);
		goto err_clk;
	}
	clk_enable(dw->clk);

	/*
	 * dw_dma_off() masks interrupts for the channels in all_chan_mask,
	 * so the mask has to be computed before the first call; with a zero
	 * mask the interrupt-mask writes would not touch any channel.
	 */
	dw->all_chan_mask = (1 << pdata->nr_channels) - 1;

	/* force dma off, just in case */
	dw_dma_off(dw);

	/*
	 * Set up the tasklet before the interrupt line is requested so it
	 * is valid by the time the handler can first run (the handler is
	 * handed dw as its cookie; its body is not visible here).
	 */
	tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw);

	err = request_irq(irq, dw_dma_interrupt, 0, "dw_dmac", dw);
	if (err)
		goto err_irq;

	platform_set_drvdata(pdev, dw);

	INIT_LIST_HEAD(&dw->dma.channels);
	for (i = 0; i < pdata->nr_channels; i++, dw->dma.chancnt++) {
		struct dw_dma_chan *dwc = &dw->chan[i];

		dwc->chan.device = &dw->dma;
		dwc->chan.cookie = dwc->completed = 1;
		dwc->chan.chan_id = i;
		list_add_tail(&dwc->chan.device_node, &dw->dma.channels);

		dwc->ch_regs = &__dw_regs(dw)->CHAN[i];
		spin_lock_init(&dwc->lock);
		dwc->mask = 1 << i;

		INIT_LIST_HEAD(&dwc->active_list);
		INIT_LIST_HEAD(&dwc->queue);
		INIT_LIST_HEAD(&dwc->free_list);

		channel_clear_bit(dw, CH_EN, dwc->mask);
	}

	/* Clear/disable all interrupts on all channels. */
	dma_writel(dw, CLEAR.XFER, dw->all_chan_mask);
	dma_writel(dw, CLEAR.BLOCK, dw->all_chan_mask);
	dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask);
	dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask);
	dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask);

	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
	channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
	channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);

	dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
	dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
	dw->dma.dev = &pdev->dev;
	dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources;
	dw->dma.device_free_chan_resources = dwc_free_chan_resources;

	dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy;

	dw->dma.device_prep_slave_sg = dwc_prep_slave_sg;
	dw->dma.device_terminate_all = dwc_terminate_all;

	dw->dma.device_is_tx_complete = dwc_is_tx_complete;
	dw->dma.device_issue_pending = dwc_issue_pending;

	dma_writel(dw, CFG, DW_CFG_DMA_EN);

	printk(KERN_INFO "%s: DesignWare DMA Controller, %d channels\n",
			pdev->dev.bus_id, dw->dma.chancnt);

	dma_async_device_register(&dw->dma);

	return 0;

err_irq:
	clk_disable(dw->clk);
	clk_put(dw->clk);
err_clk:
	iounmap(dw->regs);
	dw->regs = NULL;
err_release_r:
	/*
	 * Undo request_mem_region() with its matching release, exactly as
	 * dw_remove() does; "io" is the platform device's own resource and
	 * must not be passed to release_resource().
	 */
	release_mem_region(io->start, DW_REGLEN);
err_kfree:
	kfree(dw);
	return err;
}
/*
 * Undo dw_probe(): switch the controller off, unregister it from the
 * dmaengine core, release the interrupt and tasklet, unlink and disable
 * every channel, then give back clock, mapping, memory region and the
 * driver state itself. Always returns 0.
 */
static int __exit dw_remove(struct platform_device *pdev)
{
	struct dw_dma *dw = platform_get_drvdata(pdev);
	struct dw_dma_chan *ch, *ch_next;
	struct resource *io;

	dw_dma_off(dw);
	dma_async_device_unregister(&dw->dma);

	free_irq(platform_get_irq(pdev, 0), dw);
	tasklet_kill(&dw->tasklet);

	/* Take each channel off the device list and clear its enable bit. */
	list_for_each_entry_safe(ch, ch_next, &dw->dma.channels,
				 chan.device_node) {
		list_del(&ch->chan.device_node);
		channel_clear_bit(dw, CH_EN, ch->mask);
	}

	clk_disable(dw->clk);
	clk_put(dw->clk);

	iounmap(dw->regs);
	dw->regs = NULL;

	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	release_mem_region(io->start, DW_REGLEN);

	kfree(dw);

	return 0;
}
/* Shutdown hook: switch the controller off and gate its clock. */
static void dw_shutdown(struct platform_device *pdev)
{
	struct dw_dma *ctrl = platform_get_drvdata(pdev);

	dw_dma_off(ctrl);
	clk_disable(ctrl->clk);
}
/*
 * Late-suspend hook: switch the controller off and gate its clock.
 * @mesg is not inspected. Always returns 0.
 */
static int dw_suspend_late(struct platform_device *pdev, pm_message_t mesg)
{
	struct dw_dma *ctrl = platform_get_drvdata(pdev);

	dw_dma_off(ctrl);
	clk_disable(ctrl->clk);

	return 0;
}
/*
 * Early-resume hook: ungate the clock first, then set DW_CFG_DMA_EN in
 * the CFG register again. Always returns 0.
 */
static int dw_resume_early(struct platform_device *pdev)
{
	struct dw_dma *ctrl = platform_get_drvdata(pdev);

	clk_enable(ctrl->clk);
	dma_writel(ctrl, CFG, DW_CFG_DMA_EN);

	return 0;
}
/*
 * Platform driver glue for the "dw_dmac" device. No .probe member is set
 * here; dw_init() hands dw_probe to platform_driver_probe() instead.
 */
static struct platform_driver dw_driver = {
	.driver		= {
		.name	= "dw_dmac",
	},
	.remove		= __exit_p(dw_remove),
	.shutdown	= dw_shutdown,
	.suspend_late	= dw_suspend_late,
	.resume_early	= dw_resume_early,
};
/* Module entry point: register dw_driver with dw_probe as its probe routine. */
static int __init dw_init(void)
{
	int ret;

	ret = platform_driver_probe(&dw_driver, dw_probe);
	return ret;
}
module_init(dw_init);
/* Module exit point: the only thing to undo is the driver registration. */
static void __exit dw_exit(void)
{
	struct platform_driver *drv = &dw_driver;

	platform_driver_unregister(drv);
}
module_exit(dw_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver");
MODULE_AUTHOR("Haavard Skinnemoen <haavard.skinnemoen@atmel.com>");
/*
* Driver for the Synopsys DesignWare AHB DMA Controller
*
* Copyright (C) 2005-2007 Atmel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/dw_dmac.h>
/*
 * Number of per-channel register banks laid out in struct dw_dma_regs;
 * dw_probe() rejects platform data asking for more channels than this.
 */
#define DW_DMA_MAX_NR_CHANNELS 8
/*
 * Redefine this macro to handle differences between 32- and 64-bit
 * addressing, big vs. little endian, etc.
 *
 * As defined here, every register declared with DW_REG() takes up two
 * consecutive u32 words: the named register followed by a padding word
 * called __pad_<name>.
 */
#define DW_REG(name) u32 name; u32 __pad_##name
/* Hardware register definitions. */
/*
 * Register bank of a single DMA channel.
 *
 * Each DW_REG() member is a u32 register followed by a u32 pad; CTL and
 * CFG are instead split into explicit LO/HI u32 halves. That makes eleven
 * 8-byte slots, i.e. 0x58 bytes per channel. The member order defines the
 * hardware offsets and must not be changed.
 */
struct dw_dma_chan_regs {
DW_REG(SAR); /* Source Address Register */
DW_REG(DAR); /* Destination Address Register */
DW_REG(LLP); /* Linked List Pointer */
u32 CTL_LO; /* Control Register Low */
u32 CTL_HI; /* Control Register High */
/* NOTE(review): presumably source/destination status and status-address registers -- confirm in databook */
DW_REG(SSTAT);
DW_REG(DSTAT);
DW_REG(SSTATAR);
DW_REG(DSTATAR);
u32 CFG_LO; /* Configuration Register Low */
u32 CFG_HI; /* Configuration Register High */
/* bit layouts for the next two: see the DWC_SGR_* and DWC_DSR_* macros */
DW_REG(SGR);
DW_REG(DSR);
};
/*
 * One register per interrupt source. struct dw_dma_regs instantiates this
 * group four times (RAW, STATUS, MASK and CLEAR); the driver addresses an
 * individual register as e.g. MASK.XFER or CLEAR.ERROR.
 */
struct dw_dma_irq_regs {
DW_REG(XFER);
DW_REG(BLOCK);
DW_REG(SRC_TRAN);
DW_REG(DST_TRAN);
DW_REG(ERROR);
};
/*
 * Complete register map of the controller as seen through dw->regs:
 * DW_DMA_MAX_NR_CHANNELS channel banks first, then the interrupt register
 * groups, the software handshaking registers and the global registers.
 * The member order defines the hardware offsets and must not be changed.
 */
struct dw_dma_regs {
/* per-channel registers */
struct dw_dma_chan_regs CHAN[DW_DMA_MAX_NR_CHANNELS];
/* irq handling */
struct dw_dma_irq_regs RAW; /* r */
struct dw_dma_irq_regs STATUS; /* r (raw & mask) */
struct dw_dma_irq_regs MASK; /* rw (set = irq enabled) */
struct dw_dma_irq_regs CLEAR; /* w (ack, affects "raw") */
DW_REG(STATUS_INT); /* r */
/* software handshaking */
DW_REG(REQ_SRC);
DW_REG(REQ_DST);
DW_REG(SGL_REQ_SRC);
DW_REG(SGL_REQ_DST);
DW_REG(LAST_SRC);
DW_REG(LAST_DST);
/* miscellaneous */
DW_REG(CFG); /* global enable, see DW_CFG_DMA_EN */
DW_REG(CH_EN); /* per-channel enable bits */
DW_REG(ID);
DW_REG(TEST);
/*
 * optional encoded params start at 0x3c8 and are not mapped by this
 * struct (the end offset was truncated in the original comment --
 * TODO confirm against the databook)
 */
};
/*
 * Bitfields in CTL_LO
 *
 * The destination address-update field is encoded at bit 7 and the source
 * address-update field at bit 9. All three values of each field use the
 * shift of their own field, including the zero-valued *_INC encodings, so
 * that the definitions stay correct if they are ever copied or changed.
 */
#define DWC_CTLL_INT_EN		(1 << 0)	/* irqs enabled? */
#define DWC_CTLL_DST_WIDTH(n)	((n)<<1)	/* bytes per element */
#define DWC_CTLL_SRC_WIDTH(n)	((n)<<4)
#define DWC_CTLL_DST_INC	(0<<7)		/* DAR update/not */
#define DWC_CTLL_DST_DEC	(1<<7)
#define DWC_CTLL_DST_FIX	(2<<7)
#define DWC_CTLL_SRC_INC	(0<<9)		/* SAR update/not */
#define DWC_CTLL_SRC_DEC	(1<<9)
#define DWC_CTLL_SRC_FIX	(2<<9)
#define DWC_CTLL_DST_MSIZE(n)	((n)<<11)	/* burst, #elements */
#define DWC_CTLL_SRC_MSIZE(n)	((n)<<14)
#define DWC_CTLL_S_GATH_EN	(1 << 17)	/* src gather, !FIX */
#define DWC_CTLL_D_SCAT_EN	(1 << 18)	/* dst scatter, !FIX */
#define DWC_CTLL_FC_M2M		(0 << 20)	/* mem-to-mem */
#define DWC_CTLL_FC_M2P		(1 << 20)	/* mem-to-periph */
#define DWC_CTLL_FC_P2M		(2 << 20)	/* periph-to-mem */
#define DWC_CTLL_FC_P2P		(3 << 20)	/* periph-to-periph */
/* plus 4 transfer types for peripheral-as-flow-controller */
#define DWC_CTLL_DMS(n)		((n)<<23)	/* dst master select */
#define DWC_CTLL_SMS(n)		((n)<<25)	/* src master select */
#define DWC_CTLL_LLP_D_EN	(1 << 27)	/* dest block chain */
#define DWC_CTLL_LLP_S_EN	(1 << 28)	/* src block chain */
/* Bitfields in CTL_HI */
#define DWC_CTLH_DONE		0x00001000
#define DWC_CTLH_BLOCK_TS_MASK	0x00000fff

/* Bitfields in CFG_LO. Platform-configurable bits are in <linux/dw_dmac.h> */
#define DWC_CFGL_CH_SUSP	(1 << 8)	/* pause xfer */
/* NOTE(review): presumably a "channel FIFO is empty" status bit -- confirm in databook */
#define DWC_CFGL_FIFO_EMPTY	(1 << 9)
#define DWC_CFGL_HS_DST		(1 << 10)	/* handshake w/dst */
#define DWC_CFGL_HS_SRC		(1 << 11)	/* handshake w/src */
#define DWC_CFGL_MAX_BURST(x)	((x) << 20)
#define DWC_CFGL_RELOAD_SAR	(1 << 30)
/*
 * Bit 31 must be built from an unsigned constant: shifting a signed 1
 * into the sign bit of a 32-bit int is undefined behaviour in C.
 */
#define DWC_CFGL_RELOAD_DAR	(1U << 31)

/* Bitfields in CFG_HI. Platform-configurable bits are in <linux/dw_dmac.h> */
#define DWC_CFGH_DS_UPD_EN	(1 << 5)
#define DWC_CFGH_SS_UPD_EN	(1 << 6)

/* Bitfields in SGR */
#define DWC_SGR_SGI(x)		((x) << 0)
#define DWC_SGR_SGC(x)		((x) << 20)

/* Bitfields in DSR */
#define DWC_DSR_DSI(x)		((x) << 0)
#define DWC_DSR_DSC(x)		((x) << 20)

/* Bitfields in CFG */
#define DW_CFG_DMA_EN		(1 << 0)

/* Size in bytes of the register window that the driver claims and maps. */
#define DW_REGLEN		0x400
/*
 * Driver state for one DMA channel. It embeds the generic struct dma_chan;
 * to_dw_dma_chan() converts from the embedded member back to this struct.
 */
struct dw_dma_chan {
struct dma_chan chan;
/* this channel's register bank; accessed via channel_readl()/channel_writel() */
void __iomem *ch_regs;
/* this channel's bit (1 << channel index) in the controller-wide registers */
u8 mask;
spinlock_t lock;
/* these other elements are all protected by lock */
dma_cookie_t completed;
struct list_head active_list;
struct list_head queue;
struct list_head free_list;
/* slave configuration of the current client, or NULL when none is attached */
struct dw_dma_slave *dws;
/* number of descriptors currently allocated for this channel */
unsigned int descs_allocated;
};
/* Give the channel's untyped register pointer its struct dw_dma_chan_regs type. */
static inline struct dw_dma_chan_regs __iomem *
__dwc_regs(struct dw_dma_chan *dwc)
{
	struct dw_dma_chan_regs __iomem *bank = dwc->ch_regs;

	return bank;
}
/*
 * Read or write one register of a channel. "name" is a member of
 * struct dw_dma_chan_regs (e.g. CFG_LO). The accessors are the __raw_
 * variants, applied to the address of that member in the channel's bank.
 */
#define channel_readl(dwc, name) \
__raw_readl(&(__dwc_regs(dwc)->name))
#define channel_writel(dwc, name, val) \
__raw_writel((val), &(__dwc_regs(dwc)->name))
/* Map a generic dma_chan back to the dw_dma_chan that embeds it. */
static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan)
{
	struct dw_dma_chan *dwc;

	dwc = container_of(chan, struct dw_dma_chan, chan);
	return dwc;
}
/*
 * Driver state for one controller. It embeds the generic struct dma_device;
 * to_dw_dma() converts from the embedded member back to this struct.
 */
struct dw_dma {
struct dma_device dma;
/* mapped register window; typed access goes through dma_readl()/dma_writel() */
void __iomem *regs;
struct tasklet_struct tasklet;
struct clk *clk;
/* one bit set for every channel the platform data declares */
u8 all_chan_mask;
/* trailing channel array; dw_probe() allocates it together with this struct */
struct dw_dma_chan chan[0];
};
/* Give the controller's untyped register pointer its struct dw_dma_regs type. */
static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
{
	struct dw_dma_regs __iomem *map = dw->regs;

	return map;
}
/*
 * Read or write one controller-wide register. "name" is a member path
 * inside struct dw_dma_regs (e.g. CFG or MASK.XFER).
 */
#define dma_readl(dw, name) \
__raw_readl(&(__dw_regs(dw)->name))
#define dma_writel(dw, name, val) \
__raw_writel((val), &(__dw_regs(dw)->name))
/*
 * Set or clear per-channel bits in a register. Both macros put the channel
 * mask in bits 8..15 of the written value; the low byte carries the new
 * value (the mask again to set, 0 to clear).
 * NOTE(review): the upper byte presumably acts as a per-bit write enable,
 * so channels outside "mask" are left untouched -- confirm in databook.
 */
#define channel_set_bit(dw, reg, mask) \
dma_writel(dw, reg, ((mask) << 8) | (mask))
#define channel_clear_bit(dw, reg, mask) \
dma_writel(dw, reg, ((mask) << 8) | 0)
/* Map a generic dma_device back to the dw_dma that embeds it. */
static inline struct dw_dma *to_dw_dma(struct dma_device *ddev)
{
	struct dw_dma *dw;

	dw = container_of(ddev, struct dw_dma, dma);
	return dw;
}
/*
 * LLI == Linked List Item; a.k.a. DMA block descriptor
 *
 * The member order is part of the format shared with the hardware and
 * must not be changed.
 */
struct dw_lli {
/* values that are not changed by hardware */
dma_addr_t sar; /* source address */
dma_addr_t dar; /* destination address */
dma_addr_t llp; /* chain to next lli */
u32 ctllo; /* NOTE(review): presumably the CTL_LO value for this block -- confirm */
/* values that may get written back: */
u32 ctlhi;
/* sstat and dstat can snapshot peripheral register state.
 * silicon config may discard either or both...
 */
u32 sstat;
u32 dstat;
};
/*
 * Software descriptor: one hardware LLI plus the bookkeeping the driver
 * needs around it. The lli member is what gets DMA-mapped for the
 * controller; the resulting bus address is kept in txd.phys.
 */
struct dw_desc {
/* FIRST values the hardware uses */
struct dw_lli lli;
/* THEN values for driver housekeeping */
/* links the descriptor into one of the channel's descriptor lists */
struct list_head desc_node;
struct dma_async_tx_descriptor txd;
size_t len;
};
/* Map a generic async tx descriptor back to the dw_desc that embeds it. */
static inline struct dw_desc *
txd_to_dw_desc(struct dma_async_tx_descriptor *txd)
{
	struct dw_desc *desc;

	desc = container_of(txd, struct dw_desc, txd);
	return desc;
}
...@@ -366,7 +366,8 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor( ...@@ -366,7 +366,8 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
* *
* Return - The number of descriptors allocated. * Return - The number of descriptors allocated.
*/ */
static int fsl_dma_alloc_chan_resources(struct dma_chan *chan) static int fsl_dma_alloc_chan_resources(struct dma_chan *chan,
struct dma_client *client)
{ {
struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan); struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
LIST_HEAD(tmp_list); LIST_HEAD(tmp_list);
...@@ -809,8 +810,7 @@ static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan) ...@@ -809,8 +810,7 @@ static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan)
if (!src) { if (!src) {
dev_err(fsl_chan->dev, dev_err(fsl_chan->dev,
"selftest: Cannot alloc memory for test!\n"); "selftest: Cannot alloc memory for test!\n");
err = -ENOMEM; return -ENOMEM;
goto out;
} }
dest = src + test_size; dest = src + test_size;
...@@ -820,7 +820,7 @@ static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan) ...@@ -820,7 +820,7 @@ static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan)
chan = &fsl_chan->common; chan = &fsl_chan->common;
if (fsl_dma_alloc_chan_resources(chan) < 1) { if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) {
dev_err(fsl_chan->dev, dev_err(fsl_chan->dev,
"selftest: Cannot alloc resources for DMA\n"); "selftest: Cannot alloc resources for DMA\n");
err = -ENODEV; err = -ENODEV;
...@@ -842,13 +842,13 @@ static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan) ...@@ -842,13 +842,13 @@ static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan)
if (fsl_dma_is_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS) { if (fsl_dma_is_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS) {
dev_err(fsl_chan->dev, "selftest: Time out!\n"); dev_err(fsl_chan->dev, "selftest: Time out!\n");
err = -ENODEV; err = -ENODEV;
goto out; goto free_resources;
} }
/* Test free and re-alloc channel resources */ /* Test free and re-alloc channel resources */
fsl_dma_free_chan_resources(chan); fsl_dma_free_chan_resources(chan);
if (fsl_dma_alloc_chan_resources(chan) < 1) { if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) {
dev_err(fsl_chan->dev, dev_err(fsl_chan->dev,
"selftest: Cannot alloc resources for DMA\n"); "selftest: Cannot alloc resources for DMA\n");
err = -ENODEV; err = -ENODEV;
...@@ -927,8 +927,7 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev, ...@@ -927,8 +927,7 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
if (!new_fsl_chan) { if (!new_fsl_chan) {
dev_err(&dev->dev, "No free memory for allocating " dev_err(&dev->dev, "No free memory for allocating "
"dma channels!\n"); "dma channels!\n");
err = -ENOMEM; return -ENOMEM;
goto err;
} }
/* get dma channel register base */ /* get dma channel register base */
...@@ -936,7 +935,7 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev, ...@@ -936,7 +935,7 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
if (err) { if (err) {
dev_err(&dev->dev, "Can't get %s property 'reg'\n", dev_err(&dev->dev, "Can't get %s property 'reg'\n",
dev->node->full_name); dev->node->full_name);
goto err; goto err_no_reg;
} }
new_fsl_chan->feature = *(u32 *)match->data; new_fsl_chan->feature = *(u32 *)match->data;
...@@ -958,7 +957,7 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev, ...@@ -958,7 +957,7 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
dev_err(&dev->dev, "There is no %d channel!\n", dev_err(&dev->dev, "There is no %d channel!\n",
new_fsl_chan->id); new_fsl_chan->id);
err = -EINVAL; err = -EINVAL;
goto err; goto err_no_chan;
} }
fdev->chan[new_fsl_chan->id] = new_fsl_chan; fdev->chan[new_fsl_chan->id] = new_fsl_chan;
tasklet_init(&new_fsl_chan->tasklet, dma_do_tasklet, tasklet_init(&new_fsl_chan->tasklet, dma_do_tasklet,
...@@ -997,23 +996,26 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev, ...@@ -997,23 +996,26 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev,
if (err) { if (err) {
dev_err(&dev->dev, "DMA channel %s request_irq error " dev_err(&dev->dev, "DMA channel %s request_irq error "
"with return %d\n", dev->node->full_name, err); "with return %d\n", dev->node->full_name, err);
goto err; goto err_no_irq;
} }
} }
err = fsl_dma_self_test(new_fsl_chan); err = fsl_dma_self_test(new_fsl_chan);
if (err) if (err)
goto err; goto err_self_test;
dev_info(&dev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id, dev_info(&dev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
match->compatible, new_fsl_chan->irq); match->compatible, new_fsl_chan->irq);
return 0; return 0;
err:
dma_halt(new_fsl_chan); err_self_test:
iounmap(new_fsl_chan->reg_base);
free_irq(new_fsl_chan->irq, new_fsl_chan); free_irq(new_fsl_chan->irq, new_fsl_chan);
err_no_irq:
list_del(&new_fsl_chan->common.device_node); list_del(&new_fsl_chan->common.device_node);
err_no_chan:
iounmap(new_fsl_chan->reg_base);
err_no_reg:
kfree(new_fsl_chan); kfree(new_fsl_chan);
return err; return err;
} }
...@@ -1054,8 +1056,7 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev, ...@@ -1054,8 +1056,7 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL); fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL);
if (!fdev) { if (!fdev) {
dev_err(&dev->dev, "No enough memory for 'priv'\n"); dev_err(&dev->dev, "No enough memory for 'priv'\n");
err = -ENOMEM; return -ENOMEM;
goto err;
} }
fdev->dev = &dev->dev; fdev->dev = &dev->dev;
INIT_LIST_HEAD(&fdev->common.channels); INIT_LIST_HEAD(&fdev->common.channels);
...@@ -1065,7 +1066,7 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev, ...@@ -1065,7 +1066,7 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
if (err) { if (err) {
dev_err(&dev->dev, "Can't get %s property 'reg'\n", dev_err(&dev->dev, "Can't get %s property 'reg'\n",
dev->node->full_name); dev->node->full_name);
goto err; goto err_no_reg;
} }
dev_info(&dev->dev, "Probe the Freescale DMA driver for %s " dev_info(&dev->dev, "Probe the Freescale DMA driver for %s "
...@@ -1103,6 +1104,7 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev, ...@@ -1103,6 +1104,7 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
err: err:
iounmap(fdev->reg_base); iounmap(fdev->reg_base);
err_no_reg:
kfree(fdev); kfree(fdev);
return err; return err;
} }
......
...@@ -47,6 +47,16 @@ static struct pci_device_id ioat_pci_tbl[] = { ...@@ -47,6 +47,16 @@ static struct pci_device_id ioat_pci_tbl[] = {
/* I/OAT v2 platforms */ /* I/OAT v2 platforms */
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
/* I/OAT v3 platforms */
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
{ 0, } { 0, }
}; };
...@@ -83,6 +93,11 @@ static int ioat_setup_functionality(struct pci_dev *pdev, void __iomem *iobase) ...@@ -83,6 +93,11 @@ static int ioat_setup_functionality(struct pci_dev *pdev, void __iomem *iobase)
if (device->dma && ioat_dca_enabled) if (device->dma && ioat_dca_enabled)
device->dca = ioat2_dca_init(pdev, iobase); device->dca = ioat2_dca_init(pdev, iobase);
break; break;
case IOAT_VER_3_0:
device->dma = ioat_dma_probe(pdev, iobase);
if (device->dma && ioat_dca_enabled)
device->dca = ioat3_dca_init(pdev, iobase);
break;
default: default:
err = -ENODEV; err = -ENODEV;
break; break;
......
...@@ -37,12 +37,18 @@ ...@@ -37,12 +37,18 @@
#include "ioatdma_registers.h" #include "ioatdma_registers.h"
/* /*
* Bit 16 of a tag map entry is the "valid" bit, if it is set then bits 0:15 * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
* contain the bit number of the APIC ID to map into the DCA tag. If the valid * contain the bit number of the APIC ID to map into the DCA tag. If the valid
* bit is not set, then the value must be 0 or 1 and defines the bit in the tag. * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
*/ */
#define DCA_TAG_MAP_VALID 0x80 #define DCA_TAG_MAP_VALID 0x80
#define DCA3_TAG_MAP_BIT_TO_INV 0x80
#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
#define DCA3_TAG_MAP_LITERAL_VAL 0x1
#define DCA_TAG_MAP_MASK 0xDF
/* /*
* "Legacy" DCA systems do not implement the DCA register set in the * "Legacy" DCA systems do not implement the DCA register set in the
* I/OAT device. Software needs direct support for their tag mappings. * I/OAT device. Software needs direct support for their tag mappings.
...@@ -95,6 +101,7 @@ struct ioat_dca_slot { ...@@ -95,6 +101,7 @@ struct ioat_dca_slot {
}; };
#define IOAT_DCA_MAX_REQ 6 #define IOAT_DCA_MAX_REQ 6
#define IOAT3_DCA_MAX_REQ 2
struct ioat_dca_priv { struct ioat_dca_priv {
void __iomem *iobase; void __iomem *iobase;
...@@ -171,7 +178,9 @@ static int ioat_dca_remove_requester(struct dca_provider *dca, ...@@ -171,7 +178,9 @@ static int ioat_dca_remove_requester(struct dca_provider *dca,
return -ENODEV; return -ENODEV;
} }
static u8 ioat_dca_get_tag(struct dca_provider *dca, int cpu) static u8 ioat_dca_get_tag(struct dca_provider *dca,
struct device *dev,
int cpu)
{ {
struct ioat_dca_priv *ioatdca = dca_priv(dca); struct ioat_dca_priv *ioatdca = dca_priv(dca);
int i, apic_id, bit, value; int i, apic_id, bit, value;
...@@ -193,10 +202,26 @@ static u8 ioat_dca_get_tag(struct dca_provider *dca, int cpu) ...@@ -193,10 +202,26 @@ static u8 ioat_dca_get_tag(struct dca_provider *dca, int cpu)
return tag; return tag;
} }
/*
 * Report whether @dev occupies one of this provider's requester slots.
 * Returns 1 when a slot holds the device's pci_dev, 0 otherwise.
 */
static int ioat_dca_dev_managed(struct dca_provider *dca,
				struct device *dev)
{
	struct ioat_dca_priv *priv = dca_priv(dca);
	struct pci_dev *wanted = to_pci_dev(dev);
	int slot = 0;

	while (slot < priv->max_requesters) {
		if (priv->req_slots[slot].pdev == wanted)
			return 1;
		slot++;
	}

	return 0;
}
static struct dca_ops ioat_dca_ops = { static struct dca_ops ioat_dca_ops = {
.add_requester = ioat_dca_add_requester, .add_requester = ioat_dca_add_requester,
.remove_requester = ioat_dca_remove_requester, .remove_requester = ioat_dca_remove_requester,
.get_tag = ioat_dca_get_tag, .get_tag = ioat_dca_get_tag,
.dev_managed = ioat_dca_dev_managed,
}; };
...@@ -207,6 +232,8 @@ struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) ...@@ -207,6 +232,8 @@ struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
u8 *tag_map = NULL; u8 *tag_map = NULL;
int i; int i;
int err; int err;
u8 version;
u8 max_requesters;
if (!system_has_dca_enabled(pdev)) if (!system_has_dca_enabled(pdev))
return NULL; return NULL;
...@@ -237,15 +264,20 @@ struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) ...@@ -237,15 +264,20 @@ struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
if (tag_map == NULL) if (tag_map == NULL)
return NULL; return NULL;
version = readb(iobase + IOAT_VER_OFFSET);
if (version == IOAT_VER_3_0)
max_requesters = IOAT3_DCA_MAX_REQ;
else
max_requesters = IOAT_DCA_MAX_REQ;
dca = alloc_dca_provider(&ioat_dca_ops, dca = alloc_dca_provider(&ioat_dca_ops,
sizeof(*ioatdca) + sizeof(*ioatdca) +
(sizeof(struct ioat_dca_slot) * IOAT_DCA_MAX_REQ)); (sizeof(struct ioat_dca_slot) * max_requesters));
if (!dca) if (!dca)
return NULL; return NULL;
ioatdca = dca_priv(dca); ioatdca = dca_priv(dca);
ioatdca->max_requesters = IOAT_DCA_MAX_REQ; ioatdca->max_requesters = max_requesters;
ioatdca->dca_base = iobase + 0x54; ioatdca->dca_base = iobase + 0x54;
/* copy over the APIC ID to DCA tag mapping */ /* copy over the APIC ID to DCA tag mapping */
...@@ -323,11 +355,13 @@ static int ioat2_dca_remove_requester(struct dca_provider *dca, ...@@ -323,11 +355,13 @@ static int ioat2_dca_remove_requester(struct dca_provider *dca,
return -ENODEV; return -ENODEV;
} }
static u8 ioat2_dca_get_tag(struct dca_provider *dca, int cpu) static u8 ioat2_dca_get_tag(struct dca_provider *dca,
struct device *dev,
int cpu)
{ {
u8 tag; u8 tag;
tag = ioat_dca_get_tag(dca, cpu); tag = ioat_dca_get_tag(dca, dev, cpu);
tag = (~tag) & 0x1F; tag = (~tag) & 0x1F;
return tag; return tag;
} }
...@@ -336,6 +370,7 @@ static struct dca_ops ioat2_dca_ops = { ...@@ -336,6 +370,7 @@ static struct dca_ops ioat2_dca_ops = {
.add_requester = ioat2_dca_add_requester, .add_requester = ioat2_dca_add_requester,
.remove_requester = ioat2_dca_remove_requester, .remove_requester = ioat2_dca_remove_requester,
.get_tag = ioat2_dca_get_tag, .get_tag = ioat2_dca_get_tag,
.dev_managed = ioat_dca_dev_managed,
}; };
static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
...@@ -425,3 +460,198 @@ struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) ...@@ -425,3 +460,198 @@ struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
return dca; return dca;
} }
/*
 * Register @dev as a DCA requester with an I/OAT v3 provider.
 *
 * Claims the first empty requester slot, records the device and its DCA
 * id there and writes the id, flagged valid, into the matching entry of
 * the global requester-id table.
 *
 * Returns the slot index on success, -ENODEV when @dev is not a PCI
 * device or all slots are taken, and -EFAULT when the requester count
 * claims a free slot but none can be found.
 */
static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
{
	struct ioat_dca_priv *priv = dca_priv(dca);
	struct pci_dev *pdev;
	int slot;
	u16 id;
	u16 table;

	/* This implementation only supports PCI-Express */
	if (dev->bus != &pci_bus_type)
		return -ENODEV;

	pdev = to_pci_dev(dev);
	id = dcaid_from_pcidev(pdev);

	if (priv->requester_count == priv->max_requesters)
		return -ENODEV;

	for (slot = 0; slot < priv->max_requesters; slot++) {
		if (priv->req_slots[slot].pdev != NULL)
			continue;

		/* found an empty slot */
		priv->requester_count++;
		priv->req_slots[slot].pdev = pdev;
		priv->req_slots[slot].rid = id;

		/* the table's offset is itself read from a register */
		table = readw(priv->dca_base + IOAT3_DCA_GREQID_OFFSET);
		writel(id | IOAT_DCA_GREQID_VALID,
		       priv->iobase + table + (slot * 4));
		return slot;
	}

	/* Error, ioatdma->requester_count is out of whack */
	return -EFAULT;
}
/*
 * Drop @dev from the requester slots of an I/OAT v3 provider.
 *
 * The matching entry of the global requester-id table is zeroed and the
 * slot is marked empty again. Returns the index of the freed slot, or
 * -ENODEV when @dev is not a PCI device or holds no slot.
 */
static int ioat3_dca_remove_requester(struct dca_provider *dca,
				      struct device *dev)
{
	struct ioat_dca_priv *priv = dca_priv(dca);
	struct pci_dev *pdev;
	int slot;
	u16 table;

	/* This implementation only supports PCI-Express */
	if (dev->bus != &pci_bus_type)
		return -ENODEV;

	pdev = to_pci_dev(dev);

	for (slot = 0; slot < priv->max_requesters; slot++) {
		if (priv->req_slots[slot].pdev != pdev)
			continue;

		table = readw(priv->dca_base + IOAT3_DCA_GREQID_OFFSET);
		writel(0, priv->iobase + table + (slot * 4));

		priv->req_slots[slot].pdev = NULL;
		priv->req_slots[slot].rid = 0;
		priv->requester_count--;
		return slot;
	}

	return -ENODEV;
}
/*
 * Build the DCA tag for @cpu from the provider's tag map. @dev is unused.
 *
 * Tag bit i is derived from tag_map[i]:
 *  - DCA3_TAG_MAP_BIT_TO_SEL set: copy the APIC id bit whose number is the
 *    entry with both flag bits masked off;
 *  - otherwise DCA3_TAG_MAP_BIT_TO_INV set: use the inverse of the APIC id
 *    bit whose number is the entry with the INV flag masked off;
 *  - otherwise: use the entry's DCA3_TAG_MAP_LITERAL_VAL bit as is.
 */
static u8 ioat3_dca_get_tag(struct dca_provider *dca,
			    struct device *dev,
			    int cpu)
{
	struct ioat_dca_priv *priv = dca_priv(dca);
	int pos, apic_id, bit, value;
	u8 entry;
	u8 tag = 0;

	apic_id = cpu_physical_id(cpu);

	for (pos = 0; pos < IOAT_TAG_MAP_LEN; pos++) {
		entry = priv->tag_map[pos];

		if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
			bit = entry &
				~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
			value = !!(apic_id & (1 << bit));
		} else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
			bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
			value = !(apic_id & (1 << bit));
		} else {
			value = !!(entry & DCA3_TAG_MAP_LITERAL_VAL);
		}

		tag |= (value << pos);
	}

	return tag;
}
/*
 * DCA provider operations for I/OAT v3. The requester and tag callbacks
 * are v3 specific, dev_managed is the routine shared with the other
 * I/OAT versions.
 */
static struct dca_ops ioat3_dca_ops = {
	.get_tag		= ioat3_dca_get_tag,
	.dev_managed		= ioat_dca_dev_managed,
	.add_requester		= ioat3_dca_add_requester,
	.remove_requester	= ioat3_dca_remove_requester,
};
/*
 * Count the entries of the global requester-id table.
 *
 * The table's offset is read from the DCA register block at
 * @iobase + @dca_offset; an offset of 0 yields 0 slots. Otherwise 32-bit
 * entries are read in order until one has IOAT_DCA_GREQID_LASTID set, and
 * the number of entries read (that last one included) is returned.
 */
static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset)
{
	int count = 0;
	u32 entry;
	u16 table;

	table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
	if (table == 0)
		return 0;

	for (;;) {
		entry = readl(iobase + table + (count * sizeof(u32)));
		count++;
		if (entry & IOAT_DCA_GREQID_LASTID)
			break;
	}

	return count;
}
/*
 * Create and register the DCA provider for an I/OAT v3 device.
 *
 * Returns NULL when DCA is not enabled on the system, the device reports
 * no DCA register block or no requester slots, the provider cannot be
 * allocated, or registration fails. Otherwise the prefetch and memory
 * write control bits are switched on if they are not set already, the
 * APIC-id-to-tag map is copied out of the device, and the registered
 * provider is returned.
 */
struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
{
	struct dca_provider *provider;
	struct ioat_dca_priv *priv;
	int nslots;
	int idx;
	u16 dca_offset;
	u16 csi_ctl;
	u16 pcie_ctl;
	union {
		u64 full;
		struct {
			u32 low;
			u32 high;
		};
	} tag_map;

	if (!system_has_dca_enabled(pdev))
		return NULL;

	dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
	if (dca_offset == 0)
		return NULL;

	nslots = ioat3_dca_count_dca_slots(iobase, dca_offset);
	if (nslots == 0)
		return NULL;

	/* The requester slots are allocated right behind the private data. */
	provider = alloc_dca_provider(&ioat3_dca_ops,
				      sizeof(*priv)
				      + (sizeof(struct ioat_dca_slot) * nslots));
	if (!provider)
		return NULL;

	priv = dca_priv(provider);
	priv->iobase = iobase;
	priv->dca_base = iobase + dca_offset;
	priv->max_requesters = nslots;

	/* some bios might not know to turn these on */
	csi_ctl = readw(priv->dca_base + IOAT3_CSI_CONTROL_OFFSET);
	if (!(csi_ctl & IOAT3_CSI_CONTROL_PREFETCH)) {
		csi_ctl |= IOAT3_CSI_CONTROL_PREFETCH;
		writew(csi_ctl, priv->dca_base + IOAT3_CSI_CONTROL_OFFSET);
	}

	pcie_ctl = readw(priv->dca_base + IOAT3_PCI_CONTROL_OFFSET);
	if (!(pcie_ctl & IOAT3_PCI_CONTROL_MEMWR)) {
		pcie_ctl |= IOAT3_PCI_CONTROL_MEMWR;
		writew(pcie_ctl, priv->dca_base + IOAT3_PCI_CONTROL_OFFSET);
	}

	/* TODO version, compatibility and configuration checks */

	/* copy out the APIC to DCA tag map */
	tag_map.low =
		readl(priv->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
	tag_map.high =
		readl(priv->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);

	/* One map entry per byte of the 64-bit value, lowest byte first. */
	for (idx = 0; idx < 8; idx++) {
		u8 raw = tag_map.full >> (8 * idx);

		priv->tag_map[idx] = raw & DCA_TAG_MAP_MASK;
	}

	if (register_dca_provider(provider, &pdev->dev)) {
		free_dca_provider(provider);
		return NULL;
	}

	return provider;
}
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include <linux/dmaengine.h> #include <linux/dmaengine.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/dma-mapping.h> #include <linux/dma-mapping.h>
#include <linux/workqueue.h>
#include "ioatdma.h" #include "ioatdma.h"
#include "ioatdma_registers.h" #include "ioatdma_registers.h"
#include "ioatdma_hw.h" #include "ioatdma_hw.h"
...@@ -41,11 +42,23 @@ ...@@ -41,11 +42,23 @@
#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
static int ioat_pending_level = 4; static int ioat_pending_level = 4;
module_param(ioat_pending_level, int, 0644); module_param(ioat_pending_level, int, 0644);
MODULE_PARM_DESC(ioat_pending_level, MODULE_PARM_DESC(ioat_pending_level,
"high-water mark for pushing ioat descriptors (default: 4)"); "high-water mark for pushing ioat descriptors (default: 4)");
#define RESET_DELAY msecs_to_jiffies(100)
#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000))
static void ioat_dma_chan_reset_part2(struct work_struct *work);
static void ioat_dma_chan_watchdog(struct work_struct *work);
/*
* workaround for IOAT ver.3.0 null descriptor issue
* (channel returns error when size is 0)
*/
#define NULL_DESC_BUFFER_SIZE 1
/* internal functions */ /* internal functions */
static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
...@@ -122,6 +135,38 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) ...@@ -122,6 +135,38 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
int i; int i;
struct ioat_dma_chan *ioat_chan; struct ioat_dma_chan *ioat_chan;
/*
* IOAT ver.3 workarounds
*/
if (device->version == IOAT_VER_3_0) {
u32 chan_err_mask;
u16 dev_id;
u32 dmauncerrsts;
/*
* Write CHANERRMSK_INT with 3E07h to mask out the errors
* that can cause stability issues for IOAT ver.3
*/
chan_err_mask = 0x3E07;
pci_write_config_dword(device->pdev,
IOAT_PCI_CHANERRMASK_INT_OFFSET,
chan_err_mask);
/*
* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
* (workaround for spurious config parity error after restart)
*/
pci_read_config_word(device->pdev,
IOAT_PCI_DEVICE_ID_OFFSET,
&dev_id);
if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
dmauncerrsts = 0x10;
pci_write_config_dword(device->pdev,
IOAT_PCI_DMAUNCERRSTS_OFFSET,
dmauncerrsts);
}
}
device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
...@@ -137,6 +182,7 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) ...@@ -137,6 +182,7 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1)); ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
ioat_chan->xfercap = xfercap; ioat_chan->xfercap = xfercap;
ioat_chan->desccount = 0; ioat_chan->desccount = 0;
INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
if (ioat_chan->device->version != IOAT_VER_1_2) { if (ioat_chan->device->version != IOAT_VER_1_2) {
writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE
| IOAT_DMA_DCA_ANY_CPU, | IOAT_DMA_DCA_ANY_CPU,
...@@ -175,7 +221,7 @@ static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) ...@@ -175,7 +221,7 @@ static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
{ {
struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
if (ioat_chan->pending != 0) { if (ioat_chan->pending > 0) {
spin_lock_bh(&ioat_chan->desc_lock); spin_lock_bh(&ioat_chan->desc_lock);
__ioat1_dma_memcpy_issue_pending(ioat_chan); __ioat1_dma_memcpy_issue_pending(ioat_chan);
spin_unlock_bh(&ioat_chan->desc_lock); spin_unlock_bh(&ioat_chan->desc_lock);
...@@ -194,13 +240,228 @@ static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan) ...@@ -194,13 +240,228 @@ static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
{ {
struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
if (ioat_chan->pending != 0) { if (ioat_chan->pending > 0) {
spin_lock_bh(&ioat_chan->desc_lock); spin_lock_bh(&ioat_chan->desc_lock);
__ioat2_dma_memcpy_issue_pending(ioat_chan); __ioat2_dma_memcpy_issue_pending(ioat_chan);
spin_unlock_bh(&ioat_chan->desc_lock); spin_unlock_bh(&ioat_chan->desc_lock);
} }
} }
/**
 * ioat_dma_chan_reset_part2 - reinit the channel after a reset
 * @work: delayed work item embedded in the channel (ioat_chan->work)
 *
 * Second half of a channel reset, run from the delayed work queued by
 * ioat_dma_reset_channel().  Clears the completion writeback area and the
 * pending count, recounts the descriptors still waiting on the used list,
 * and writes the chain address back to the hardware so processing resumes.
 *
 * If the channel no longer has a used descriptor list (used_desc.prev is
 * NULL) there is nothing to hand to the hardware, so only the software
 * state is reset and the chain address registers are left untouched.
 */
static void ioat_dma_chan_reset_part2(struct work_struct *work)
{
	struct ioat_dma_chan *ioat_chan =
		container_of(work, struct ioat_dma_chan, work.work);
	struct ioat_desc_sw *desc;

	spin_lock_bh(&ioat_chan->cleanup_lock);
	spin_lock_bh(&ioat_chan->desc_lock);

	ioat_chan->completion_virt->low = 0;
	ioat_chan->completion_virt->high = 0;
	/* undo the INT_MIN set by ioat_dma_reset_channel() */
	ioat_chan->pending = 0;
	ioat_chan->dmacount = 0;

	/*
	 * Without a descriptor ring there is no descriptor to restart
	 * from; converting a NULL list pointer with to_ioat_desc() and
	 * dereferencing the result would be invalid.
	 */
	if (!ioat_chan->used_desc.prev)
		goto out_unlock;

	/*
	 * count the descriptors waiting, and be sure to do it
	 * right for both the CB1 line and the CB2 ring
	 */
	desc = to_ioat_desc(ioat_chan->used_desc.prev);
	do {
		ioat_chan->dmacount++;
		desc = to_ioat_desc(desc->node.next);
	} while (&desc->node != ioat_chan->used_desc.next);

	/*
	 * write the new starting descriptor address
	 * this puts channel engine into ARMED state
	 */
	desc = to_ioat_desc(ioat_chan->used_desc.prev);
	switch (ioat_chan->device->version) {
	case IOAT_VER_1_2:
		writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
		       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
		writel(((u64) desc->async_tx.phys) >> 32,
		       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);

		writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
			+ IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
		break;
	case IOAT_VER_2_0:
		writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
		       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
		writel(((u64) desc->async_tx.phys) >> 32,
		       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);

		/* tell the engine to go with what's left to be done */
		writew(ioat_chan->dmacount,
		       ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
		break;
	}

	dev_err(&ioat_chan->device->pdev->dev,
		"chan%d reset - %d descs waiting, %d total desc\n",
		chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);

out_unlock:
	spin_unlock_bh(&ioat_chan->desc_lock);
	spin_unlock_bh(&ioat_chan->cleanup_lock);
}
/**
* ioat_dma_reset_channel - restart a channel
* @ioat_chan: IOAT DMA channel handle
*/
static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
{
u32 chansts, chanerr;
if (!ioat_chan->used_desc.prev)
return;
chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
chansts = (ioat_chan->completion_virt->low
& IOAT_CHANSTS_DMA_TRANSFER_STATUS);
if (chanerr) {
dev_err(&ioat_chan->device->pdev->dev,
"chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
chan_num(ioat_chan), chansts, chanerr);
writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
}
/*
* whack it upside the head with a reset
* and wait for things to settle out.
* force the pending count to a really big negative
* to make sure no one forces an issue_pending
* while we're waiting.
*/
spin_lock_bh(&ioat_chan->desc_lock);
ioat_chan->pending = INT_MIN;
writeb(IOAT_CHANCMD_RESET,
ioat_chan->reg_base
+ IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
spin_unlock_bh(&ioat_chan->desc_lock);
/* schedule the 2nd half instead of sleeping a long time */
schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
}
/**
 * ioat_dma_chan_watchdog - watch for stuck channels
 * @work: delayed work item embedded in the device (device->work)
 *
 * Walks every channel of the device and compares its completion state
 * with the snapshot taken on the previous pass.  A channel that appears
 * to have made no progress is either nudged (completion writeback
 * refreshed from CHANSTS, or pending descriptors issued) or reset via
 * ioat_dma_reset_channel().  Channels that do not match either "stuck"
 * test just have their snapshot refreshed.  The work item re-queues
 * itself with WATCHDOG_DELAY at the end of every pass.
 */
static void ioat_dma_chan_watchdog(struct work_struct *work)
{
	struct ioatdma_device *device =
		container_of(work, struct ioatdma_device, work.work);
	struct ioat_dma_chan *ioat_chan;
	int i;

	/* CHANSTS as read from the hardware, as two 32-bit halves */
	union {
		u64 full;
		struct {
			u32 low;
			u32 high;
		};
	} completion_hw;

	/* completed-descriptor address extracted from completion_hw */
	unsigned long compl_desc_addr_hw;

	for (i = 0; i < device->common.chancnt; i++) {
		ioat_chan = ioat_lookup_chan_by_index(device, i);

		if (ioat_chan->device->version == IOAT_VER_1_2
			/* have we started processing anything yet */
			&& ioat_chan->last_completion
			/* have we completed any since last watchdog cycle? */
			&& (ioat_chan->last_completion ==
				ioat_chan->watchdog_completion)
			/* has TCP stuck on one cookie since last watchdog? */
			&& (ioat_chan->watchdog_tcp_cookie ==
				ioat_chan->watchdog_last_tcp_cookie)
			&& (ioat_chan->watchdog_tcp_cookie !=
				ioat_chan->completed_cookie)
			/* is there something in the chain to be processed? */
			/* CB1 chain always has at least the last one processed */
			&& (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
			&& ioat_chan->pending == 0) {

			/*
			 * check CHANSTS register for completed
			 * descriptor address.
			 * if it is different than completion writeback,
			 * it is not zero
			 * and it has changed since the last watchdog
			 *     we can assume that channel
			 *     is still working correctly
			 *     and the problem is in completion writeback.
			 *     update completion writeback
			 *     with actual CHANSTS value
			 * else
			 *     try resetting the channel
			 */

			completion_hw.low = readl(ioat_chan->reg_base +
				IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
			completion_hw.high = readl(ioat_chan->reg_base +
				IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
			/*
			 * unsigned long holds the full 64-bit address only on
			 * 64-bit builds; otherwise use the masked low word
			 */
#if (BITS_PER_LONG == 64)
			compl_desc_addr_hw =
				completion_hw.full
				& IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
#else
			compl_desc_addr_hw =
				completion_hw.low & IOAT_LOW_COMPLETION_MASK;
#endif

			if ((compl_desc_addr_hw != 0)
			   && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
			   && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
				/* hardware moved on: repair the writeback copy */
				ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
				ioat_chan->completion_virt->low = completion_hw.low;
				ioat_chan->completion_virt->high = completion_hw.high;
			} else {
				/* no sign of progress: reset and clear the snapshots */
				ioat_dma_reset_channel(ioat_chan);
				ioat_chan->watchdog_completion = 0;
				ioat_chan->last_compl_desc_addr_hw = 0;
			}

		/*
		 * for version 2.0 if there are descriptors yet to be processed
		 * and the last completed hasn't changed since the last watchdog
		 *      if they haven't hit the pending level
		 *          issue the pending to push them through
		 *      else
		 *          try resetting the channel
		 */
		} else if (ioat_chan->device->version == IOAT_VER_2_0
			&& ioat_chan->used_desc.prev
			&& ioat_chan->last_completion
			&& ioat_chan->last_completion == ioat_chan->watchdog_completion) {

			if (ioat_chan->pending < ioat_pending_level)
				ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
			else {
				ioat_dma_reset_channel(ioat_chan);
				ioat_chan->watchdog_completion = 0;
			}
		} else {
			/* not flagged as stuck: refresh the snapshot for the next pass */
			ioat_chan->last_compl_desc_addr_hw = 0;
			ioat_chan->watchdog_completion
					= ioat_chan->last_completion;
		}

		/* remember the cookie seen this pass, whichever branch ran */
		ioat_chan->watchdog_last_tcp_cookie =
			ioat_chan->watchdog_tcp_cookie;
	}

	/* re-arm for the next pass */
	schedule_delayed_work(&device->work, WATCHDOG_DELAY);
}
static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
{ {
struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
...@@ -250,6 +511,13 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) ...@@ -250,6 +511,13 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
prev = new; prev = new;
} while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan))); } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));
if (!new) {
dev_err(&ioat_chan->device->pdev->dev,
"tx submit failed\n");
spin_unlock_bh(&ioat_chan->desc_lock);
return -ENOMEM;
}
hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
if (new->async_tx.callback) { if (new->async_tx.callback) {
hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
...@@ -335,7 +603,14 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) ...@@ -335,7 +603,14 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
desc_count++; desc_count++;
} while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan))); } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; if (!new) {
dev_err(&ioat_chan->device->pdev->dev,
"tx submit failed\n");
spin_unlock_bh(&ioat_chan->desc_lock);
return -ENOMEM;
}
hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
if (new->async_tx.callback) { if (new->async_tx.callback) {
hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
if (first != new) { if (first != new) {
...@@ -406,6 +681,7 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor( ...@@ -406,6 +681,7 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
desc_sw->async_tx.tx_submit = ioat1_tx_submit; desc_sw->async_tx.tx_submit = ioat1_tx_submit;
break; break;
case IOAT_VER_2_0: case IOAT_VER_2_0:
case IOAT_VER_3_0:
desc_sw->async_tx.tx_submit = ioat2_tx_submit; desc_sw->async_tx.tx_submit = ioat2_tx_submit;
break; break;
} }
...@@ -452,7 +728,8 @@ static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan) ...@@ -452,7 +728,8 @@ static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
* ioat_dma_alloc_chan_resources - returns the number of allocated descriptors * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
* @chan: the channel to be filled out * @chan: the channel to be filled out
*/ */
static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) static int ioat_dma_alloc_chan_resources(struct dma_chan *chan,
struct dma_client *client)
{ {
struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
struct ioat_desc_sw *desc; struct ioat_desc_sw *desc;
...@@ -555,6 +832,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) ...@@ -555,6 +832,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
} }
break; break;
case IOAT_VER_2_0: case IOAT_VER_2_0:
case IOAT_VER_3_0:
list_for_each_entry_safe(desc, _desc, list_for_each_entry_safe(desc, _desc,
ioat_chan->free_desc.next, node) { ioat_chan->free_desc.next, node) {
list_del(&desc->node); list_del(&desc->node);
...@@ -585,6 +863,10 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) ...@@ -585,6 +863,10 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
ioat_chan->last_completion = ioat_chan->completion_addr = 0; ioat_chan->last_completion = ioat_chan->completion_addr = 0;
ioat_chan->pending = 0; ioat_chan->pending = 0;
ioat_chan->dmacount = 0; ioat_chan->dmacount = 0;
ioat_chan->watchdog_completion = 0;
ioat_chan->last_compl_desc_addr_hw = 0;
ioat_chan->watchdog_tcp_cookie =
ioat_chan->watchdog_last_tcp_cookie = 0;
} }
/** /**
...@@ -640,7 +922,8 @@ ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) ...@@ -640,7 +922,8 @@ ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
/* set up the noop descriptor */ /* set up the noop descriptor */
noop_desc = to_ioat_desc(ioat_chan->used_desc.next); noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
noop_desc->hw->size = 0; /* set size to non-zero value (channel returns error when size is 0) */
noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
noop_desc->hw->src_addr = 0; noop_desc->hw->src_addr = 0;
noop_desc->hw->dst_addr = 0; noop_desc->hw->dst_addr = 0;
...@@ -690,6 +973,7 @@ static struct ioat_desc_sw *ioat_dma_get_next_descriptor( ...@@ -690,6 +973,7 @@ static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
return ioat1_dma_get_next_descriptor(ioat_chan); return ioat1_dma_get_next_descriptor(ioat_chan);
break; break;
case IOAT_VER_2_0: case IOAT_VER_2_0:
case IOAT_VER_3_0:
return ioat2_dma_get_next_descriptor(ioat_chan); return ioat2_dma_get_next_descriptor(ioat_chan);
break; break;
} }
...@@ -716,8 +1000,12 @@ static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy( ...@@ -716,8 +1000,12 @@ static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
new->src = dma_src; new->src = dma_src;
new->async_tx.flags = flags; new->async_tx.flags = flags;
return &new->async_tx; return &new->async_tx;
} else } else {
dev_err(&ioat_chan->device->pdev->dev,
"chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
return NULL; return NULL;
}
} }
static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy( static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
...@@ -744,8 +1032,13 @@ static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy( ...@@ -744,8 +1032,13 @@ static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
new->src = dma_src; new->src = dma_src;
new->async_tx.flags = flags; new->async_tx.flags = flags;
return &new->async_tx; return &new->async_tx;
} else } else {
spin_unlock_bh(&ioat_chan->desc_lock);
dev_err(&ioat_chan->device->pdev->dev,
"chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
return NULL; return NULL;
}
} }
static void ioat_dma_cleanup_tasklet(unsigned long data) static void ioat_dma_cleanup_tasklet(unsigned long data)
...@@ -756,6 +1049,27 @@ static void ioat_dma_cleanup_tasklet(unsigned long data) ...@@ -756,6 +1049,27 @@ static void ioat_dma_cleanup_tasklet(unsigned long data)
chan->reg_base + IOAT_CHANCTRL_OFFSET); chan->reg_base + IOAT_CHANCTRL_OFFSET);
} }
static void
ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
{
/*
* yes we are unmapping both _page and _single
* alloc'd regions with unmap_page. Is this
* *really* that bad?
*/
if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP))
pci_unmap_page(ioat_chan->device->pdev,
pci_unmap_addr(desc, dst),
pci_unmap_len(desc, len),
PCI_DMA_FROMDEVICE);
if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP))
pci_unmap_page(ioat_chan->device->pdev,
pci_unmap_addr(desc, src),
pci_unmap_len(desc, len),
PCI_DMA_TODEVICE);
}
/** /**
* ioat_dma_memcpy_cleanup - cleanup up finished descriptors * ioat_dma_memcpy_cleanup - cleanup up finished descriptors
* @chan: ioat channel to be cleaned up * @chan: ioat channel to be cleaned up
...@@ -799,11 +1113,27 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) ...@@ -799,11 +1113,27 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
if (phys_complete == ioat_chan->last_completion) { if (phys_complete == ioat_chan->last_completion) {
spin_unlock_bh(&ioat_chan->cleanup_lock); spin_unlock_bh(&ioat_chan->cleanup_lock);
/*
* perhaps we're stuck so hard that the watchdog can't go off?
* try to catch it after 2 seconds
*/
if (ioat_chan->device->version != IOAT_VER_3_0) {
if (time_after(jiffies,
ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
ioat_chan->last_completion_time = jiffies;
}
}
return; return;
} }
ioat_chan->last_completion_time = jiffies;
cookie = 0; cookie = 0;
spin_lock_bh(&ioat_chan->desc_lock); if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
spin_unlock_bh(&ioat_chan->cleanup_lock);
return;
}
switch (ioat_chan->device->version) { switch (ioat_chan->device->version) {
case IOAT_VER_1_2: case IOAT_VER_1_2:
list_for_each_entry_safe(desc, _desc, list_for_each_entry_safe(desc, _desc,
...@@ -816,21 +1146,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) ...@@ -816,21 +1146,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
*/ */
if (desc->async_tx.cookie) { if (desc->async_tx.cookie) {
cookie = desc->async_tx.cookie; cookie = desc->async_tx.cookie;
ioat_dma_unmap(ioat_chan, desc);
/*
* yes we are unmapping both _page and _single
* alloc'd regions with unmap_page. Is this
* *really* that bad?
*/
pci_unmap_page(ioat_chan->device->pdev,
pci_unmap_addr(desc, dst),
pci_unmap_len(desc, len),
PCI_DMA_FROMDEVICE);
pci_unmap_page(ioat_chan->device->pdev,
pci_unmap_addr(desc, src),
pci_unmap_len(desc, len),
PCI_DMA_TODEVICE);
if (desc->async_tx.callback) { if (desc->async_tx.callback) {
desc->async_tx.callback(desc->async_tx.callback_param); desc->async_tx.callback(desc->async_tx.callback_param);
desc->async_tx.callback = NULL; desc->async_tx.callback = NULL;
...@@ -862,6 +1178,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) ...@@ -862,6 +1178,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
} }
break; break;
case IOAT_VER_2_0: case IOAT_VER_2_0:
case IOAT_VER_3_0:
/* has some other thread has already cleaned up? */ /* has some other thread has already cleaned up? */
if (ioat_chan->used_desc.prev == NULL) if (ioat_chan->used_desc.prev == NULL)
break; break;
...@@ -889,16 +1206,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) ...@@ -889,16 +1206,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
if (desc->async_tx.cookie) { if (desc->async_tx.cookie) {
cookie = desc->async_tx.cookie; cookie = desc->async_tx.cookie;
desc->async_tx.cookie = 0; desc->async_tx.cookie = 0;
ioat_dma_unmap(ioat_chan, desc);
pci_unmap_page(ioat_chan->device->pdev,
pci_unmap_addr(desc, dst),
pci_unmap_len(desc, len),
PCI_DMA_FROMDEVICE);
pci_unmap_page(ioat_chan->device->pdev,
pci_unmap_addr(desc, src),
pci_unmap_len(desc, len),
PCI_DMA_TODEVICE);
if (desc->async_tx.callback) { if (desc->async_tx.callback) {
desc->async_tx.callback(desc->async_tx.callback_param); desc->async_tx.callback(desc->async_tx.callback_param);
desc->async_tx.callback = NULL; desc->async_tx.callback = NULL;
...@@ -943,6 +1251,7 @@ static enum dma_status ioat_dma_is_complete(struct dma_chan *chan, ...@@ -943,6 +1251,7 @@ static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
last_used = chan->cookie; last_used = chan->cookie;
last_complete = ioat_chan->completed_cookie; last_complete = ioat_chan->completed_cookie;
ioat_chan->watchdog_tcp_cookie = cookie;
if (done) if (done)
*done = last_complete; *done = last_complete;
...@@ -973,10 +1282,19 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) ...@@ -973,10 +1282,19 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
spin_lock_bh(&ioat_chan->desc_lock); spin_lock_bh(&ioat_chan->desc_lock);
desc = ioat_dma_get_next_descriptor(ioat_chan); desc = ioat_dma_get_next_descriptor(ioat_chan);
if (!desc) {
dev_err(&ioat_chan->device->pdev->dev,
"Unable to start null desc - get next desc failed\n");
spin_unlock_bh(&ioat_chan->desc_lock);
return;
}
desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
| IOAT_DMA_DESCRIPTOR_CTL_INT_GN | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
| IOAT_DMA_DESCRIPTOR_CTL_CP_STS; | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
desc->hw->size = 0; /* set size to non-zero value (channel returns error when size is 0) */
desc->hw->size = NULL_DESC_BUFFER_SIZE;
desc->hw->src_addr = 0; desc->hw->src_addr = 0;
desc->hw->dst_addr = 0; desc->hw->dst_addr = 0;
async_tx_ack(&desc->async_tx); async_tx_ack(&desc->async_tx);
...@@ -994,6 +1312,7 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) ...@@ -994,6 +1312,7 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
+ IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
break; break;
case IOAT_VER_2_0: case IOAT_VER_2_0:
case IOAT_VER_3_0:
writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
writel(((u64) desc->async_tx.phys) >> 32, writel(((u64) desc->async_tx.phys) >> 32,
...@@ -1049,7 +1368,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device) ...@@ -1049,7 +1368,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device)
dma_chan = container_of(device->common.channels.next, dma_chan = container_of(device->common.channels.next,
struct dma_chan, struct dma_chan,
device_node); device_node);
if (device->common.device_alloc_chan_resources(dma_chan) < 1) { if (device->common.device_alloc_chan_resources(dma_chan, NULL) < 1) {
dev_err(&device->pdev->dev, dev_err(&device->pdev->dev,
"selftest cannot allocate chan resource\n"); "selftest cannot allocate chan resource\n");
err = -ENODEV; err = -ENODEV;
...@@ -1312,6 +1631,7 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, ...@@ -1312,6 +1631,7 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
ioat1_dma_memcpy_issue_pending; ioat1_dma_memcpy_issue_pending;
break; break;
case IOAT_VER_2_0: case IOAT_VER_2_0:
case IOAT_VER_3_0:
device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy; device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
device->common.device_issue_pending = device->common.device_issue_pending =
ioat2_dma_memcpy_issue_pending; ioat2_dma_memcpy_issue_pending;
...@@ -1331,8 +1651,16 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, ...@@ -1331,8 +1651,16 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
if (err) if (err)
goto err_self_test; goto err_self_test;
ioat_set_tcp_copy_break(device);
dma_async_device_register(&device->common); dma_async_device_register(&device->common);
if (device->version != IOAT_VER_3_0) {
INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
schedule_delayed_work(&device->work,
WATCHDOG_DELAY);
}
return device; return device;
err_self_test: err_self_test:
...@@ -1365,6 +1693,10 @@ void ioat_dma_remove(struct ioatdma_device *device) ...@@ -1365,6 +1693,10 @@ void ioat_dma_remove(struct ioatdma_device *device)
pci_release_regions(device->pdev); pci_release_regions(device->pdev);
pci_disable_device(device->pdev); pci_disable_device(device->pdev);
if (device->version != IOAT_VER_3_0) {
cancel_delayed_work(&device->work);
}
list_for_each_entry_safe(chan, _chan, list_for_each_entry_safe(chan, _chan,
&device->common.channels, device_node) { &device->common.channels, device_node) {
ioat_chan = to_ioat_chan(chan); ioat_chan = to_ioat_chan(chan);
......
...@@ -27,8 +27,9 @@ ...@@ -27,8 +27,9 @@
#include <linux/dmapool.h> #include <linux/dmapool.h>
#include <linux/cache.h> #include <linux/cache.h>
#include <linux/pci_ids.h> #include <linux/pci_ids.h>
#include <net/tcp.h>
#define IOAT_DMA_VERSION "2.04" #define IOAT_DMA_VERSION "3.30"
enum ioat_interrupt { enum ioat_interrupt {
none = 0, none = 0,
...@@ -40,6 +41,7 @@ enum ioat_interrupt { ...@@ -40,6 +41,7 @@ enum ioat_interrupt {
#define IOAT_LOW_COMPLETION_MASK 0xffffffc0 #define IOAT_LOW_COMPLETION_MASK 0xffffffc0
#define IOAT_DMA_DCA_ANY_CPU ~0 #define IOAT_DMA_DCA_ANY_CPU ~0
#define IOAT_WATCHDOG_PERIOD (2 * HZ)
/** /**
...@@ -62,6 +64,7 @@ struct ioatdma_device { ...@@ -62,6 +64,7 @@ struct ioatdma_device {
struct dma_device common; struct dma_device common;
u8 version; u8 version;
enum ioat_interrupt irq_mode; enum ioat_interrupt irq_mode;
struct delayed_work work;
struct msix_entry msix_entries[4]; struct msix_entry msix_entries[4];
struct ioat_dma_chan *idx[4]; struct ioat_dma_chan *idx[4];
}; };
...@@ -75,6 +78,7 @@ struct ioat_dma_chan { ...@@ -75,6 +78,7 @@ struct ioat_dma_chan {
dma_cookie_t completed_cookie; dma_cookie_t completed_cookie;
unsigned long last_completion; unsigned long last_completion;
unsigned long last_completion_time;
size_t xfercap; /* XFERCAP register value expanded out */ size_t xfercap; /* XFERCAP register value expanded out */
...@@ -82,6 +86,10 @@ struct ioat_dma_chan { ...@@ -82,6 +86,10 @@ struct ioat_dma_chan {
spinlock_t desc_lock; spinlock_t desc_lock;
struct list_head free_desc; struct list_head free_desc;
struct list_head used_desc; struct list_head used_desc;
unsigned long watchdog_completion;
int watchdog_tcp_cookie;
u32 watchdog_last_tcp_cookie;
struct delayed_work work;
int pending; int pending;
int dmacount; int dmacount;
...@@ -98,6 +106,7 @@ struct ioat_dma_chan { ...@@ -98,6 +106,7 @@ struct ioat_dma_chan {
u32 high; u32 high;
}; };
} *completion_virt; } *completion_virt;
unsigned long last_compl_desc_addr_hw;
struct tasklet_struct cleanup_task; struct tasklet_struct cleanup_task;
}; };
...@@ -121,17 +130,34 @@ struct ioat_desc_sw { ...@@ -121,17 +130,34 @@ struct ioat_desc_sw {
struct dma_async_tx_descriptor async_tx; struct dma_async_tx_descriptor async_tx;
}; };
/*
 * ioat_set_tcp_copy_break - pick the tcp_dma_copybreak value for a device
 * @dev: I/OAT device whose hardware version selects the value
 *
 * Sets sysctl_tcp_dma_copybreak to 2048 for version 2.0 hardware and to
 * 4096 for versions 1.2 and 3.0; any other version leaves it untouched.
 * Compiles to an empty function when CONFIG_NET_DMA is not set.
 */
static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev)
{
#ifdef CONFIG_NET_DMA
	const u8 ver = dev->version;

	if (ver == IOAT_VER_2_0)
		sysctl_tcp_dma_copybreak = 2048;
	else if (ver == IOAT_VER_1_2 || ver == IOAT_VER_3_0)
		sysctl_tcp_dma_copybreak = 4096;
#endif
}
#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE) #if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
void __iomem *iobase); void __iomem *iobase);
void ioat_dma_remove(struct ioatdma_device *device); void ioat_dma_remove(struct ioatdma_device *device);
struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
#else #else
#define ioat_dma_probe(pdev, iobase) NULL #define ioat_dma_probe(pdev, iobase) NULL
#define ioat_dma_remove(device) do { } while (0) #define ioat_dma_remove(device) do { } while (0)
#define ioat_dca_init(pdev, iobase) NULL #define ioat_dca_init(pdev, iobase) NULL
#define ioat2_dca_init(pdev, iobase) NULL #define ioat2_dca_init(pdev, iobase) NULL
#define ioat3_dca_init(pdev, iobase) NULL
#endif #endif
#endif /* IOATDMA_H */ #endif /* IOATDMA_H */
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#define IOAT_PCI_SID 0x8086 #define IOAT_PCI_SID 0x8086
#define IOAT_VER_1_2 0x12 /* Version 1.2 */ #define IOAT_VER_1_2 0x12 /* Version 1.2 */
#define IOAT_VER_2_0 0x20 /* Version 2.0 */ #define IOAT_VER_2_0 0x20 /* Version 2.0 */
#define IOAT_VER_3_0 0x30 /* Version 3.0 */
struct ioat_dma_descriptor { struct ioat_dma_descriptor {
uint32_t size; uint32_t size;
......
...@@ -25,6 +25,10 @@ ...@@ -25,6 +25,10 @@
#define IOAT_PCI_DMACTRL_DMA_EN 0x00000001 #define IOAT_PCI_DMACTRL_DMA_EN 0x00000001
#define IOAT_PCI_DMACTRL_MSI_EN 0x00000002 #define IOAT_PCI_DMACTRL_MSI_EN 0x00000002
#define IOAT_PCI_DEVICE_ID_OFFSET 0x02
#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148
#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184
/* MMIO Device Registers */ /* MMIO Device Registers */
#define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */ #define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */
...@@ -149,7 +153,23 @@ ...@@ -149,7 +153,23 @@
#define IOAT_DCA_GREQID_VALID 0x20000000 #define IOAT_DCA_GREQID_VALID 0x20000000
#define IOAT_DCA_GREQID_LASTID 0x80000000 #define IOAT_DCA_GREQID_LASTID 0x80000000
#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1
#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
#define IOAT3_PCI_CAPABILITY_MEMWR 0x1
#define IOAT3_CSI_CONTROL_OFFSET 0x0C
#define IOAT3_CSI_CONTROL_PREFETCH 0x1
#define IOAT3_PCI_CONTROL_OFFSET 0x0E
#define IOAT3_PCI_CONTROL_MEMWR 0x1
#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10
#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
#define IOAT3_DCA_GREQID_OFFSET 0x02
#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */ #define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */
#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */ #define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */
......
...@@ -82,17 +82,24 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, ...@@ -82,17 +82,24 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
struct device *dev = struct device *dev =
&iop_chan->device->pdev->dev; &iop_chan->device->pdev->dev;
u32 len = unmap->unmap_len; u32 len = unmap->unmap_len;
u32 src_cnt = unmap->unmap_src_cnt; enum dma_ctrl_flags flags = desc->async_tx.flags;
dma_addr_t addr = iop_desc_get_dest_addr(unmap, u32 src_cnt;
iop_chan); dma_addr_t addr;
dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE); if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
while (src_cnt--) { addr = iop_desc_get_dest_addr(unmap, iop_chan);
addr = iop_desc_get_src_addr(unmap, dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
iop_chan, }
src_cnt);
dma_unmap_page(dev, addr, len, if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
DMA_TO_DEVICE); src_cnt = unmap->unmap_src_cnt;
while (src_cnt--) {
addr = iop_desc_get_src_addr(unmap,
iop_chan,
src_cnt);
dma_unmap_page(dev, addr, len,
DMA_TO_DEVICE);
}
} }
desc->group_head = NULL; desc->group_head = NULL;
} }
...@@ -366,8 +373,8 @@ iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots, ...@@ -366,8 +373,8 @@ iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots,
if (!retry++) if (!retry++)
goto retry; goto retry;
/* try to free some slots if the allocation fails */ /* perform direct reclaim if the allocation fails */
tasklet_schedule(&iop_chan->irq_tasklet); __iop_adma_slot_cleanup(iop_chan);
return NULL; return NULL;
} }
...@@ -443,8 +450,18 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx) ...@@ -443,8 +450,18 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan); static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan); static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
/* returns the number of allocated descriptors */ /**
static int iop_adma_alloc_chan_resources(struct dma_chan *chan) * iop_adma_alloc_chan_resources - returns the number of allocated descriptors
* @chan - allocate descriptor resources for this channel
* @client - current client requesting the channel be ready for requests
*
* Note: We keep the slots for 1 operation on iop_chan->chain at all times. To
* avoid deadlock, via async_xor, num_descs_in_pool must at a minimum be
* greater than 2x the number slots needed to satisfy a device->max_xor
* request.
* */
static int iop_adma_alloc_chan_resources(struct dma_chan *chan,
struct dma_client *client)
{ {
char *hw_desc; char *hw_desc;
int idx; int idx;
...@@ -838,7 +855,7 @@ static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device) ...@@ -838,7 +855,7 @@ static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device)
dma_chan = container_of(device->common.channels.next, dma_chan = container_of(device->common.channels.next,
struct dma_chan, struct dma_chan,
device_node); device_node);
if (iop_adma_alloc_chan_resources(dma_chan) < 1) { if (iop_adma_alloc_chan_resources(dma_chan, NULL) < 1) {
err = -ENODEV; err = -ENODEV;
goto out; goto out;
} }
...@@ -936,7 +953,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) ...@@ -936,7 +953,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
dma_chan = container_of(device->common.channels.next, dma_chan = container_of(device->common.channels.next,
struct dma_chan, struct dma_chan,
device_node); device_node);
if (iop_adma_alloc_chan_resources(dma_chan) < 1) { if (iop_adma_alloc_chan_resources(dma_chan, NULL) < 1) {
err = -ENODEV; err = -ENODEV;
goto out; goto out;
} }
...@@ -1387,6 +1404,8 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan) ...@@ -1387,6 +1404,8 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
spin_unlock_bh(&iop_chan->lock); spin_unlock_bh(&iop_chan->lock);
} }
MODULE_ALIAS("platform:iop-adma");
static struct platform_driver iop_adma_driver = { static struct platform_driver iop_adma_driver = {
.probe = iop_adma_probe, .probe = iop_adma_probe,
.remove = iop_adma_remove, .remove = iop_adma_remove,
......
/*
* offload engine driver for the Marvell XOR engine
* Copyright (C) 2007, 2008, Marvell International Ltd.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/async_tx.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/memory.h>
#include <asm/plat-orion/mv_xor.h>
#include "mv_xor.h"
/* forward declaration: mv_xor_start_new_chain() calls this before it is defined */
static void mv_xor_issue_pending(struct dma_chan *chan);
/* recover the mv_xor_chan that embeds the given 'common' member */
#define to_mv_xor_chan(chan) \
container_of(chan, struct mv_xor_chan, common)
/* recover the mv_xor_device that embeds the given 'common' member */
#define to_mv_xor_device(dev) \
container_of(dev, struct mv_xor_device, common)
/* recover the software descriptor slot that embeds the given async_tx */
#define to_mv_xor_slot(tx) \
container_of(tx, struct mv_xor_desc_slot, async_tx)
/*
 * mv_desc_init - reset the hardware descriptor behind a slot
 * @desc: software slot whose hardware descriptor is initialised
 * @flags: accepted for symmetry with the prep routines; not used here
 *
 * Sets bit 31 in both the status and command words and clears the
 * next-descriptor link.
 */
static void mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags)
{
	struct mv_xor_desc *hw = desc->hw_desc;

	hw->status = (1 << 31);
	hw->phy_next_desc = 0;
	hw->desc_command = (1 << 31);
}
/* Return the destination address recorded in the slot's hardware descriptor. */
static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc)
{
	struct mv_xor_desc *hw = desc->hw_desc;
	u32 dest = hw->phy_dest_addr;

	return dest;
}
/*
 * Return source address number @src_idx recorded in the slot's hardware
 * descriptor.
 */
static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
				int src_idx)
{
	struct mv_xor_desc *hw = desc->hw_desc;
	u32 src = hw->phy_src_addr[src_idx];

	return src;
}
/* Store the transfer length in the slot's hardware descriptor. */
static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
				   u32 byte_count)
{
	struct mv_xor_desc *hw = desc->hw_desc;

	hw->byte_count = byte_count;
}
/*
 * Link the slot's hardware descriptor to @next_desc_addr.
 * The link must currently be clear; an already-linked descriptor is a BUG.
 */
static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
				  u32 next_desc_addr)
{
	struct mv_xor_desc *hw = desc->hw_desc;

	BUG_ON(hw->phy_next_desc);
	hw->phy_next_desc = next_desc_addr;
}
/* Terminate the hardware chain at this descriptor by zeroing its next link. */
static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc)
{
	struct mv_xor_desc *hw = desc->hw_desc;

	hw->phy_next_desc = 0;
}
/*
 * Remember the memset fill value in the software slot; it is written to the
 * engine later by mv_xor_start_new_chain().
 */
static void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc, u32 val)
{
	desc->value = val;
}
/* Store the destination address in the slot's hardware descriptor. */
static void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc,
				  dma_addr_t addr)
{
	struct mv_xor_desc *hw = desc->hw_desc;

	hw->phy_dest_addr = addr;
}
/*
 * Number of descriptor slots a memset (and, through the
 * mv_chan_memcpy_slot_count() alias, a memcpy) of @len bytes consumes.
 * Always one, independent of @len.
 */
static int mv_chan_memset_slot_count(size_t len)
{
	const int slots_needed = 1;

	return slots_needed;
}
#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c)
/*
 * Store source address number @index in the slot's hardware descriptor.
 * For XOR descriptors the matching bit in the command word is set as well.
 */
static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
				 int index, dma_addr_t addr)
{
	struct mv_xor_desc *hw = desc->hw_desc;

	hw->phy_src_addr[index] = addr;

	if (desc->type != DMA_XOR)
		return;

	hw->desc_command |= (1 << index);
}
/* Read the channel's current-descriptor register. */
static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
{
	u32 current_desc = __raw_readl(XOR_CURR_DESC(chan));

	return current_desc;
}
/* Write @next_desc_addr to the channel's next-descriptor register. */
static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
					u32 next_desc_addr)
{
	__raw_writel(next_desc_addr,
		     XOR_NEXT_DESC(chan));
}
/* Write @desc_addr to the channel's destination-pointer register. */
static void mv_chan_set_dest_pointer(struct mv_xor_chan *chan, u32 desc_addr)
{
	__raw_writel(desc_addr,
		     XOR_DEST_POINTER(chan));
}
/* Write @block_size to the channel's block-size register. */
static void mv_chan_set_block_size(struct mv_xor_chan *chan, u32 block_size)
{
	__raw_writel(block_size,
		     XOR_BLOCK_SIZE(chan));
}
/*
 * Program the fill pattern: the same 32-bit @value is written to the low
 * and then the high init-value register.
 */
static void mv_chan_set_value(struct mv_xor_chan *chan, u32 value)
{
	const u32 pattern = value;

	__raw_writel(pattern, XOR_INIT_VALUE_LOW(chan));
	__raw_writel(pattern, XOR_INIT_VALUE_HIGH(chan));
}
/*
 * OR XOR_INTR_MASK_VALUE, shifted into this channel's 16-bit field, into
 * the interrupt mask register (read-modify-write).
 */
static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
{
	u32 mask = __raw_readl(XOR_INTR_MASK(chan));

	mask = mask | (XOR_INTR_MASK_VALUE << (chan->idx * 16));
	__raw_writel(mask, XOR_INTR_MASK(chan));
}
/*
 * Read the interrupt cause register and return only this channel's 16-bit
 * field, shifted down to bits 15:0.
 */
static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
{
	u32 cause = __raw_readl(XOR_INTR_CAUSE(chan));

	return (cause >> (chan->idx * 16)) & 0xFFFF;
}
/*
 * Return 1 when any of cause bits 4..9 is set in @intr_cause (the bits this
 * driver treats as error conditions), 0 otherwise.
 */
static int mv_is_err_intr(u32 intr_cause)
{
	const u32 err_bits = (1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9);

	return (intr_cause & err_bits) ? 1 : 0;
}
/*
 * Write a word with only bit 1 of this channel's 16-bit field set to the
 * interrupt cause register.
 * NOTE(review): whether the register is write-1-to-clear or write-0-to-clear
 * is not visible from this file - confirm against the datasheet.
 */
static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan)
{
	u32 eoc = (1 << (1 + (chan->idx * 16)));

	dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, eoc);
	__raw_writel(eoc, XOR_INTR_CAUSE(chan));
}
/*
 * Write 0xFFFF0000 shifted right by (idx * 16) to the interrupt cause
 * register: 0xFFFF0000 for channel index 0, 0x0000FFFF for index 1.
 * NOTE(review): this only clears the channel's own field if the register is
 * write-0-to-clear - confirm against the datasheet.
 */
static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan)
{
	u32 clear_mask = 0xFFFF0000 >> (chan->idx * 16);

	__raw_writel(clear_mask, XOR_INTR_CAUSE(chan));
}
/*
 * Decide whether @desc may be linked in hardware to the descriptor that
 * precedes it on the chain list.  Returns 1 only when both have the same
 * operation type and that type is not DMA_MEMSET; 0 otherwise.
 */
static int mv_can_chain(struct mv_xor_desc_slot *desc)
{
	struct mv_xor_desc_slot *prev = list_entry(desc->chain_node.prev,
						   struct mv_xor_desc_slot,
						   chain_node);

	if (prev->type == desc->type && desc->type != DMA_MEMSET)
		return 1;

	return 0;
}
/*
 * mv_set_mode - switch the engine's operation mode
 * @chan: channel to reconfigure
 * @type: DMA_XOR, DMA_MEMCPY or DMA_MEMSET; any other value is a BUG
 *
 * Replaces the low three bits of the config register with the matching
 * operation mode and records @type as the channel's current type.
 */
static void mv_set_mode(struct mv_xor_chan *chan,
			enum dma_transaction_type type)
{
	u32 config = __raw_readl(XOR_CONFIG(chan));
	u32 op_mode;

	if (type == DMA_XOR) {
		op_mode = XOR_OPERATION_MODE_XOR;
	} else if (type == DMA_MEMCPY) {
		op_mode = XOR_OPERATION_MODE_MEMCPY;
	} else if (type == DMA_MEMSET) {
		op_mode = XOR_OPERATION_MODE_MEMSET;
	} else {
		dev_printk(KERN_ERR, chan->device->common.dev,
			   "error: unsupported operation %d.\n",
			   type);
		BUG();
		return;
	}

	config = (config & ~0x7) | op_mode;
	__raw_writel(config, XOR_CONFIG(chan));
	chan->current_type = type;
}
/* Set bit 0 of the activation register (read-modify-write). */
static void mv_chan_activate(struct mv_xor_chan *chan)
{
	u32 reg;

	dev_dbg(chan->device->common.dev, " activate chan.\n");

	reg = __raw_readl(XOR_ACTIVATION(chan)) | 0x1;
	__raw_writel(reg, XOR_ACTIVATION(chan));
}
/*
 * Return 1 when the two-bit field at bits 5:4 of the activation register
 * equals 1, 0 for any other value of that field.
 */
static char mv_chan_is_busy(struct mv_xor_chan *chan)
{
	u32 activation = __raw_readl(XOR_ACTIVATION(chan));
	u32 status = (activation >> 4) & 0x3;

	if (status == 1)
		return 1;

	return 0;
}
/*
 * Number of descriptor slots an XOR of @len bytes over @src_cnt sources
 * consumes.  Always one, independent of both arguments.
 */
static int mv_chan_xor_slot_count(size_t len, int src_cnt)
{
	const int slots_needed = 1;

	return slots_needed;
}
/**
 * mv_xor_free_slots - flag a descriptor slot for reuse
 * @mv_chan: channel that owns the slot (used for debug output only)
 * @slot: slot to release
 *
 * A slot counts as free once its slots_per_op is zero; that is the field
 * the allocator tests.  Caller must hold &mv_chan->lock.
 */
static void mv_xor_free_slots(struct mv_xor_chan *mv_chan,
			      struct mv_xor_desc_slot *slot)
{
	dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n",
		__func__, __LINE__, slot);

	slot->slots_per_op = 0;
}
/*
 * mv_xor_start_new_chain - program the engine to operate on the new chain
 * headed by sw_desc
 *
 * Switches the operation mode if the descriptor's type differs from the
 * channel's current one, loads the engine registers, accounts the slots as
 * pending and calls mv_xor_issue_pending().
 * Caller must hold &mv_chan->lock while calling this function
 */
static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan,
				   struct mv_xor_desc_slot *sw_desc)
{
	dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n",
		__func__, __LINE__, sw_desc);

	if (mv_chan->current_type != sw_desc->type)
		mv_set_mode(mv_chan, sw_desc->type);

	if (sw_desc->type != DMA_MEMSET) {
		/* hand the head of the hardware chain to the engine */
		mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
	} else {
		/* memset uses no descriptors: the destination, size and
		 * fill value are programmed straight into the registers
		 */
		struct mv_xor_desc *hw = sw_desc->hw_desc;

		mv_chan_set_dest_pointer(mv_chan, hw->phy_dest_addr);
		mv_chan_set_block_size(mv_chan, sw_desc->unmap_len);
		mv_chan_set_value(mv_chan, sw_desc->value);
	}

	mv_chan->pending += sw_desc->slot_cnt;
	mv_xor_issue_pending(&mv_chan->common);
}
/*
 * mv_xor_run_tx_complete_actions - completion work for one descriptor
 * @desc: descriptor that has finished
 * @mv_chan: channel the descriptor belongs to
 * @cookie: most recent completed cookie seen so far by the caller
 *
 * For a descriptor carrying a positive cookie: invoke the client callback,
 * then unmap the destination and source buffers unless the corresponding
 * DMA_COMPL_SKIP_*_UNMAP flag is set.  Dependent operations are run in
 * every case.
 *
 * Returns the descriptor's cookie if it is positive, otherwise @cookie
 * unchanged.  A negative cookie is a BUG.
 */
static dma_cookie_t
mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
	struct mv_xor_chan *mv_chan, dma_cookie_t cookie)
{
	struct dma_async_tx_descriptor *tx = &desc->async_tx;

	BUG_ON(tx->cookie < 0);

	if (tx->cookie > 0) {
		cookie = tx->cookie;

		/* the callback must not sleep or submit new operations
		 * to this channel
		 */
		if (tx->callback)
			tx->callback(tx->callback_param);

		/* unmap dma addresses
		 * (unmap_single vs unmap_page?)
		 */
		if (desc->group_head && desc->unmap_len) {
			struct mv_xor_desc_slot *head = desc->group_head;
			struct device *dev = &mv_chan->device->pdev->dev;
			enum dma_ctrl_flags flags = tx->flags;
			u32 len = head->unmap_len;
			dma_addr_t addr;
			u32 remaining;

			if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
				addr = mv_desc_get_dest_addr(head);
				dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
			}

			if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
				/* walk the sources from last to first */
				for (remaining = head->unmap_src_cnt;
				     remaining--; ) {
					addr = mv_desc_get_src_addr(head,
								    remaining);
					dma_unmap_page(dev, addr, len,
						       DMA_TO_DEVICE);
				}
			}

			desc->group_head = NULL;
		}
	}

	/* run dependent operations */
	async_tx_run_dependencies(tx);

	return cookie;
}
/*
 * Release every slot on the completed list that the client has acked;
 * unacked slots stay on the list.  Always returns 0.
 */
static int
mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan)
{
	struct mv_xor_desc_slot *slot, *next;

	dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);

	list_for_each_entry_safe(slot, next, &mv_chan->completed_slots,
				 completed_node) {
		if (!async_tx_test_ack(&slot->async_tx))
			continue;

		list_del(&slot->completed_node);
		mv_xor_free_slots(mv_chan, slot);
	}

	return 0;
}
/*
 * Take a finished descriptor off the chain.  An acked descriptor is freed
 * immediately; an unacked one is parked on the completed list, because the
 * client may still attach dependent operations until it sets 'ack'.
 * Always returns 0.
 */
static int
mv_xor_clean_slot(struct mv_xor_desc_slot *desc,
	struct mv_xor_chan *mv_chan)
{
	dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags %d\n",
		__func__, __LINE__, desc, desc->async_tx.flags);

	list_del(&desc->chain_node);

	if (async_tx_test_ack(&desc->async_tx))
		mv_xor_free_slots(mv_chan, desc);
	else
		list_add_tail(&desc->completed_node,
			      &mv_chan->completed_slots);

	return 0;
}
/*
 * __mv_xor_slot_cleanup - reap finished descriptors from the channel's chain
 *
 * Samples the busy state and the current-descriptor register once, frees
 * acked entries on the completed list, then walks the chain from its head
 * running completion actions and removing descriptors.  If the engine is
 * idle and descriptors remain on the chain afterwards, the engine is
 * restarted on the new chain head.  completed_cookie is updated when at
 * least one descriptor with a positive cookie was processed.
 *
 * The helpers called here (mv_xor_free_slots, mv_xor_start_new_chain)
 * document that &mv_chan->lock must be held by the caller.
 */
static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
{
struct mv_xor_desc_slot *iter, *_iter;
dma_cookie_t cookie = 0;
int busy = mv_chan_is_busy(mv_chan);
u32 current_desc = mv_chan_get_current_desc(mv_chan);
int seen_current = 0;
dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
dev_dbg(mv_chan->device->common.dev, "current_desc %x\n", current_desc);
mv_xor_clean_completed_slots(mv_chan);
/* free completed slots from the chain starting with
* the oldest descriptor
*/
list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
chain_node) {
prefetch(_iter);
prefetch(&_iter->async_tx);
/* do not advance past the current descriptor loaded into the
* hardware channel, subsequent descriptors are either in
* process or have not been submitted
*/
if (seen_current)
break;
/* stop the search if we reach the current descriptor and the
* channel is busy
*/
if (iter->async_tx.phys == current_desc) {
seen_current = 1;
if (busy)
break;
}
/* the current descriptor itself is only reaped when the engine is idle */
cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie);
if (mv_xor_clean_slot(iter, mv_chan))
break;
}
/* engine idle with work still queued: restart on the new chain head */
if ((busy == 0) && !list_empty(&mv_chan->chain)) {
struct mv_xor_desc_slot *chain_head;
chain_head = list_entry(mv_chan->chain.next,
struct mv_xor_desc_slot,
chain_node);
mv_xor_start_new_chain(mv_chan, chain_head);
}
/* cookie stays 0 when nothing with a positive cookie completed */
if (cookie > 0)
mv_chan->completed_cookie = cookie;
}
static void
mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
{
spin_lock_bh(&mv_chan->lock);
__mv_xor_slot_cleanup(mv_chan);
spin_unlock_bh(&mv_chan->lock);
}
/*
 * mv_xor_tasklet - deferred cleanup, scheduled from the interrupt handler
 * and from mv_xor_alloc_slots() when an allocation fails
 * @data: the struct mv_xor_chan pointer, cast to unsigned long
 *
 * The cleanup walks and modifies the channel's chain and completed lists
 * and may restart the engine; the helpers it calls require &mv_chan->lock.
 * The tasklet therefore goes through the locked wrapper instead of calling
 * __mv_xor_slot_cleanup() directly, so it cannot race with the prep and
 * submit paths that take the same lock.
 */
static void mv_xor_tasklet(unsigned long data)
{
	struct mv_xor_chan *chan = (struct mv_xor_chan *) data;

	mv_xor_slot_cleanup(chan);
}
/*
 * mv_xor_alloc_slots - find and claim a run of contiguous free slots
 * @mv_chan: channel whose slot list is searched
 * @num_slots: total number of slots required
 * @slots_per_op: number of slots each operation in the group occupies
 *
 * A slot is free when its slots_per_op is zero.  On success the claimed
 * descriptors are collected on the tx_list of the last descriptor of the
 * group, which is returned with cookie -EBUSY and group_head pointing at
 * the first slot.  On failure the cleanup tasklet is scheduled and NULL is
 * returned.
 *
 * The prep routines call this with &mv_chan->lock held.
 */
static struct mv_xor_desc_slot *
mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots,
int slots_per_op)
{
struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL;
LIST_HEAD(chain);
int slots_found, retry = 0;
/* start search from the last allocated descriptor
* if a contiguous allocation can not be found start searching
* from the beginning of the list
*/
retry:
slots_found = 0;
if (retry == 0)
iter = mv_chan->last_used;
else
/* treat the list head as an entry so the 'continue' iteration
* below starts at the first real slot
*/
iter = list_entry(&mv_chan->all_slots,
struct mv_xor_desc_slot,
slot_node);
list_for_each_entry_safe_continue(
iter, _iter, &mv_chan->all_slots, slot_node) {
prefetch(_iter);
prefetch(&_iter->async_tx);
if (iter->slots_per_op) {
/* give up after finding the first busy slot
* on the second pass through the list
*/
if (retry)
break;
slots_found = 0;
continue;
}
/* remember the first slot of this run of free slots */
if (!slots_found++)
alloc_start = iter;
if (slots_found == num_slots) {
struct mv_xor_desc_slot *alloc_tail = NULL;
struct mv_xor_desc_slot *last_used = NULL;
iter = alloc_start;
while (num_slots) {
int i;
/* ack every descriptor of the group here; the prep
* routines overwrite the flags of the returned (last)
* descriptor afterwards
*/
async_tx_ack(&iter->async_tx);
list_add_tail(&iter->chain_node, &chain);
alloc_tail = iter;
iter->async_tx.cookie = 0;
iter->slot_cnt = num_slots;
iter->xor_check_result = NULL;
/* mark each slot of this operation busy (non-zero slots_per_op) */
for (i = 0; i < slots_per_op; i++) {
iter->slots_per_op = slots_per_op - i;
last_used = iter;
iter = list_entry(iter->slot_node.next,
struct mv_xor_desc_slot,
slot_node);
}
num_slots -= slots_per_op;
}
alloc_tail->group_head = alloc_start;
alloc_tail->async_tx.cookie = -EBUSY;
list_splice(&chain, &alloc_tail->async_tx.tx_list);
mv_chan->last_used = last_used;
mv_desc_clear_next_desc(alloc_start);
mv_desc_clear_next_desc(alloc_tail);
return alloc_tail;
}
}
if (!retry++)
goto retry;
/* try to free some slots if the allocation fails */
tasklet_schedule(&mv_chan->irq_tasklet);
return NULL;
}
/*
 * Hand out the next cookie for the channel: the previous value plus one,
 * restarting at 1 when the increment goes negative.  The value is stored
 * in both the descriptor and the channel, and returned.
 */
static dma_cookie_t
mv_desc_assign_cookie(struct mv_xor_chan *mv_chan,
	struct mv_xor_desc_slot *desc)
{
	dma_cookie_t next = mv_chan->common.cookie;

	next++;
	if (next < 0)
		next = 1;

	desc->async_tx.cookie = next;
	mv_chan->common.cookie = next;

	return next;
}
/************************ DMA engine API functions ****************************/
/*
 * mv_xor_tx_submit - queue a prepared descriptor on the channel
 * @tx: descriptor returned by one of the mv_xor_prep_dma_* routines
 *
 * Assigns a cookie, moves the descriptor's tx_list onto the channel's
 * software chain and, where mv_can_chain() allows it, links the descriptor
 * into the hardware chain behind the previous tail.  The engine is started
 * when the software chain was empty, or when the engine is idle and stopped
 * on the old tail.
 *
 * Returns the cookie assigned to the transaction.
 */
static dma_cookie_t
mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
{
struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx);
struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
struct mv_xor_desc_slot *grp_start, *old_chain_tail;
dma_cookie_t cookie;
int new_hw_chain = 1;
dev_dbg(mv_chan->device->common.dev,
"%s sw_desc %p: async_tx %p\n",
__func__, sw_desc, &sw_desc->async_tx);
grp_start = sw_desc->group_head;
spin_lock_bh(&mv_chan->lock);
cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
/* empty software chain: this descriptor starts a new hardware chain */
if (list_empty(&mv_chan->chain))
list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain);
else {
new_hw_chain = 0;
old_chain_tail = list_entry(mv_chan->chain.prev,
struct mv_xor_desc_slot,
chain_node);
list_splice_init(&grp_start->async_tx.tx_list,
&old_chain_tail->chain_node);
/* not chainable in hardware: leave it queued on the software chain
* only; __mv_xor_slot_cleanup() starts it once the engine is idle
*/
if (!mv_can_chain(grp_start))
goto submit_done;
dev_dbg(mv_chan->device->common.dev, "Append to last desc %x\n",
old_chain_tail->async_tx.phys);
/* fix up the hardware chain */
mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys);
/* if the channel is not busy */
if (!mv_chan_is_busy(mv_chan)) {
u32 current_desc = mv_chan_get_current_desc(mv_chan);
/*
* and the current desc is the end of the chain before
* the append, then we need to start the channel
*/
if (current_desc == old_chain_tail->async_tx.phys)
new_hw_chain = 1;
}
}
if (new_hw_chain)
mv_xor_start_new_chain(mv_chan, grp_start);
submit_done:
spin_unlock_bh(&mv_chan->lock);
return cookie;
}
/* returns the number of allocated descriptors */
static int mv_xor_alloc_chan_resources(struct dma_chan *chan,
struct dma_client *client)
{
char *hw_desc;
int idx;
struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
struct mv_xor_desc_slot *slot = NULL;
struct mv_xor_platform_data *plat_data =
mv_chan->device->pdev->dev.platform_data;
int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;
/* Allocate descriptor slots */
idx = mv_chan->slots_allocated;
while (idx < num_descs_in_pool) {
slot = kzalloc(sizeof(*slot), GFP_KERNEL);
if (!slot) {
printk(KERN_INFO "MV XOR Channel only initialized"
" %d descriptor slots", idx);
break;
}
hw_desc = (char *) mv_chan->device->dma_desc_pool_virt;
slot->hw_desc = (void *) &hw_desc[idx * MV_XOR_SLOT_SIZE];
dma_async_tx_descriptor_init(&slot->async_tx, chan);
slot->async_tx.tx_submit = mv_xor_tx_submit;
INIT_LIST_HEAD(&slot->chain_node);
INIT_LIST_HEAD(&slot->slot_node);
INIT_LIST_HEAD(&slot->async_tx.tx_list);
hw_desc = (char *) mv_chan->device->dma_desc_pool;
slot->async_tx.phys =
(dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
slot->idx = idx++;
spin_lock_bh(&mv_chan->lock);
mv_chan->slots_allocated = idx;
list_add_tail(&slot->slot_node, &mv_chan->all_slots);
spin_unlock_bh(&mv_chan->lock);
}
if (mv_chan->slots_allocated && !mv_chan->last_used)
mv_chan->last_used = list_entry(mv_chan->all_slots.next,
struct mv_xor_desc_slot,
slot_node);
dev_dbg(mv_chan->device->common.dev,
"allocated %d descriptor slots last_used: %p\n",
mv_chan->slots_allocated, mv_chan->last_used);
return mv_chan->slots_allocated ? : -ENOMEM;
}
/*
 * mv_xor_prep_dma_memcpy - prepare a memory-to-memory copy descriptor
 * @chan: channel to prepare the operation on
 * @dest: bus address of the destination buffer
 * @src: bus address of the source buffer
 * @len: number of bytes to copy
 * @flags: descriptor control flags, stored in the returned descriptor
 *
 * Returns the prepared descriptor, or NULL when @len is below
 * MV_XOR_MIN_BYTE_COUNT or no slot could be allocated.  A @len above
 * MV_XOR_MAX_BYTE_COUNT is a BUG.
 */
static struct dma_async_tx_descriptor *
mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
		size_t len, unsigned long flags)
{
	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
	struct mv_xor_desc_slot *sw_desc, *head;
	int nr_slots;

	dev_dbg(mv_chan->device->common.dev,
		"%s dest: %x src %x len: %u flags: %ld\n",
		__func__, dest, src, len, flags);

	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
		return NULL;

	BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));

	spin_lock_bh(&mv_chan->lock);

	nr_slots = mv_chan_memcpy_slot_count(len);
	sw_desc = mv_xor_alloc_slots(mv_chan, nr_slots, 1);
	if (sw_desc) {
		head = sw_desc->group_head;

		sw_desc->type = DMA_MEMCPY;
		sw_desc->async_tx.flags = flags;

		/* fill in the hardware descriptor at the head of the group */
		mv_desc_init(head, flags);
		mv_desc_set_byte_count(head, len);
		mv_desc_set_dest_addr(sw_desc->group_head, dest);
		mv_desc_set_src_addr(head, 0, src);

		/* bookkeeping for the unmap done at completion time */
		sw_desc->unmap_src_cnt = 1;
		sw_desc->unmap_len = len;
	}

	spin_unlock_bh(&mv_chan->lock);

	dev_dbg(mv_chan->device->common.dev,
		"%s sw_desc %p async_tx %p\n",
		__func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);

	if (!sw_desc)
		return NULL;

	return &sw_desc->async_tx;
}
/*
 * mv_xor_prep_dma_memset - prepare a memory fill descriptor
 * @chan: channel to prepare the operation on
 * @dest: bus address of the destination buffer
 * @value: fill value
 * @len: number of bytes to fill
 * @flags: descriptor control flags, stored in the returned descriptor
 *
 * Returns the prepared descriptor, or NULL when @len is below
 * MV_XOR_MIN_BYTE_COUNT or no slot could be allocated.  A @len above
 * MV_XOR_MAX_BYTE_COUNT is a BUG.
 */
static struct dma_async_tx_descriptor *
mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
		size_t len, unsigned long flags)
{
	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
	struct mv_xor_desc_slot *sw_desc, *grp_start;
	int slot_cnt;

	dev_dbg(mv_chan->device->common.dev,
		"%s dest: %x len: %u flags: %ld\n",
		__func__, dest, len, flags);

	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
		return NULL;

	BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));

	spin_lock_bh(&mv_chan->lock);
	slot_cnt = mv_chan_memset_slot_count(len);
	sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
	if (sw_desc) {
		sw_desc->type = DMA_MEMSET;
		sw_desc->async_tx.flags = flags;
		grp_start = sw_desc->group_head;
		mv_desc_init(grp_start, flags);
		mv_desc_set_byte_count(grp_start, len);
		mv_desc_set_dest_addr(sw_desc->group_head, dest);
		mv_desc_set_block_fill_val(grp_start, value);
		/* NOTE(review): a memset has no source buffer and no source
		 * address is written to the descriptor, yet a source count of
		 * 1 makes the completion path unmap source 0 unless
		 * DMA_COMPL_SKIP_SRC_UNMAP is set - confirm this is intended.
		 */
		sw_desc->unmap_src_cnt = 1;
		sw_desc->unmap_len = len;
	}
	spin_unlock_bh(&mv_chan->lock);

	/* sw_desc is NULL when allocation failed: do not form a member
	 * address from it, same as the memcpy prep routine
	 */
	dev_dbg(mv_chan->device->common.dev,
		"%s sw_desc %p async_tx %p \n",
		__func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);

	return sw_desc ? &sw_desc->async_tx : NULL;
}
/*
 * mv_xor_prep_dma_xor - prepare an XOR descriptor
 * @chan: channel to prepare the operation on
 * @dest: bus address of the destination buffer
 * @src: array of @src_cnt source bus addresses
 * @src_cnt: number of sources
 * @len: number of bytes to process
 * @flags: descriptor control flags, stored in the returned descriptor
 *
 * Returns the prepared descriptor, or NULL when @len is below
 * MV_XOR_MIN_BYTE_COUNT or no slot could be allocated.  A @len above
 * MV_XOR_MAX_BYTE_COUNT is a BUG.
 */
static struct dma_async_tx_descriptor *
mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
		unsigned int src_cnt, size_t len, unsigned long flags)
{
	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
	struct mv_xor_desc_slot *sw_desc, *grp_start;
	int slot_cnt;

	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
		return NULL;

	BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));

	/* labels now follow the argument order (src_cnt, len, dest, flags) */
	dev_dbg(mv_chan->device->common.dev,
		"%s src_cnt: %d len: %u dest %x flags: %ld\n",
		__func__, src_cnt, len, dest, flags);

	spin_lock_bh(&mv_chan->lock);
	slot_cnt = mv_chan_xor_slot_count(len, src_cnt);
	sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
	if (sw_desc) {
		sw_desc->type = DMA_XOR;
		sw_desc->async_tx.flags = flags;
		grp_start = sw_desc->group_head;
		mv_desc_init(grp_start, flags);
		/* the byte count field is the same as in memcpy desc*/
		mv_desc_set_byte_count(grp_start, len);
		mv_desc_set_dest_addr(sw_desc->group_head, dest);
		sw_desc->unmap_src_cnt = src_cnt;
		sw_desc->unmap_len = len;
		/* program the sources from last to first */
		while (src_cnt--)
			mv_desc_set_src_addr(grp_start, src_cnt, src[src_cnt]);
	}
	spin_unlock_bh(&mv_chan->lock);

	/* sw_desc is NULL when allocation failed: do not form a member
	 * address from it, same as the memcpy prep routine
	 */
	dev_dbg(mv_chan->device->common.dev,
		"%s sw_desc %p async_tx %p \n",
		__func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);

	return sw_desc ? &sw_desc->async_tx : NULL;
}
/*
 * mv_xor_free_chan_resources - release every software slot of a channel
 * @chan: channel being torn down
 *
 * Runs one cleanup pass first.  Anything still on the chain or completed
 * lists after that is counted as in use and reported with dev_err(); all
 * slots are freed regardless.
 */
static void mv_xor_free_chan_resources(struct dma_chan *chan)
{
	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
	struct mv_xor_desc_slot *slot, *tmp;
	int leftover = 0;

	mv_xor_slot_cleanup(mv_chan);

	spin_lock_bh(&mv_chan->lock);

	/* descriptors that never completed */
	list_for_each_entry_safe(slot, tmp, &mv_chan->chain,
				 chain_node) {
		list_del(&slot->chain_node);
		leftover++;
	}

	/* descriptors that completed but were never acked */
	list_for_each_entry_safe(slot, tmp, &mv_chan->completed_slots,
				 completed_node) {
		list_del(&slot->completed_node);
		leftover++;
	}

	/* free the slots themselves, newest first */
	list_for_each_entry_safe_reverse(
		slot, tmp, &mv_chan->all_slots, slot_node) {
		list_del(&slot->slot_node);
		kfree(slot);
		mv_chan->slots_allocated--;
	}
	mv_chan->last_used = NULL;

	dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n",
		__func__, mv_chan->slots_allocated);

	spin_unlock_bh(&mv_chan->lock);

	if (leftover != 0)
		dev_err(mv_chan->device->common.dev,
			"freeing %d in use descriptors!\n", leftover);
}
/**
 * mv_xor_is_complete - poll the status of an XOR transaction
 * @chan: XOR channel handle
 * @cookie: XOR transaction identifier
 * @done: if not NULL, receives the last completed cookie
 * @used: if not NULL, receives the last issued cookie
 *
 * If the transaction is not yet reported complete, one cleanup pass is run
 * and the status is evaluated a second time with refreshed cookies.
 */
static enum dma_status mv_xor_is_complete(struct dma_chan *chan,
					  dma_cookie_t cookie,
					  dma_cookie_t *done,
					  dma_cookie_t *used)
{
	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
	dma_cookie_t issued = chan->cookie;
	dma_cookie_t completed = mv_chan->completed_cookie;
	enum dma_status status;

	mv_chan->is_complete_cookie = cookie;

	if (done)
		*done = completed;
	if (used)
		*used = issued;

	status = dma_async_is_complete(cookie, completed, issued);
	if (status != DMA_SUCCESS) {
		/* reap finished descriptors, then look again */
		mv_xor_slot_cleanup(mv_chan);

		issued = chan->cookie;
		completed = mv_chan->completed_cookie;

		if (done)
			*done = completed;
		if (used)
			*used = issued;

		return dma_async_is_complete(cookie, completed, issued);
	}

	/* already complete: only release slots the client has acked */
	mv_xor_clean_completed_slots(mv_chan);
	return status;
}
/*
 * Log the config, activation, interrupt cause, interrupt mask, error cause
 * and error address registers at KERN_ERR level, in that order.
 */
static void mv_dump_xor_regs(struct mv_xor_chan *chan)
{
	struct device *dev = chan->device->common.dev;
	u32 reg;

	reg = __raw_readl(XOR_CONFIG(chan));
	dev_printk(KERN_ERR, dev, "config 0x%08x.\n", reg);

	reg = __raw_readl(XOR_ACTIVATION(chan));
	dev_printk(KERN_ERR, dev, "activation 0x%08x.\n", reg);

	reg = __raw_readl(XOR_INTR_CAUSE(chan));
	dev_printk(KERN_ERR, dev, "intr cause 0x%08x.\n", reg);

	reg = __raw_readl(XOR_INTR_MASK(chan));
	dev_printk(KERN_ERR, dev, "intr mask 0x%08x.\n", reg);

	reg = __raw_readl(XOR_ERROR_CAUSE(chan));
	dev_printk(KERN_ERR, dev, "error cause 0x%08x.\n", reg);

	reg = __raw_readl(XOR_ERROR_ADDR(chan));
	dev_printk(KERN_ERR, dev, "error addr 0x%08x.\n", reg);
}
/*
 * Handle an error interrupt.  A cause with bit 4 set is ignored; any other
 * error is logged together with a register dump and is then a BUG.
 */
static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan,
					 u32 intr_cause)
{
	const u32 ignored_bit = (1 << 4);

	if (intr_cause & ignored_bit) {
		dev_dbg(chan->device->common.dev,
			"ignore this error\n");
		return;
	}

	dev_printk(KERN_ERR, chan->device->common.dev,
		   "error on chan %d. intr cause 0x%08x.\n",
		   chan->idx, intr_cause);

	mv_dump_xor_regs(chan);
	BUG();
}
/*
 * Interrupt handler: pass error causes to the error handler, schedule the
 * cleanup tasklet and then write the end-of-chain cause bit.
 * Always returns IRQ_HANDLED.
 */
static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
{
	struct mv_xor_chan *xor_chan = data;
	u32 cause = mv_chan_get_intr_cause(xor_chan);

	dev_dbg(xor_chan->device->common.dev, "intr cause %x\n", cause);

	if (mv_is_err_intr(cause))
		mv_xor_err_interrupt_handler(xor_chan, cause);

	/* the descriptor reaping itself happens in the tasklet */
	tasklet_schedule(&xor_chan->irq_tasklet);

	mv_xor_device_clear_eoc_cause(xor_chan);

	return IRQ_HANDLED;
}
/*
 * Activate the engine once at least MV_XOR_THRESHOLD slots are pending,
 * resetting the pending count.  Below the threshold nothing happens.
 */
static void mv_xor_issue_pending(struct dma_chan *chan)
{
	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);

	if (mv_chan->pending < MV_XOR_THRESHOLD)
		return;

	mv_chan->pending = 0;
	mv_chan_activate(mv_chan);
}
/*
* Perform a transaction to verify the HW works.
*/
#define MV_XOR_TEST_SIZE 2000
/*
 * mv_xor_memcpy_self_test - copy a test pattern through the engine
 * @device: device whose first channel is exercised
 *
 * Copies MV_XOR_TEST_SIZE bytes of a counting pattern and compares source
 * and destination afterwards.
 *
 * Returns 0 on success, -ENOMEM if the test buffers cannot be allocated,
 * and -ENODEV if the channel cannot be set up, the descriptor cannot be
 * prepared, the copy does not complete in time or the data does not match.
 */
static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device)
{
	int i;
	void *src, *dest;
	dma_addr_t src_dma, dest_dma;
	struct dma_chan *dma_chan;
	dma_cookie_t cookie;
	struct dma_async_tx_descriptor *tx;
	int err = 0;
	struct mv_xor_chan *mv_chan;

	src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
	if (!src)
		return -ENOMEM;

	dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
	if (!dest) {
		kfree(src);
		return -ENOMEM;
	}

	/* Fill in src buffer */
	for (i = 0; i < MV_XOR_TEST_SIZE; i++)
		((u8 *) src)[i] = (u8)i;

	/* Start copy, using first DMA channel */
	dma_chan = container_of(device->common.channels.next,
				struct dma_chan,
				device_node);
	if (mv_xor_alloc_chan_resources(dma_chan, NULL) < 1) {
		err = -ENODEV;
		goto out;
	}

	dest_dma = dma_map_single(dma_chan->device->dev, dest,
				  MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);

	src_dma = dma_map_single(dma_chan->device->dev, src,
				 MV_XOR_TEST_SIZE, DMA_TO_DEVICE);

	tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
				    MV_XOR_TEST_SIZE, 0);
	if (!tx) {
		/* prep returns NULL when no slot is available; submitting a
		 * NULL descriptor would dereference it.  Nothing was queued,
		 * so the completion path will not unmap the buffers for us.
		 */
		dma_unmap_single(dma_chan->device->dev, src_dma,
				 MV_XOR_TEST_SIZE, DMA_TO_DEVICE);
		dma_unmap_single(dma_chan->device->dev, dest_dma,
				 MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
		dev_printk(KERN_ERR, dma_chan->device->dev,
			   "Self-test cannot prepare operation, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

	cookie = mv_xor_tx_submit(tx);
	mv_xor_issue_pending(dma_chan);
	async_tx_ack(tx);
	msleep(1);

	if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
	    DMA_SUCCESS) {
		dev_printk(KERN_ERR, dma_chan->device->dev,
			   "Self-test copy timed out, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

	mv_chan = to_mv_xor_chan(dma_chan);
	dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
				MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
	if (memcmp(src, dest, MV_XOR_TEST_SIZE)) {
		dev_printk(KERN_ERR, dma_chan->device->dev,
			   "Self-test copy failed compare, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

free_resources:
	mv_xor_free_chan_resources(dma_chan);
out:
	kfree(src);
	kfree(dest);
	return err;
}
#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
/*
 * mv_xor_xor_self_test - XOR a set of test pages through the engine
 * @device: device whose first channel is exercised
 *
 * Source page N is filled with the byte (1 << N); the destination page must
 * afterwards hold the XOR of all those bytes in every position.
 *
 * Returns 0 on success, -ENOMEM if the test pages cannot be allocated, and
 * -ENODEV if the channel cannot be set up, the descriptor cannot be
 * prepared, the operation does not complete in time or the result is wrong.
 */
static int __devinit
mv_xor_xor_self_test(struct mv_xor_device *device)
{
	int i, src_idx;
	struct page *dest;
	struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
	dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
	dma_addr_t dest_dma;
	struct dma_async_tx_descriptor *tx;
	struct dma_chan *dma_chan;
	dma_cookie_t cookie;
	u8 cmp_byte = 0;
	u32 cmp_word;
	int err = 0;
	struct mv_xor_chan *mv_chan;

	for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
		if (!xor_srcs[src_idx]) {
			/* free every page allocated so far, then fail; the
			 * return must sit outside the loop so that a failure
			 * on the very first page is reported as well
			 */
			while (src_idx--)
				__free_page(xor_srcs[src_idx]);
			return -ENOMEM;
		}
	}

	dest = alloc_page(GFP_KERNEL);
	if (!dest) {
		/* src_idx == MV_XOR_NUM_SRC_TEST here: release all sources */
		while (src_idx--)
			__free_page(xor_srcs[src_idx]);
		return -ENOMEM;
	}

	/* Fill in src buffers */
	for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
		u8 *ptr = page_address(xor_srcs[src_idx]);
		for (i = 0; i < PAGE_SIZE; i++)
			ptr[i] = (1 << src_idx);
	}

	/* expected result: XOR of the per-page fill bytes, in every byte */
	for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++)
		cmp_byte ^= (u8) (1 << src_idx);

	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
		(cmp_byte << 8) | cmp_byte;

	memset(page_address(dest), 0, PAGE_SIZE);

	dma_chan = container_of(device->common.channels.next,
				struct dma_chan,
				device_node);
	if (mv_xor_alloc_chan_resources(dma_chan, NULL) < 1) {
		err = -ENODEV;
		goto out;
	}

	/* test xor */
	dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
				DMA_FROM_DEVICE);

	for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
		dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
					   0, PAGE_SIZE, DMA_TO_DEVICE);

	tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
				 MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0);
	if (!tx) {
		/* prep returns NULL when no slot is available; submitting a
		 * NULL descriptor would dereference it.  Nothing was queued,
		 * so the completion path will not unmap the pages for us.
		 */
		for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
			dma_unmap_page(dma_chan->device->dev, dma_srcs[i],
				       PAGE_SIZE, DMA_TO_DEVICE);
		dma_unmap_page(dma_chan->device->dev, dest_dma, PAGE_SIZE,
			       DMA_FROM_DEVICE);
		dev_printk(KERN_ERR, dma_chan->device->dev,
			   "Self-test cannot prepare operation, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

	cookie = mv_xor_tx_submit(tx);
	mv_xor_issue_pending(dma_chan);
	async_tx_ack(tx);
	msleep(8);

	if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
	    DMA_SUCCESS) {
		dev_printk(KERN_ERR, dma_chan->device->dev,
			   "Self-test xor timed out, disabling\n");
		err = -ENODEV;
		goto free_resources;
	}

	mv_chan = to_mv_xor_chan(dma_chan);
	dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
				PAGE_SIZE, DMA_FROM_DEVICE);
	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
		u32 *ptr = page_address(dest);
		if (ptr[i] != cmp_word) {
			dev_printk(KERN_ERR, dma_chan->device->dev,
				   "Self-test xor failed compare, disabling."
				   " index %d, data %x, expected %x\n", i,
				   ptr[i], cmp_word);
			err = -ENODEV;
			goto free_resources;
		}
	}

free_resources:
	mv_xor_free_chan_resources(dma_chan);
out:
	src_idx = MV_XOR_NUM_SRC_TEST;
	while (src_idx--)
		__free_page(xor_srcs[src_idx]);
	__free_page(dest);
	return err;
}
/*
 * mv_xor_remove - platform driver remove callback
 * @dev: platform device being removed
 *
 * Unregisters the DMA device, releases the hardware descriptor pool and
 * unlinks every channel from the device's channel list.  Always returns 0.
 */
static int __devexit mv_xor_remove(struct platform_device *dev)
{
	struct mv_xor_device *device = platform_get_drvdata(dev);
	struct dma_chan *chan, *_chan;
	struct mv_xor_platform_data *plat_data = dev->dev.platform_data;

	dma_async_device_unregister(&device->common);

	/* NOTE(review): the pool is allocated in mv_xor_probe() with
	 * dma_alloc_writecombine() but released with dma_free_coherent() -
	 * confirm the two are interchangeable on the target platform.
	 */
	dma_free_coherent(&dev->dev, plat_data->pool_size,
			  device->dma_desc_pool_virt, device->dma_desc_pool);

	/* the channel structures are devm-allocated in probe, so they only
	 * need unlinking here
	 */
	list_for_each_entry_safe(chan, _chan, &device->common.channels,
				 device_node)
		list_del(&chan->device_node);

	return 0;
}
/**
 * mv_xor_probe - set up one XOR channel and register it with dmaengine
 * @pdev: platform device carrying a struct mv_xor_platform_data
 *
 * Allocates the hardware descriptor pool, fills in the dmaengine callbacks
 * according to the capability mask from the platform data, initialises the
 * single channel of this device, requests its interrupt, runs the memcpy
 * and xor self tests for the advertised capabilities and finally registers
 * the dma_device.
 *
 * Returns 0 on success or a negative errno.  The device and channel
 * structures and the IRQ are devm-managed; only the descriptor pool is
 * released explicitly on the error path.
 */
static int __devinit mv_xor_probe(struct platform_device *pdev)
{
	int ret = 0;
	int irq;
	struct mv_xor_device *adev;
	struct mv_xor_chan *mv_chan;
	struct dma_device *dma_dev;
	struct mv_xor_platform_data *plat_data = pdev->dev.platform_data;

	/* everything below is driven by board-supplied platform data */
	if (!plat_data || !plat_data->shared)
		return -ENODEV;

	adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL);
	if (!adev)
		return -ENOMEM;

	dma_dev = &adev->common;

	/* allocate coherent memory for hardware descriptors
	 * note: writecombine gives slightly better performance, but
	 * requires that we explicitly flush the writes
	 */
	adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
							  plat_data->pool_size,
							  &adev->dma_desc_pool,
							  GFP_KERNEL);
	if (!adev->dma_desc_pool_virt)
		return -ENOMEM;

	adev->id = plat_data->hw_id;

	/* discover transaction capabilities from the platform data */
	dma_dev->cap_mask = plat_data->cap_mask;
	adev->pdev = pdev;
	platform_set_drvdata(pdev, adev);

	/*
	 * The shared device publishes its register mappings through drvdata;
	 * without them there is no register base to work with.
	 */
	adev->shared = platform_get_drvdata(plat_data->shared);
	if (!adev->shared) {
		ret = -ENODEV;
		goto err_free_dma;
	}

	INIT_LIST_HEAD(&dma_dev->channels);

	/* set base routines */
	dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
	dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
	dma_dev->device_is_tx_complete = mv_xor_is_complete;
	dma_dev->device_issue_pending = mv_xor_issue_pending;
	dma_dev->dev = &pdev->dev;

	/* set prep routines based on capability */
	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
		dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
	if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
		dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset;
	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
		dma_dev->max_xor = 8;
		dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
	}

	mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
	if (!mv_chan) {
		ret = -ENOMEM;
		goto err_free_dma;
	}
	mv_chan->device = adev;
	mv_chan->idx = plat_data->hw_id;
	mv_chan->mmr_base = adev->shared->xor_base;
	if (!mv_chan->mmr_base) {
		ret = -ENOMEM;
		goto err_free_dma;
	}

	/*
	 * Bring the channel's software state up before the interrupt is
	 * requested and unmasked, so the handler/tasklet can never observe
	 * an uninitialised lock or list head.
	 */
	spin_lock_init(&mv_chan->lock);
	INIT_LIST_HEAD(&mv_chan->chain);
	INIT_LIST_HEAD(&mv_chan->completed_slots);
	INIT_LIST_HEAD(&mv_chan->all_slots);
	INIT_RCU_HEAD(&mv_chan->common.rcu);
	mv_chan->common.device = dma_dev;

	tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
		     mv_chan);

	/* clear errors before enabling interrupts */
	mv_xor_device_clear_err_status(mv_chan);

	irq = platform_get_irq(pdev, 0);
	if (irq < 0) {
		ret = irq;
		goto err_free_dma;
	}
	ret = devm_request_irq(&pdev->dev, irq,
			       mv_xor_interrupt_handler,
			       0, dev_name(&pdev->dev), mv_chan);
	if (ret)
		goto err_free_dma;

	mv_chan_unmask_interrupts(mv_chan);

	mv_set_mode(mv_chan, DMA_MEMCPY);

	list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);

	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
		ret = mv_xor_memcpy_self_test(adev);
		dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
		if (ret)
			goto err_free_dma;
	}

	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
		ret = mv_xor_xor_self_test(adev);
		dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
		if (ret)
			goto err_free_dma;
	}

	dev_printk(KERN_INFO, &pdev->dev, "Marvell XOR: "
		   "( %s%s%s%s)\n",
		   dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
		   dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
		   dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
		   dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");

	/* a failed registration must fail the probe, not be dropped */
	ret = dma_async_device_register(dma_dev);
	if (ret)
		goto err_free_dma;

	return 0;

err_free_dma:
	dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
			  adev->dma_desc_pool_virt, adev->dma_desc_pool);
	return ret;
}
static void
mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
struct mbus_dram_target_info *dram)
{
void __iomem *base = msp->xor_base;
u32 win_enable = 0;
int i;
for (i = 0; i < 8; i++) {
writel(0, base + WINDOW_BASE(i));
writel(0, base + WINDOW_SIZE(i));
if (i < 4)
writel(0, base + WINDOW_REMAP_HIGH(i));
}
for (i = 0; i < dram->num_cs; i++) {
struct mbus_dram_window *cs = dram->cs + i;
writel((cs->base & 0xffff0000) |
(cs->mbus_attr << 8) |
dram->mbus_dram_target_id, base + WINDOW_BASE(i));
writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
win_enable |= (1 << i);
win_enable |= 3 << (16 + (2 * i));
}
writel(win_enable, base + WINDOW_BAR_ENABLE(0));
writel(win_enable, base + WINDOW_BAR_ENABLE(1));
}
/*
 * Platform driver for the per-channel XOR devices (MV_XOR_NAME).
 *
 * mv_xor_remove() is annotated __devexit, so it has to be referenced via
 * __devexit_p(): when the exit section is discarded the macro substitutes
 * NULL instead of leaving a pointer to a function that no longer exists.
 */
static struct platform_driver mv_xor_driver = {
	.probe		= mv_xor_probe,
	.remove		= __devexit_p(mv_xor_remove),
	.driver		= {
		.owner	= THIS_MODULE,
		.name	= MV_XOR_NAME,
	},
};
static int mv_xor_shared_probe(struct platform_device *pdev)
{
struct mv_xor_platform_shared_data *msd = pdev->dev.platform_data;
struct mv_xor_shared_private *msp;
struct resource *res;
dev_printk(KERN_NOTICE, &pdev->dev, "Marvell shared XOR driver\n");
msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL);
if (!msp)
return -ENOMEM;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
return -ENODEV;
msp->xor_base = devm_ioremap(&pdev->dev, res->start,
res->end - res->start + 1);
if (!msp->xor_base)
return -EBUSY;
res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
if (!res)
return -ENODEV;
msp->xor_high_base = devm_ioremap(&pdev->dev, res->start,
res->end - res->start + 1);
if (!msp->xor_high_base)
return -EBUSY;
platform_set_drvdata(pdev, msp);
/*
* (Re-)program MBUS remapping windows if we are asked to.
*/
if (msd != NULL && msd->dram != NULL)
mv_xor_conf_mbus_windows(msp, msd->dram);
return 0;
}
/*
 * Platform .remove hook for the shared XOR device.  It performs no work and
 * always reports success; the private data and register mappings were
 * obtained through devm_* helpers in mv_xor_shared_probe().
 */
static int mv_xor_shared_remove(struct platform_device *pdev)
{
return 0;
}
/*
 * Platform driver for the shared XOR device (MV_XOR_SHARED_NAME), which
 * owns the register mappings used by the per-channel devices.
 */
static struct platform_driver mv_xor_shared_driver = {
	.driver		= {
		.name	= MV_XOR_SHARED_NAME,
		.owner	= THIS_MODULE,
	},
	.remove		= mv_xor_shared_remove,
	.probe		= mv_xor_shared_probe,
};
/*
 * Module entry point: register the shared driver first, then the channel
 * driver.  If the channel driver cannot be registered the shared driver is
 * unregistered again so nothing is left half set up.
 */
static int __init mv_xor_init(void)
{
	int err;

	err = platform_driver_register(&mv_xor_shared_driver);
	if (err)
		return err;

	err = platform_driver_register(&mv_xor_driver);
	if (err)
		platform_driver_unregister(&mv_xor_shared_driver);

	return err;
}
module_init(mv_xor_init);
/*
 * it's currently unsafe to unload this module
 *
 * The exit handler below is therefore compiled out with #if 0, so no
 * module_exit() is registered.  When enabled it would unregister the two
 * platform drivers in the reverse order of mv_xor_init().
 */
#if 0
static void __exit mv_xor_exit(void)
{
platform_driver_unregister(&mv_xor_driver);
platform_driver_unregister(&mv_xor_shared_driver);
return;
}
module_exit(mv_xor_exit);
#endif
MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>");
MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine");
MODULE_LICENSE("GPL");
/*
* Copyright (C) 2007, 2008, Marvell International Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef MV_XOR_H
#define MV_XOR_H
#include <linux/types.h>
#include <linux/io.h>
#include <linux/dmaengine.h>
#include <linux/interrupt.h>
/* Enables the USE_TIMER-guarded members of mv_xor_chan / mv_xor_desc_slot */
#define USE_TIMER

#define MV_XOR_SLOT_SIZE		64
#define MV_XOR_THRESHOLD		1

/* operation mode values */
#define XOR_OPERATION_MODE_XOR		0
#define XOR_OPERATION_MODE_MEMCPY	2
#define XOR_OPERATION_MODE_MEMSET	4

/*
 * Register addresses.  @chan is any expression yielding a pointer with
 * ->mmr_base and ->idx members; it is parenthesised so that arguments such
 * as "p + 1" or "&chans[i]" expand correctly.  Registers whose address
 * includes "idx * 4" are replicated per channel index.
 */
#define XOR_CURR_DESC(chan)	((chan)->mmr_base + 0x210 + ((chan)->idx * 4))
#define XOR_NEXT_DESC(chan)	((chan)->mmr_base + 0x200 + ((chan)->idx * 4))
#define XOR_BYTE_COUNT(chan)	((chan)->mmr_base + 0x220 + ((chan)->idx * 4))
#define XOR_DEST_POINTER(chan)	((chan)->mmr_base + 0x2B0 + ((chan)->idx * 4))
#define XOR_BLOCK_SIZE(chan)	((chan)->mmr_base + 0x2C0 + ((chan)->idx * 4))
#define XOR_INIT_VALUE_LOW(chan)	((chan)->mmr_base + 0x2E0)
#define XOR_INIT_VALUE_HIGH(chan)	((chan)->mmr_base + 0x2E4)

#define XOR_CONFIG(chan)	((chan)->mmr_base + 0x10 + ((chan)->idx * 4))
#define XOR_ACTIVATION(chan)	((chan)->mmr_base + 0x20 + ((chan)->idx * 4))
#define XOR_INTR_CAUSE(chan)	((chan)->mmr_base + 0x30)
#define XOR_INTR_MASK(chan)	((chan)->mmr_base + 0x40)
#define XOR_ERROR_CAUSE(chan)	((chan)->mmr_base + 0x50)
#define XOR_ERROR_ADDR(chan)	((chan)->mmr_base + 0x60)
#define XOR_INTR_MASK_VALUE	0x3F5

/* MBUS window register offsets, relative to the shared register base */
#define WINDOW_BASE(w)		(0x250 + ((w) << 2))
#define WINDOW_SIZE(w)		(0x270 + ((w) << 2))
#define WINDOW_REMAP_HIGH(w)	(0x290 + ((w) << 2))
#define WINDOW_BAR_ENABLE(chan)	(0x240 + ((chan) << 2))
/**
 * struct mv_xor_shared_private - register mappings owned by the shared device
 * @xor_base: mapping of the shared platform device's MEM resource 0; the
 *	channel register base and the MBUS window registers are taken from it
 * @xor_high_base: mapping of the shared platform device's MEM resource 1
 */
struct mv_xor_shared_private {
void __iomem *xor_base;
void __iomem *xor_high_base;
};
/**
 * struct mv_xor_device - internal representation of a XOR device
 * @pdev: Platform device
 * @id: HW XOR Device selector
 * @dma_desc_pool: base of DMA descriptor region (DMA address)
 * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
 * @common: embedded struct dma_device
 * @shared: private data of the shared XOR platform device, which holds the
 *	register mappings
 */
struct mv_xor_device {
struct platform_device *pdev;
int id;
dma_addr_t dma_desc_pool;
void *dma_desc_pool_virt;
struct dma_device common;
struct mv_xor_shared_private *shared;
};
/**
 * struct mv_xor_chan - internal representation of a XOR channel
 * @pending: allows batching of hardware operations
 * @completed_cookie: identifier for the most recently completed operation
 * @lock: serializes enqueue/dequeue operations to the descriptors pool
 * @mmr_base: memory mapped register base
 * @idx: the index of the xor channel
 * @current_type: NOTE(review): not described in the original comment;
 *	presumably the transaction type the channel is currently set up
 *	for - confirm against mv_set_mode()
 * @chain: device chain view of the descriptors
 * @completed_slots: slots completed by HW but still need to be acked
 * @device: parent device
 * @common: common dmaengine channel object members
 * @last_used: place holder for allocation to continue from where it left off
 * @all_slots: complete domain of slots usable by the channel
 * @slots_allocated: records the actual size of the descriptor slot pool
 * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs
 *
 * The members cleanup_time, current_on_last_cleanup and is_complete_cookie
 * exist only when USE_TIMER is defined.  NOTE(review): their exact use is
 * not visible from this header - document once confirmed.
 */
struct mv_xor_chan {
int pending;
dma_cookie_t completed_cookie;
spinlock_t lock; /* protects the descriptor slot pool */
void __iomem *mmr_base;
unsigned int idx;
enum dma_transaction_type current_type;
struct list_head chain;
struct list_head completed_slots;
struct mv_xor_device *device;
struct dma_chan common;
struct mv_xor_desc_slot *last_used;
struct list_head all_slots;
int slots_allocated;
struct tasklet_struct irq_tasklet;
#ifdef USE_TIMER
unsigned long cleanup_time;
u32 current_on_last_cleanup;
dma_cookie_t is_complete_cookie;
#endif
};
/**
 * struct mv_xor_desc_slot - software descriptor
 * @slot_node: node on the mv_xor_chan.all_slots list
 * @chain_node: node on the mv_xor_chan.chain list
 * @completed_node: node on the mv_xor_chan.completed_slots list
 * @type: NOTE(review): not described in the original comment; presumably
 *	the transaction type this slot was prepared for - confirm
 * @hw_desc: virtual address of the hardware descriptor chain
 * @group_head: first operation in a transaction
 * @slot_cnt: total slots used in a transaction (group of operations)
 * @slots_per_op: number of slots per operation
 * @idx: pool index
 * @unmap_src_cnt: number of xor sources
 * @value: NOTE(review): not described in the original comment; presumably
 *	the fill value of a memset operation - confirm
 * @unmap_len: transaction bytecount
 * @async_tx: support for the async_tx api
 * @xor_check_result: result of zero sum
 * @crc32_result: result crc calculation
 *
 * xor_check_result and crc32_result share storage (anonymous union).
 * arrival_time and timeout exist only when USE_TIMER is defined.
 *
 * The original comment also described "@phys: hardware address of the
 * hardware descriptor chain" and "@group_list: list of slots that make up a
 * multi-descriptor transaction, for example transfer lengths larger than
 * the supported hw max"; neither is a member of this structure.
 */
struct mv_xor_desc_slot {
struct list_head slot_node;
struct list_head chain_node;
struct list_head completed_node;
enum dma_transaction_type type;
void *hw_desc;
struct mv_xor_desc_slot *group_head;
u16 slot_cnt;
u16 slots_per_op;
u16 idx;
u16 unmap_src_cnt;
u32 value;
size_t unmap_len;
struct dma_async_tx_descriptor async_tx;
union {
u32 *xor_check_result;
u32 *crc32_result;
};
#ifdef USE_TIMER
unsigned long arrival_time;
struct timer_list timeout;
#endif
};
/*
 * This structure describes the XOR hardware descriptor.  It consists of
 * sixteen 32-bit words (six header words, eight source addresses and two
 * reserved words), i.e. 64 bytes, which matches MV_XOR_SLOT_SIZE.  The field
 * order is the layout the engine reads, so it must not be rearranged.
 */
struct mv_xor_desc {
u32 status; /* descriptor execution status */
u32 crc32_result; /* result of CRC-32 calculation */
u32 desc_command; /* type of operation to be carried out */
u32 phy_next_desc; /* next descriptor address pointer */
u32 byte_count; /* size of src/dst blocks in bytes */
u32 phy_dest_addr; /* destination block address */
u32 phy_src_addr[8]; /* source block addresses */
u32 reserved0;
u32 reserved1;
};
/* Recover the software slot that embeds the given hw_desc member */
#define to_mv_sw_desc(addr_hw_desc) \
	container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc)

/*
 * Address of the idx'th slot counted from @hw_desc.  @hw_desc is
 * parenthesised so the cast applies to the whole argument expression.
 * NOTE(review): the stride is (idx << 5) = 32 bytes per index although
 * MV_XOR_SLOT_SIZE is 64 - confirm which one is intended.
 */
#define mv_hw_desc_slot_idx(hw_desc, idx) \
	((void *)(((unsigned long)(hw_desc)) + ((idx) << 5)))

/* transfer length limits, in bytes */
#define MV_XOR_MIN_BYTE_COUNT	(128)
#define XOR_MAX_BYTE_COUNT	((16 * 1024 * 1024) - 1)
#define MV_XOR_MAX_BYTE_COUNT	XOR_MAX_BYTE_COUNT
#endif
...@@ -198,17 +198,13 @@ iop_chan_memset_slot_count(size_t len, int *slots_per_op) ...@@ -198,17 +198,13 @@ iop_chan_memset_slot_count(size_t len, int *slots_per_op)
static inline int static inline int
iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op) iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
{ {
int num_slots; static const char slot_count_table[] = { 1, 2, 2, 2,
/* slots_to_find = 1 for basic descriptor + 1 per 4 sources above 1 2, 3, 3, 3,
* (1 source => 8 bytes) (1 slot => 32 bytes) 3, 4, 4, 4,
*/ 4, 5, 5, 5,
num_slots = 1 + (((src_cnt - 1) << 3) >> 5); };
if (((src_cnt - 1) << 3) & 0x1f) *slots_per_op = slot_count_table[src_cnt - 1];
num_slots++; return *slots_per_op;
*slots_per_op = num_slots;
return num_slots;
} }
#define ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024) #define ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024)
......
...@@ -260,7 +260,7 @@ static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op) ...@@ -260,7 +260,7 @@ static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op)
static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt, static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt,
int *slots_per_op) int *slots_per_op)
{ {
static const int slot_count_table[] = { 0, static const char slot_count_table[] = {
1, 1, 1, 1, /* 01 - 04 */ 1, 1, 1, 1, /* 01 - 04 */
2, 2, 2, 2, /* 05 - 08 */ 2, 2, 2, 2, /* 05 - 08 */
4, 4, 4, 4, /* 09 - 12 */ 4, 4, 4, 4, /* 09 - 12 */
...@@ -270,7 +270,7 @@ static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt, ...@@ -270,7 +270,7 @@ static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt,
8, 8, 8, 8, /* 25 - 28 */ 8, 8, 8, 8, /* 25 - 28 */
8, 8, 8, 8, /* 29 - 32 */ 8, 8, 8, 8, /* 29 - 32 */
}; };
*slots_per_op = slot_count_table[src_cnt]; *slots_per_op = slot_count_table[src_cnt - 1];
return *slots_per_op; return *slots_per_op;
} }
......
/*
* Marvell XOR platform device data definition file.
*/
#ifndef __ASM_PLAT_ORION_MV_XOR_H
#define __ASM_PLAT_ORION_MV_XOR_H
#include <linux/dmaengine.h>
#include <linux/mbus.h>
#define MV_XOR_SHARED_NAME "mv_xor_shared"
#define MV_XOR_NAME "mv_xor"
struct mbus_dram_target_info;
/**
 * struct mv_xor_platform_shared_data - platform data of the shared XOR device
 * @dram: DRAM target description; when non-NULL the shared driver programs
 *	the MBUS windows from it during probe
 */
struct mv_xor_platform_shared_data {
struct mbus_dram_target_info *dram;
};
/**
 * struct mv_xor_platform_data - platform data of one XOR channel device
 * @shared: the shared XOR platform device; its drvdata supplies the
 *	register mappings used by the channel
 * @hw_id: hardware id, used both as device id and as channel index
 * @cap_mask: dmaengine capabilities the channel should advertise
 * @pool_size: size in bytes of the hardware descriptor pool to allocate
 */
struct mv_xor_platform_data {
struct platform_device *shared;
int hw_id;
dma_cap_mask_t cap_mask;
size_t pool_size;
};
#endif
...@@ -30,4 +30,20 @@ ...@@ -30,4 +30,20 @@
#define GPIO_PIN_PD(N) (GPIO_PIOD_BASE + (N)) #define GPIO_PIN_PD(N) (GPIO_PIOD_BASE + (N))
#define GPIO_PIN_PE(N) (GPIO_PIOE_BASE + (N)) #define GPIO_PIN_PE(N) (GPIO_PIOE_BASE + (N))
/*
* DMAC peripheral hardware handshaking interfaces, used with dw_dmac
*/
#define DMAC_MCI_RX 0
#define DMAC_MCI_TX 1
#define DMAC_DAC_TX 2
#define DMAC_AC97_A_RX 3
#define DMAC_AC97_A_TX 4
#define DMAC_AC97_B_RX 5
#define DMAC_AC97_B_TX 6
#define DMAC_DMAREQ_0 7
#define DMAC_DMAREQ_1 8
#define DMAC_DMAREQ_2 9
#define DMAC_DMAREQ_3 10
#endif /* __ASM_ARCH_AT32AP700X_H__ */ #endif /* __ASM_ARCH_AT32AP700X_H__ */
...@@ -101,21 +101,14 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, ...@@ -101,21 +101,14 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
/** /**
* async_tx_sync_epilog - actions to take if an operation is run synchronously * async_tx_sync_epilog - actions to take if an operation is run synchronously
* @flags: async_tx flags
* @depend_tx: transaction depends on depend_tx
* @cb_fn: function to call when the transaction completes * @cb_fn: function to call when the transaction completes
* @cb_fn_param: parameter to pass to the callback routine * @cb_fn_param: parameter to pass to the callback routine
*/ */
static inline void static inline void
async_tx_sync_epilog(unsigned long flags, async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param)
struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param)
{ {
if (cb_fn) if (cb_fn)
cb_fn(cb_fn_param); cb_fn(cb_fn_param);
if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
async_tx_ack(depend_tx);
} }
void void
...@@ -152,4 +145,6 @@ struct dma_async_tx_descriptor * ...@@ -152,4 +145,6 @@ struct dma_async_tx_descriptor *
async_trigger_callback(enum async_tx_flags flags, async_trigger_callback(enum async_tx_flags flags,
struct dma_async_tx_descriptor *depend_tx, struct dma_async_tx_descriptor *depend_tx,
dma_async_tx_callback cb_fn, void *cb_fn_param); dma_async_tx_callback cb_fn, void *cb_fn_param);
void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
#endif /* _ASYNC_TX_H_ */ #endif /* _ASYNC_TX_H_ */
...@@ -10,6 +10,7 @@ void dca_unregister_notify(struct notifier_block *nb); ...@@ -10,6 +10,7 @@ void dca_unregister_notify(struct notifier_block *nb);
#define DCA_PROVIDER_REMOVE 0x0002 #define DCA_PROVIDER_REMOVE 0x0002
struct dca_provider { struct dca_provider {
struct list_head node;
struct dca_ops *ops; struct dca_ops *ops;
struct device *cd; struct device *cd;
int id; int id;
...@@ -18,7 +19,9 @@ struct dca_provider { ...@@ -18,7 +19,9 @@ struct dca_provider {
struct dca_ops { struct dca_ops {
int (*add_requester) (struct dca_provider *, struct device *); int (*add_requester) (struct dca_provider *, struct device *);
int (*remove_requester) (struct dca_provider *, struct device *); int (*remove_requester) (struct dca_provider *, struct device *);
u8 (*get_tag) (struct dca_provider *, int cpu); u8 (*get_tag) (struct dca_provider *, struct device *,
int cpu);
int (*dev_managed) (struct dca_provider *, struct device *);
}; };
struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size); struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
...@@ -32,9 +35,11 @@ static inline void *dca_priv(struct dca_provider *dca) ...@@ -32,9 +35,11 @@ static inline void *dca_priv(struct dca_provider *dca)
} }
/* Requester API */ /* Requester API */
#define DCA_GET_TAG_TWO_ARGS
int dca_add_requester(struct device *dev); int dca_add_requester(struct device *dev);
int dca_remove_requester(struct device *dev); int dca_remove_requester(struct device *dev);
u8 dca_get_tag(int cpu); u8 dca_get_tag(int cpu);
u8 dca3_get_tag(struct device *dev, int cpu);
/* internal stuff */ /* internal stuff */
int __init dca_sysfs_init(void); int __init dca_sysfs_init(void);
......
...@@ -89,10 +89,23 @@ enum dma_transaction_type { ...@@ -89,10 +89,23 @@ enum dma_transaction_type {
DMA_MEMSET, DMA_MEMSET,
DMA_MEMCPY_CRC32C, DMA_MEMCPY_CRC32C,
DMA_INTERRUPT, DMA_INTERRUPT,
DMA_SLAVE,
}; };
/* last transaction type for creation of the capabilities mask */ /* last transaction type for creation of the capabilities mask */
#define DMA_TX_TYPE_END (DMA_INTERRUPT + 1) #define DMA_TX_TYPE_END (DMA_SLAVE + 1)
/**
* enum dma_slave_width - DMA slave register access width.
* @DMA_SLAVE_WIDTH_8BIT: Do 8-bit slave register accesses
* @DMA_SLAVE_WIDTH_16BIT: Do 16-bit slave register accesses
* @DMA_SLAVE_WIDTH_32BIT: Do 32-bit slave register accesses
*/
enum dma_slave_width {
DMA_SLAVE_WIDTH_8BIT,
DMA_SLAVE_WIDTH_16BIT,
DMA_SLAVE_WIDTH_32BIT,
};
/** /**
* enum dma_ctrl_flags - DMA flags to augment operation preparation, * enum dma_ctrl_flags - DMA flags to augment operation preparation,
...@@ -102,10 +115,14 @@ enum dma_transaction_type { ...@@ -102,10 +115,14 @@ enum dma_transaction_type {
* @DMA_CTRL_ACK - the descriptor cannot be reused until the client * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
* acknowledges receipt, i.e. has has a chance to establish any * acknowledges receipt, i.e. has has a chance to establish any
* dependency chains * dependency chains
* @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
* @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
*/ */
enum dma_ctrl_flags { enum dma_ctrl_flags {
DMA_PREP_INTERRUPT = (1 << 0), DMA_PREP_INTERRUPT = (1 << 0),
DMA_CTRL_ACK = (1 << 1), DMA_CTRL_ACK = (1 << 1),
DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
}; };
/** /**
...@@ -114,6 +131,32 @@ enum dma_ctrl_flags { ...@@ -114,6 +131,32 @@ enum dma_ctrl_flags {
*/ */
typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t; typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t;
/**
* struct dma_slave - Information about a DMA slave
* @dev: device acting as DMA slave
* @dma_dev: required DMA master device. If non-NULL, the client can not be
* bound to other masters than this.
* @tx_reg: physical address of data register used for
* memory-to-peripheral transfers
* @rx_reg: physical address of data register used for
* peripheral-to-memory transfers
* @reg_width: peripheral register width
*
* If dma_dev is non-NULL, the client can not be bound to other DMA
* masters than the one corresponding to this device. The DMA master
* driver may use this to determine if there is controller-specific
* data wrapped around this struct. Drivers of platform code that sets
* the dma_dev field must therefore make sure to use an appropriate
* controller-specific dma slave structure wrapping this struct.
*/
struct dma_slave {
struct device *dev;
struct device *dma_dev;
dma_addr_t tx_reg;
dma_addr_t rx_reg;
enum dma_slave_width reg_width;
};
/** /**
* struct dma_chan_percpu - the per-CPU part of struct dma_chan * struct dma_chan_percpu - the per-CPU part of struct dma_chan
* @refcount: local_t used for open-coded "bigref" counting * @refcount: local_t used for open-coded "bigref" counting
...@@ -139,6 +182,7 @@ struct dma_chan_percpu { ...@@ -139,6 +182,7 @@ struct dma_chan_percpu {
* @rcu: the DMA channel's RCU head * @rcu: the DMA channel's RCU head
* @device_node: used to add this to the device chan list * @device_node: used to add this to the device chan list
* @local: per-cpu pointer to a struct dma_chan_percpu * @local: per-cpu pointer to a struct dma_chan_percpu
* @client-count: how many clients are using this channel
*/ */
struct dma_chan { struct dma_chan {
struct dma_device *device; struct dma_device *device;
...@@ -154,6 +198,7 @@ struct dma_chan { ...@@ -154,6 +198,7 @@ struct dma_chan {
struct list_head device_node; struct list_head device_node;
struct dma_chan_percpu *local; struct dma_chan_percpu *local;
int client_count;
}; };
#define to_dma_chan(p) container_of(p, struct dma_chan, dev) #define to_dma_chan(p) container_of(p, struct dma_chan, dev)
...@@ -202,11 +247,14 @@ typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, ...@@ -202,11 +247,14 @@ typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client,
* @event_callback: func ptr to call when something happens * @event_callback: func ptr to call when something happens
* @cap_mask: only return channels that satisfy the requested capabilities * @cap_mask: only return channels that satisfy the requested capabilities
* a value of zero corresponds to any capability * a value of zero corresponds to any capability
* @slave: data for preparing slave transfer. Must be non-NULL iff the
* DMA_SLAVE capability is requested.
* @global_node: list_head for global dma_client_list * @global_node: list_head for global dma_client_list
*/ */
struct dma_client { struct dma_client {
dma_event_callback event_callback; dma_event_callback event_callback;
dma_cap_mask_t cap_mask; dma_cap_mask_t cap_mask;
struct dma_slave *slave;
struct list_head global_node; struct list_head global_node;
}; };
...@@ -263,6 +311,8 @@ struct dma_async_tx_descriptor { ...@@ -263,6 +311,8 @@ struct dma_async_tx_descriptor {
* @device_prep_dma_zero_sum: prepares a zero_sum operation * @device_prep_dma_zero_sum: prepares a zero_sum operation
* @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_memset: prepares a memset operation
* @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
* @device_prep_slave_sg: prepares a slave dma operation
* @device_terminate_all: terminate all pending operations
* @device_issue_pending: push pending transactions to hardware * @device_issue_pending: push pending transactions to hardware
*/ */
struct dma_device { struct dma_device {
...@@ -279,7 +329,8 @@ struct dma_device { ...@@ -279,7 +329,8 @@ struct dma_device {
int dev_id; int dev_id;
struct device *dev; struct device *dev;
int (*device_alloc_chan_resources)(struct dma_chan *chan); int (*device_alloc_chan_resources)(struct dma_chan *chan,
struct dma_client *client);
void (*device_free_chan_resources)(struct dma_chan *chan); void (*device_free_chan_resources)(struct dma_chan *chan);
struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
...@@ -297,6 +348,12 @@ struct dma_device { ...@@ -297,6 +348,12 @@ struct dma_device {
struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
struct dma_chan *chan, unsigned long flags); struct dma_chan *chan, unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_slave_sg)(
struct dma_chan *chan, struct scatterlist *sgl,
unsigned int sg_len, enum dma_data_direction direction,
unsigned long flags);
void (*device_terminate_all)(struct dma_chan *chan);
enum dma_status (*device_is_tx_complete)(struct dma_chan *chan, enum dma_status (*device_is_tx_complete)(struct dma_chan *chan,
dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t cookie, dma_cookie_t *last,
dma_cookie_t *used); dma_cookie_t *used);
...@@ -318,16 +375,14 @@ dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, ...@@ -318,16 +375,14 @@ dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
struct dma_chan *chan); struct dma_chan *chan);
static inline void static inline void async_tx_ack(struct dma_async_tx_descriptor *tx)
async_tx_ack(struct dma_async_tx_descriptor *tx)
{ {
tx->flags |= DMA_CTRL_ACK; tx->flags |= DMA_CTRL_ACK;
} }
static inline int static inline bool async_tx_test_ack(struct dma_async_tx_descriptor *tx)
async_tx_test_ack(struct dma_async_tx_descriptor *tx)
{ {
return tx->flags & DMA_CTRL_ACK; return (tx->flags & DMA_CTRL_ACK) == DMA_CTRL_ACK;
} }
#define first_dma_cap(mask) __first_dma_cap(&(mask)) #define first_dma_cap(mask) __first_dma_cap(&(mask))
......
/*
* Driver for the Synopsys DesignWare DMA Controller (aka DMACA on
* AVR32 systems.)
*
* Copyright (C) 2007 Atmel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef DW_DMAC_H
#define DW_DMAC_H
#include <linux/dmaengine.h>
/**
* struct dw_dma_platform_data - Controller configuration parameters
* @nr_channels: Number of channels supported by hardware (max 8)
*/
struct dw_dma_platform_data {
unsigned int nr_channels;
};
/**
 * struct dw_dma_slave - Controller-specific information about a slave
 * @slave: Generic information about the slave
 * @cfg_hi: Platform-specific initializer for the CFG_HI register
 * @cfg_lo: Platform-specific initializer for the CFG_LO register
 *
 * The original comment also listed "@ctl_lo: Platform-specific initializer
 * for the CTL_LO register", but the structure has no such member.
 */
struct dw_dma_slave {
struct dma_slave slave;
u32 cfg_hi;
u32 cfg_lo;
};
/* Platform-configurable bits in CFG_HI */
#define DWC_CFGH_FCMODE (1 << 0)
#define DWC_CFGH_FIFO_MODE (1 << 1)
#define DWC_CFGH_PROTCTL(x) ((x) << 2)
#define DWC_CFGH_SRC_PER(x) ((x) << 7)
#define DWC_CFGH_DST_PER(x) ((x) << 11)
/* Platform-configurable bits in CFG_LO */
#define DWC_CFGL_PRIO(x) ((x) << 5) /* priority */
#define DWC_CFGL_LOCK_CH_XFER (0 << 12) /* scope of LOCK_CH */
#define DWC_CFGL_LOCK_CH_BLOCK (1 << 12)
#define DWC_CFGL_LOCK_CH_XACT (2 << 12)
#define DWC_CFGL_LOCK_BUS_XFER (0 << 14) /* scope of LOCK_BUS */
#define DWC_CFGL_LOCK_BUS_BLOCK (1 << 14)
#define DWC_CFGL_LOCK_BUS_XACT (2 << 14)
#define DWC_CFGL_LOCK_CH (1 << 15) /* channel lockout */
#define DWC_CFGL_LOCK_BUS (1 << 16) /* busmaster lockout */
#define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */
#define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */
/*
 * Convert a pointer to the generic struct dma_slave into the enclosing
 * struct dw_dma_slave.  Only valid when @slave really is the "slave" member
 * of a struct dw_dma_slave.
 */
static inline struct dw_dma_slave *to_dw_dma_slave(struct dma_slave *slave)
{
	struct dw_dma_slave *dws;

	dws = container_of(slave, struct dw_dma_slave, slave);
	return dws;
}
#endif /* DW_DMAC_H */
...@@ -2371,6 +2371,14 @@ ...@@ -2371,6 +2371,14 @@
#define PCI_DEVICE_ID_INTEL_ICH9_7 0x2916 #define PCI_DEVICE_ID_INTEL_ICH9_7 0x2916
#define PCI_DEVICE_ID_INTEL_ICH9_8 0x2918 #define PCI_DEVICE_ID_INTEL_ICH9_8 0x2918
#define PCI_DEVICE_ID_INTEL_82855PM_HB 0x3340 #define PCI_DEVICE_ID_INTEL_82855PM_HB 0x3340
#define PCI_DEVICE_ID_INTEL_IOAT_TBG4 0x3429
#define PCI_DEVICE_ID_INTEL_IOAT_TBG5 0x342a
#define PCI_DEVICE_ID_INTEL_IOAT_TBG6 0x342b
#define PCI_DEVICE_ID_INTEL_IOAT_TBG7 0x342c
#define PCI_DEVICE_ID_INTEL_IOAT_TBG0 0x3430
#define PCI_DEVICE_ID_INTEL_IOAT_TBG1 0x3431
#define PCI_DEVICE_ID_INTEL_IOAT_TBG2 0x3432
#define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433
#define PCI_DEVICE_ID_INTEL_82830_HB 0x3575 #define PCI_DEVICE_ID_INTEL_82830_HB 0x3575
#define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577 #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577
#define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580 #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580
......
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#define NET_DMA_DEFAULT_COPYBREAK 4096 #define NET_DMA_DEFAULT_COPYBREAK 4096
int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK; int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
EXPORT_SYMBOL(sysctl_tcp_dma_copybreak);
/** /**
* dma_skb_copy_datagram_iovec - Copy a datagram to an iovec. * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment