Commit 425902f5 authored by Jason Gunthorpe, committed by Greg Kroah-Hartman

fpga zynq: Use the scatterlist interface

This allows the driver to avoid a high-order coherent DMA allocation
and a memory copy. With this patch it can DMA directly from the
kernel pages that the bitfile is stored in.

Since this is now a gather DMA operation, the driver uses the ISR
to feed the chip's DMA queue with each entry from the SGL.
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Acked-by: Moritz Fischer <moritz.fischer@ettus.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent baa6d396
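
For context (editorial note, not part of the commit): with the scatterlist interface a caller can describe the pages that already hold the bitfile with an sg_table and hand that table to the manager, instead of passing a flat buffer that the driver must copy into a coherent allocation. A minimal sketch under those assumptions follows; example_load_bitfile(), pages, n_pages and len are hypothetical names, sg_alloc_table_from_pages() is the standard scatterlist helper, write_sg is the op added in this diff, and write_init/write_complete plumbing is omitted.

/* Sketch only: wrap existing bitfile pages in an sg_table and pass it
 * to the manager's write_sg hook.  example_load_bitfile(), pages,
 * n_pages and len are hypothetical; most error handling is omitted.
 */
#include <linux/fpga/fpga-mgr.h>
#include <linux/scatterlist.h>
#include <linux/gfp.h>

static int example_load_bitfile(struct fpga_manager *mgr,
				struct page **pages, unsigned int n_pages,
				size_t len)
{
	struct sg_table sgt;
	int ret;

	/* No high-order coherent allocation and no memcpy: the existing
	 * pages are described by a scatter-gather table instead.
	 */
	ret = sg_alloc_table_from_pages(&sgt, pages, n_pages, 0, len,
					GFP_KERNEL);
	if (ret)
		return ret;

	/* The zynq driver dma_map_sg()s this table and feeds the PCAP DMA
	 * queue one entry at a time from its ISR (see zynq_step_dma below).
	 */
	ret = mgr->mops->write_sg(mgr, &sgt);

	sg_free_table(&sgt);
	return ret;
}
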
@@ -30,6 +30,7 @@
 #include <linux/pm.h>
 #include <linux/regmap.h>
 #include <linux/string.h>
+#include <linux/scatterlist.h>
 
 /* Offsets into SLCR regmap */
@@ -80,6 +81,7 @@
 /* FPGA init status */
 #define STATUS_DMA_Q_F		BIT(31)
+#define STATUS_DMA_Q_E		BIT(30)
 #define STATUS_PCFG_INIT_MASK	BIT(4)
 
 /* Interrupt Status/Mask Register Bit definitions */
@@ -98,12 +100,16 @@
 #define DMA_INVALID_ADDRESS	GENMASK(31, 0)
 /* Used to unlock the dev */
 #define UNLOCK_MASK		0x757bdf0d
-/* Timeout for DMA to complete */
-#define DMA_DONE_TIMEOUT	msecs_to_jiffies(1000)
 /* Timeout for polling reset bits */
 #define INIT_POLL_TIMEOUT	2500000
 /* Delay for polling reset bits */
 #define INIT_POLL_DELAY		20
+/* Signal this is the last DMA transfer, wait for the AXI and PCAP before
+ * interrupting
+ */
+#define DMA_SRC_LAST_TRANSFER	1
+/* Timeout for DMA completion */
+#define DMA_TIMEOUT_MS		5000
 
 /* Masks for controlling stuff in SLCR */
 /* Disable all Level shifters */
@@ -124,6 +130,11 @@ struct zynq_fpga_priv {
 	void __iomem *io_base;
 	struct regmap *slcr;
 
+	spinlock_t dma_lock;
+	unsigned int dma_elm;
+	unsigned int dma_nelms;
+	struct scatterlist *cur_sg;
+
 	struct completion dma_done;
 };
@@ -149,13 +160,80 @@ static inline void zynq_fpga_set_irq(struct zynq_fpga_priv *priv, u32 enable)
 	zynq_fpga_write(priv, INT_MASK_OFFSET, ~enable);
 }
 
+/* Must be called with dma_lock held */
+static void zynq_step_dma(struct zynq_fpga_priv *priv)
+{
+	u32 addr;
+	u32 len;
+	bool first;
+
+	first = priv->dma_elm == 0;
+	while (priv->cur_sg) {
+		/* Feed the DMA queue until it is full. */
+		if (zynq_fpga_read(priv, STATUS_OFFSET) & STATUS_DMA_Q_F)
+			break;
+
+		addr = sg_dma_address(priv->cur_sg);
+		len = sg_dma_len(priv->cur_sg);
+		if (priv->dma_elm + 1 == priv->dma_nelms) {
+			/* The last transfer waits for the PCAP to finish too,
+			 * notice this also changes the irq_mask to ignore
+			 * IXR_DMA_DONE_MASK which ensures we do not trigger
+			 * the completion too early.
+			 */
+			addr |= DMA_SRC_LAST_TRANSFER;
+			priv->cur_sg = NULL;
+		} else {
+			priv->cur_sg = sg_next(priv->cur_sg);
+			priv->dma_elm++;
+		}
+
+		zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, addr);
+		zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, DMA_INVALID_ADDRESS);
+		zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, len / 4);
+		zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0);
+	}
+
+	/* Once the first transfer is queued we can turn on the ISR, future
+	 * calls to zynq_step_dma will happen from the ISR context. The
+	 * dma_lock spinlock guarantees this handover is done coherently, the
+	 * ISR enable is put at the end to avoid another CPU spinning in the
+	 * ISR on this lock.
+	 */
+	if (first && priv->cur_sg) {
+		zynq_fpga_set_irq(priv,
+				  IXR_DMA_DONE_MASK | IXR_ERROR_FLAGS_MASK);
+	} else if (!priv->cur_sg) {
+		/* The last transfer changes to DMA & PCAP mode since we do
+		 * not want to continue until everything has been flushed into
+		 * the PCAP.
+		 */
+		zynq_fpga_set_irq(priv,
+				  IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);
+	}
+}
+
 static irqreturn_t zynq_fpga_isr(int irq, void *data)
 {
 	struct zynq_fpga_priv *priv = data;
+	u32 intr_status;
 
-	/* disable DMA and error IRQs */
-	zynq_fpga_set_irq(priv, 0);
+	/* If anything other than DMA completion is reported stop and hand
+	 * control back to zynq_fpga_ops_write, something went wrong,
+	 * otherwise progress the DMA.
+	 */
+	spin_lock(&priv->dma_lock);
+	intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
+	if (!(intr_status & IXR_ERROR_FLAGS_MASK) &&
+	    (intr_status & IXR_DMA_DONE_MASK) && priv->cur_sg) {
+		zynq_fpga_write(priv, INT_STS_OFFSET, IXR_DMA_DONE_MASK);
+		zynq_step_dma(priv);
+		spin_unlock(&priv->dma_lock);
+		return IRQ_HANDLED;
+	}
+	spin_unlock(&priv->dma_lock);
 
+	zynq_fpga_set_irq(priv, 0);
 	complete(&priv->dma_done);
 
 	return IRQ_HANDLED;
@@ -266,10 +344,11 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr,
 	zynq_fpga_write(priv, CTRL_OFFSET,
 			(CTRL_PCAP_PR_MASK | CTRL_PCAP_MODE_MASK | ctrl));
 
-	/* check that we have room in the command queue */
+	/* We expect that the command queue is empty right now. */
 	status = zynq_fpga_read(priv, STATUS_OFFSET);
-	if (status & STATUS_DMA_Q_F) {
-		dev_err(&mgr->dev, "DMA command queue full\n");
+	if ((status & STATUS_DMA_Q_F) ||
+	    (status & STATUS_DMA_Q_E) != STATUS_DMA_Q_E) {
+		dev_err(&mgr->dev, "DMA command queue not right\n");
 		err = -EBUSY;
 		goto out_err;
 	}
@@ -288,27 +367,36 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
 	return err;
 }
 
-static int zynq_fpga_ops_write(struct fpga_manager *mgr,
-			       const char *buf, size_t count)
+static int zynq_fpga_ops_write(struct fpga_manager *mgr, struct sg_table *sgt)
 {
 	struct zynq_fpga_priv *priv;
 	const char *why;
 	int err;
-	char *kbuf;
-	size_t in_count;
-	dma_addr_t dma_addr;
-	u32 transfer_length;
 	u32 intr_status;
+	unsigned long timeout;
+	unsigned long flags;
+	struct scatterlist *sg;
+	int i;
 
-	in_count = count;
 	priv = mgr->priv;
 
-	kbuf =
-	    dma_alloc_coherent(mgr->dev.parent, count, &dma_addr, GFP_KERNEL);
-	if (!kbuf)
-		return -ENOMEM;
+	/* The hardware can only DMA multiples of 4 bytes, and it requires the
+	 * starting addresses to be aligned to 64 bits (UG585 pg 212).
+	 */
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		if ((sg->offset % 8) || (sg->length % 4)) {
+			dev_err(&mgr->dev,
+				"Invalid bitstream, chunks must be aligned\n");
+			return -EINVAL;
+		}
+	}
 
-	memcpy(kbuf, buf, count);
+	priv->dma_nelms =
+	    dma_map_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
+	if (priv->dma_nelms == 0) {
+		dev_err(&mgr->dev, "Unable to DMA map (TO_DEVICE)\n");
+		return -ENOMEM;
+	}
 
 	/* enable clock */
 	err = clk_enable(priv->clk);
@@ -316,28 +404,31 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
 		goto out_free;
 
 	zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);
 	reinit_completion(&priv->dma_done);
 
-	/* enable DMA and error IRQs */
-	zynq_fpga_set_irq(priv, IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK);
+	/* zynq_step_dma will turn on interrupts */
+	spin_lock_irqsave(&priv->dma_lock, flags);
+	priv->dma_elm = 0;
+	priv->cur_sg = sgt->sgl;
+	zynq_step_dma(priv);
+	spin_unlock_irqrestore(&priv->dma_lock, flags);
 
-	/* the +1 in the src addr is used to hold off on DMA_DONE IRQ
-	 * until both AXI and PCAP are done ...
-	 */
-	zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, (u32)(dma_addr) + 1);
-	zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, (u32)DMA_INVALID_ADDRESS);
-
-	/* convert #bytes to #words */
-	transfer_length = (count + 3) / 4;
-
-	zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, transfer_length);
-	zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0);
+	timeout = wait_for_completion_timeout(&priv->dma_done,
+					      msecs_to_jiffies(DMA_TIMEOUT_MS));
 
-	wait_for_completion(&priv->dma_done);
+	spin_lock_irqsave(&priv->dma_lock, flags);
+	zynq_fpga_set_irq(priv, 0);
+	priv->cur_sg = NULL;
+	spin_unlock_irqrestore(&priv->dma_lock, flags);
 
 	intr_status = zynq_fpga_read(priv, INT_STS_OFFSET);
-	zynq_fpga_write(priv, INT_STS_OFFSET, intr_status);
+	zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK);
 
+	/* There doesn't seem to be a way to force cancel any DMA, so if
+	 * something went wrong we are relying on the hardware to have halted
+	 * the DMA before we get here, if there was we could use
+	 * wait_for_completion_interruptible too.
+	 */
 	if (intr_status & IXR_ERROR_FLAGS_MASK) {
 		why = "DMA reported error";
@@ -345,8 +436,12 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
 		goto out_report;
 	}
 
-	if (!((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
-		why = "DMA did not complete";
+	if (priv->cur_sg ||
+	    !((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) {
+		if (timeout == 0)
+			why = "DMA timed out";
+		else
+			why = "DMA did not complete";
 		err = -EIO;
 		goto out_report;
 	}
@@ -369,7 +464,7 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr,
 	clk_disable(priv->clk);
 
 out_free:
-	dma_free_coherent(mgr->dev.parent, count, kbuf, dma_addr);
+	dma_unmap_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
 
 	return err;
 }
@@ -433,7 +528,7 @@ static const struct fpga_manager_ops zynq_fpga_ops = {
 	.initial_header_size = 128,
 	.state = zynq_fpga_ops_state,
 	.write_init = zynq_fpga_ops_write_init,
-	.write = zynq_fpga_ops_write,
+	.write_sg = zynq_fpga_ops_write,
 	.write_complete = zynq_fpga_ops_write_complete,
 };
@@ -447,6 +542,7 @@ static int zynq_fpga_probe(struct platform_device *pdev)
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
+	spin_lock_init(&priv->dma_lock);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	priv->io_base = devm_ioremap_resource(dev, res);