Commit 99613159 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'dmaengine-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine

Pull dmaengine updates from Vinod Koul:
 "A bunch of new support and few updates to drivers:

  New support:
   - DMA_MEMCPY_SG support is bought back as we have a user in Xilinx
     driver
   - Support for TI J721S2 SoC in k3-udma driver
   - Support for Ingenic MDMA and BDMA in the JZ4760
   - Support for Renesas r8a779f0 dmac

  Updates:
   - We are finally getting rid of slave_id, so this brings in the
     changes across tree for that
   - updates for idxd driver
   - at_xdmac driver cleanup"

* tag 'dmaengine-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine: (60 commits)
  dt-bindings: dma-controller: Split interrupt fields in example
  dmaengine: pch_dma: Remove usage of the deprecated "pci-dma-compat.h" API
  dmaengine: at_xdmac: Fix race over irq_status
  dmaengine: at_xdmac: Remove a level of indentation in at_xdmac_tasklet()
  dmaengine: at_xdmac: Fix at_xdmac_lld struct definition
  dmaengine: at_xdmac: Fix lld view setting
  dmaengine: at_xdmac: Remove a level of indentation in at_xdmac_advance_work()
  dmaengine: at_xdmac: Fix concurrency over xfers_list
  dmaengine: at_xdmac: Move the free desc to the tail of the desc list
  dmaengine: at_xdmac: Fix race for the tx desc callback
  dmaengine: at_xdmac: Fix concurrency over chan's completed_cookie
  dmaengine: at_xdmac: Print debug message after realeasing the lock
  dmaengine: at_xdmac: Start transfer for cyclic channels in issue_pending
  dmaengine: at_xdmac: Don't start transactions at tx_submit level
  dmaengine: idxd: deprecate token sysfs attributes for read buffers
  dmaengine: idxd: change bandwidth token to read buffers
  dmaengine: idxd: fix wq settings post wq disable
  dmaengine: idxd: change MSIX allocation based on per wq activation
  dmaengine: idxd: fix descriptor flushing locking
  dmaengine: idxd: embed irq_entry in idxd_wq struct
  ...
parents fe81ba13 bbd0ff07
......@@ -41,14 +41,14 @@ KernelVersion: 5.6.0
Contact: dmaengine@vger.kernel.org
Description: The maximum number of groups can be created under this device.
What: /sys/bus/dsa/devices/dsa<m>/max_tokens
Date: Oct 25, 2019
KernelVersion: 5.6.0
What: /sys/bus/dsa/devices/dsa<m>/max_read_buffers
Date: Dec 10, 2021
KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
Description: The total number of bandwidth tokens supported by this device.
The bandwidth tokens represent resources within the DSA
Description: The total number of read buffers supported by this device.
The read buffers represent resources within the DSA
implementation, and these resources are allocated by engines to
support operations.
support operations. See DSA spec v1.2 9.2.4 Total Read Buffers.
What: /sys/bus/dsa/devices/dsa<m>/max_transfer_size
Date: Oct 25, 2019
......@@ -115,13 +115,13 @@ KernelVersion: 5.6.0
Contact: dmaengine@vger.kernel.org
Description: To indicate if this device is configurable or not.
What: /sys/bus/dsa/devices/dsa<m>/token_limit
Date: Oct 25, 2019
KernelVersion: 5.6.0
What: /sys/bus/dsa/devices/dsa<m>/read_buffer_limit
Date: Dec 10, 2021
KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
Description: The maximum number of bandwidth tokens that may be in use at
Description: The maximum number of read buffers that may be in use at
one time by operations that access low bandwidth memory in the
device.
device. See DSA spec v1.2 9.2.8 GENCFG on Global Read Buffer Limit.
What: /sys/bus/dsa/devices/dsa<m>/cmd_status
Date: Aug 28, 2020
......@@ -220,8 +220,38 @@ Contact: dmaengine@vger.kernel.org
Description: Show the current number of entries in this WQ if WQ Occupancy
Support bit WQ capabilities is 1.
What: /sys/bus/dsa/devices/wq<m>.<n>/enqcmds_retries
Date Oct 29, 2021
KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
Description: Indicate the number of retires for an enqcmds submission on a sharedwq.
A max value to set attribute is capped at 64.
What: /sys/bus/dsa/devices/engine<m>.<n>/group_id
Date: Oct 25, 2019
KernelVersion: 5.6.0
Contact: dmaengine@vger.kernel.org
Description: The group that this engine belongs to.
What: /sys/bus/dsa/devices/group<m>.<n>/use_read_buffer_limit
Date: Dec 10, 2021
KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
Description: Enable the use of global read buffer limit for the group. See DSA
spec v1.2 9.2.18 GRPCFG Use Global Read Buffer Limit.
What: /sys/bus/dsa/devices/group<m>.<n>/read_buffers_allowed
Date: Dec 10, 2021
KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
Description: Indicates max number of read buffers that may be in use at one time
by all engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read
Buffers Allowed.
What: /sys/bus/dsa/devices/group<m>.<n>/read_buffers_reserved
Date: Dec 10, 2021
KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
Description: Indicates the number of Read Buffers reserved for the use of
engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers
Reserved.
# SPDX-License-Identifier: GPL-2.0
%YAML 1.2
---
$id: http://devicetree.org/schemas/dma/arm,pl330.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: ARM PrimeCell PL330 DMA Controller
maintainers:
- Vinod Koul <vkoul@kernel.org>
description:
The ARM PrimeCell PL330 DMA controller can move blocks of memory contents
between memory and peripherals or memory to memory.
# We need a select here so we don't match all nodes with 'arm,primecell'
select:
properties:
compatible:
contains:
const: arm,pl330
required:
- compatible
allOf:
- $ref: dma-controller.yaml#
- $ref: /schemas/arm/primecell.yaml#
properties:
compatible:
items:
- enum:
- arm,pl330
- const: arm,primecell
reg:
maxItems: 1
interrupts:
minItems: 1
maxItems: 32
description: A single combined interrupt or an interrupt per event
'#dma-cells':
const: 1
description: Contains the DMA request number for the consumer
arm,pl330-broken-no-flushp:
type: boolean
description: quirk for avoiding to execute DMAFLUSHP
arm,pl330-periph-burst:
type: boolean
description: quirk for performing burst transfer only
dma-coherent: true
resets:
minItems: 1
maxItems: 2
reset-names:
minItems: 1
items:
- const: dma
- const: dma-ocp
required:
- compatible
- reg
- interrupts
unevaluatedProperties: false
examples:
- |
dma-controller@12680000 {
compatible = "arm,pl330", "arm,primecell";
reg = <0x12680000 0x1000>;
interrupts = <99>;
#dma-cells = <1>;
};
...
......@@ -10,6 +10,7 @@ maintainers:
- Vinod Koul <vkoul@kernel.org>
allOf:
- $ref: /schemas/arm/primecell.yaml#
- $ref: "dma-controller.yaml#"
# We need a select here so we don't match all nodes with 'arm,primecell'
......@@ -89,6 +90,9 @@ properties:
- 64
description: bus width used for memcpy in bits. FTDMAC020 also accept 64 bits
resets:
maxItems: 1
required:
- reg
- interrupts
......
* ARM PrimeCell PL330 DMA Controller
The ARM PrimeCell PL330 DMA controller can move blocks of memory contents
between memory and peripherals or memory to memory.
Required properties:
- compatible: should include both "arm,pl330" and "arm,primecell".
- reg: physical base address of the controller and length of memory mapped
region.
- interrupts: interrupt number to the cpu.
Optional properties:
- dma-coherent : Present if dma operations are coherent
- #dma-cells: must be <1>. used to represent the number of integer
cells in the dmas property of client device.
- dma-channels: contains the total number of DMA channels supported by the DMAC
- dma-requests: contains the total number of DMA requests supported by the DMAC
- arm,pl330-broken-no-flushp: quirk for avoiding to execute DMAFLUSHP
- arm,pl330-periph-burst: quirk for performing burst transfer only
- resets: contains an entry for each entry in reset-names.
See ../reset/reset.txt for details.
- reset-names: must contain at least "dma", and optional is "dma-ocp".
Example:
pdma0: pdma@12680000 {
compatible = "arm,pl330", "arm,primecell";
reg = <0x12680000 0x1000>;
interrupts = <99>;
#dma-cells = <1>;
#dma-channels = <8>;
#dma-requests = <32>;
};
Client drivers (device nodes requiring dma transfers from dev-to-mem or
mem-to-dev) should specify the DMA channel numbers and dma channel names
as shown below.
[property name] = <[phandle of the dma controller] [dma request id]>;
[property name] = <[dma channel name]>
where 'dma request id' is the dma request number which is connected
to the client controller. The 'property name' 'dmas' and 'dma-names'
as required by the generic dma device tree binding helpers. The dma
names correspond 1:1 with the dma request ids in the dmas property.
Example: dmas = <&pdma0 12
&pdma1 11>;
dma-names = "tx", "rx";
......@@ -24,10 +24,10 @@ examples:
dma: dma-controller@48000000 {
compatible = "ti,omap-sdma";
reg = <0x48000000 0x1000>;
interrupts = <0 12 0x4
0 13 0x4
0 14 0x4
0 15 0x4>;
interrupts = <0 12 0x4>,
<0 13 0x4>,
<0 14 0x4>,
<0 15 0x4>;
#dma-cells = <1>;
dma-channels = <32>;
dma-requests = <127>;
......
......@@ -14,15 +14,23 @@ allOf:
properties:
compatible:
enum:
- ingenic,jz4740-dma
- ingenic,jz4725b-dma
- ingenic,jz4760-dma
- ingenic,jz4760b-dma
- ingenic,jz4770-dma
- ingenic,jz4780-dma
- ingenic,x1000-dma
- ingenic,x1830-dma
oneOf:
- enum:
- ingenic,jz4740-dma
- ingenic,jz4725b-dma
- ingenic,jz4760-dma
- ingenic,jz4760-bdma
- ingenic,jz4760-mdma
- ingenic,jz4760b-dma
- ingenic,jz4760b-bdma
- ingenic,jz4760b-mdma
- ingenic,jz4770-dma
- ingenic,jz4780-dma
- ingenic,x1000-dma
- ingenic,x1830-dma
- items:
- const: ingenic,jz4770-bdma
- const: ingenic,jz4760b-bdma
reg:
items:
......@@ -36,13 +44,19 @@ properties:
maxItems: 1
"#dma-cells":
const: 2
enum: [2, 3]
description: >
DMA clients must use the format described in dma.txt, giving a phandle
to the DMA controller plus the following 2 integer cells:
- Request type: The DMA request type for transfers to/from the
device on the allocated channel, as defined in the SoC documentation.
to the DMA controller plus the following integer cells:
- Request type: The DMA request type specifies the device endpoint that
will be the source or destination of the DMA transfer.
If "#dma-cells" is 2, the request type is a single cell, and the
direction will be unidirectional (either RX or TX but not both).
If "#dma-cells" is 3, the request type has two cells; the first
one corresponds to the host to device direction (TX), the second one
corresponds to the device to host direction (RX). The DMA channel is
then bidirectional.
- Channel: If set to 0xffffffff, any available channel will be allocated
for the client. Otherwise, the exact channel specified will be used.
......
......@@ -44,6 +44,10 @@ properties:
- items:
- const: renesas,dmac-r8a779a0 # R-Car V3U
- items:
- const: renesas,dmac-r8a779f0 # R-Car S4-8
- const: renesas,rcar-gen4-dmac
reg: true
interrupts:
......@@ -118,6 +122,7 @@ if:
contains:
enum:
- renesas,dmac-r8a779a0
- renesas,rcar-gen4-dmac
then:
properties:
reg:
......
......@@ -53,6 +53,9 @@ properties:
minimum: 1
maximum: 8
resets:
maxItems: 1
snps,dma-masters:
description: |
Number of AXI masters supported by the hardware.
......
......@@ -30,6 +30,7 @@ description: |
allOf:
- $ref: /schemas/dma/dma-controller.yaml#
- $ref: /schemas/arm/keystone/ti,k3-sci-common.yaml#
properties:
compatible:
......
......@@ -25,6 +25,7 @@ description: |
allOf:
- $ref: /schemas/dma/dma-controller.yaml#
- $ref: /schemas/arm/keystone/ti,k3-sci-common.yaml#
properties:
compatible:
......
......@@ -6,6 +6,16 @@ Andy Shevchenko <andriy.shevchenko@linux.intel.com>
This small document introduces how to test DMA drivers using dmatest module.
The dmatest module tests DMA memcpy, memset, XOR and RAID6 P+Q operations using
various lengths and various offsets into the source and destination buffers. It
will initialize both buffers with a repeatable pattern and verify that the DMA
engine copies the requested region and nothing more. It will also verify that
the bytes aren't swapped around, and that the source buffer isn't modified.
The dmatest module can be configured to test a specific channel. It can also
test multiple channels at the same time, and it can start multiple threads
competing for the same channel.
.. note::
The test suite works only on the channels that have at least one
capability of the following: DMA_MEMCPY (memory-to-memory), DMA_MEMSET
......@@ -143,13 +153,14 @@ Part 5 - Handling channel allocation
Allocating Channels
-------------------
Channels are required to be configured prior to starting the test run.
Attempting to run the test without configuring the channels will fail.
Channels do not need to be configured prior to starting a test run. Attempting
to run the test without configuring the channels will result in testing any
channels that are available.
Example::
% echo 1 > /sys/module/dmatest/parameters/run
dmatest: Could not start test, no channels configured
dmatest: No channels configured, continue with any
Channels are registered using the "channel" parameter. Channels can be requested by their
name, once requested, the channel is registered and a pending thread is added to the test list.
......
......@@ -162,6 +162,29 @@ Currently, the types available are:
- The device is able to do memory to memory copies
- - DMA_MEMCPY_SG
- The device supports memory to memory scatter-gather transfers.
- Even though a plain memcpy can look like a particular case of a
scatter-gather transfer, with a single chunk to copy, it's a distinct
transaction type in the mem2mem transfer case. This is because some very
simple devices might be able to do contiguous single-chunk memory copies,
but have no support for more complex SG transfers.
- No matter what the overall size of the combined chunks for source and
destination is, only as many bytes as the smallest of the two will be
transmitted. That means the number and size of the scatter-gather buffers in
both lists need not be the same, and that the operation functionally is
equivalent to a ``strncpy`` where the ``count`` argument equals the smallest
total size of the two scatter-gather list buffers.
- It's usually used for copying pixel data between host memory and
memory-mapped GPU device memory, such as found on modern PCI video graphics
cards. The most immediate example is the OpenGL API function
``glReadPielx()``, which might require a verbatim copy of a huge framebuffer
from local device memory onto host memory.
- DMA_XOR
- The device is able to perform XOR operations on memory areas
......
This diff is collapsed.
......@@ -104,10 +104,10 @@
* descriptor base address in the upper 8 bits.
*/
struct jz4780_dma_hwdesc {
uint32_t dcm;
uint32_t dsa;
uint32_t dta;
uint32_t dtc;
u32 dcm;
u32 dsa;
u32 dta;
u32 dtc;
};
/* Size of allocations for hardware descriptor blocks. */
......@@ -122,7 +122,8 @@ struct jz4780_dma_desc {
dma_addr_t desc_phys;
unsigned int count;
enum dma_transaction_type type;
uint32_t status;
u32 transfer_type;
u32 status;
};
struct jz4780_dma_chan {
......@@ -130,8 +131,8 @@ struct jz4780_dma_chan {
unsigned int id;
struct dma_pool *desc_pool;
uint32_t transfer_type;
uint32_t transfer_shift;
u32 transfer_type_tx, transfer_type_rx;
u32 transfer_shift;
struct dma_slave_config config;
struct jz4780_dma_desc *desc;
......@@ -152,12 +153,12 @@ struct jz4780_dma_dev {
unsigned int irq;
const struct jz4780_dma_soc_data *soc_data;
uint32_t chan_reserved;
u32 chan_reserved;
struct jz4780_dma_chan chan[];
};
struct jz4780_dma_filter_data {
uint32_t transfer_type;
u32 transfer_type_tx, transfer_type_rx;
int channel;
};
......@@ -179,26 +180,26 @@ static inline struct jz4780_dma_dev *jz4780_dma_chan_parent(
dma_device);
}
static inline uint32_t jz4780_dma_chn_readl(struct jz4780_dma_dev *jzdma,
static inline u32 jz4780_dma_chn_readl(struct jz4780_dma_dev *jzdma,
unsigned int chn, unsigned int reg)
{
return readl(jzdma->chn_base + reg + JZ_DMA_REG_CHAN(chn));
}
static inline void jz4780_dma_chn_writel(struct jz4780_dma_dev *jzdma,
unsigned int chn, unsigned int reg, uint32_t val)
unsigned int chn, unsigned int reg, u32 val)
{
writel(val, jzdma->chn_base + reg + JZ_DMA_REG_CHAN(chn));
}
static inline uint32_t jz4780_dma_ctrl_readl(struct jz4780_dma_dev *jzdma,
static inline u32 jz4780_dma_ctrl_readl(struct jz4780_dma_dev *jzdma,
unsigned int reg)
{
return readl(jzdma->ctrl_base + reg);
}
static inline void jz4780_dma_ctrl_writel(struct jz4780_dma_dev *jzdma,
unsigned int reg, uint32_t val)
unsigned int reg, u32 val)
{
writel(val, jzdma->ctrl_base + reg);
}
......@@ -226,9 +227,10 @@ static inline void jz4780_dma_chan_disable(struct jz4780_dma_dev *jzdma,
jz4780_dma_ctrl_writel(jzdma, JZ_DMA_REG_DCKEC, BIT(chn));
}
static struct jz4780_dma_desc *jz4780_dma_desc_alloc(
struct jz4780_dma_chan *jzchan, unsigned int count,
enum dma_transaction_type type)
static struct jz4780_dma_desc *
jz4780_dma_desc_alloc(struct jz4780_dma_chan *jzchan, unsigned int count,
enum dma_transaction_type type,
enum dma_transfer_direction direction)
{
struct jz4780_dma_desc *desc;
......@@ -248,6 +250,12 @@ static struct jz4780_dma_desc *jz4780_dma_desc_alloc(
desc->count = count;
desc->type = type;
if (direction == DMA_DEV_TO_MEM)
desc->transfer_type = jzchan->transfer_type_rx;
else
desc->transfer_type = jzchan->transfer_type_tx;
return desc;
}
......@@ -260,8 +268,8 @@ static void jz4780_dma_desc_free(struct virt_dma_desc *vdesc)
kfree(desc);
}
static uint32_t jz4780_dma_transfer_size(struct jz4780_dma_chan *jzchan,
unsigned long val, uint32_t *shift)
static u32 jz4780_dma_transfer_size(struct jz4780_dma_chan *jzchan,
unsigned long val, u32 *shift)
{
struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
int ord = ffs(val) - 1;
......@@ -303,7 +311,7 @@ static int jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan,
enum dma_transfer_direction direction)
{
struct dma_slave_config *config = &jzchan->config;
uint32_t width, maxburst, tsz;
u32 width, maxburst, tsz;
if (direction == DMA_MEM_TO_DEV) {
desc->dcm = JZ_DMA_DCM_SAI;
......@@ -361,7 +369,7 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg(
unsigned int i;
int err;
desc = jz4780_dma_desc_alloc(jzchan, sg_len, DMA_SLAVE);
desc = jz4780_dma_desc_alloc(jzchan, sg_len, DMA_SLAVE, direction);
if (!desc)
return NULL;
......@@ -410,7 +418,7 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_cyclic(
periods = buf_len / period_len;
desc = jz4780_dma_desc_alloc(jzchan, periods, DMA_CYCLIC);
desc = jz4780_dma_desc_alloc(jzchan, periods, DMA_CYCLIC, direction);
if (!desc)
return NULL;
......@@ -453,16 +461,16 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy(
{
struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
struct jz4780_dma_desc *desc;
uint32_t tsz;
u32 tsz;
desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY);
desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY, 0);
if (!desc)
return NULL;
tsz = jz4780_dma_transfer_size(jzchan, dest | src | len,
&jzchan->transfer_shift);
jzchan->transfer_type = JZ_DMA_DRT_AUTO;
desc->transfer_type = JZ_DMA_DRT_AUTO;
desc->desc[0].dsa = src;
desc->desc[0].dta = dest;
......@@ -528,7 +536,7 @@ static void jz4780_dma_begin(struct jz4780_dma_chan *jzchan)
/* Set transfer type. */
jz4780_dma_chn_writel(jzdma, jzchan->id, JZ_DMA_REG_DRT,
jzchan->transfer_type);
jzchan->desc->transfer_type);
/*
* Set the transfer count. This is redundant for a descriptor-driven
......@@ -670,7 +678,7 @@ static bool jz4780_dma_chan_irq(struct jz4780_dma_dev *jzdma,
{
const unsigned int soc_flags = jzdma->soc_data->flags;
struct jz4780_dma_desc *desc = jzchan->desc;
uint32_t dcs;
u32 dcs;
bool ack = true;
spin_lock(&jzchan->vchan.lock);
......@@ -727,7 +735,7 @@ static irqreturn_t jz4780_dma_irq_handler(int irq, void *data)
struct jz4780_dma_dev *jzdma = data;
unsigned int nb_channels = jzdma->soc_data->nb_channels;
unsigned long pending;
uint32_t dmac;
u32 dmac;
int i;
pending = jz4780_dma_ctrl_readl(jzdma, JZ_DMA_REG_DIRQP);
......@@ -788,7 +796,8 @@ static bool jz4780_dma_filter_fn(struct dma_chan *chan, void *param)
return false;
}
jzchan->transfer_type = data->transfer_type;
jzchan->transfer_type_tx = data->transfer_type_tx;
jzchan->transfer_type_rx = data->transfer_type_rx;
return true;
}
......@@ -800,11 +809,17 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec,
dma_cap_mask_t mask = jzdma->dma_device.cap_mask;
struct jz4780_dma_filter_data data;
if (dma_spec->args_count != 2)
if (dma_spec->args_count == 2) {
data.transfer_type_tx = dma_spec->args[0];
data.transfer_type_rx = dma_spec->args[0];
data.channel = dma_spec->args[1];
} else if (dma_spec->args_count == 3) {
data.transfer_type_tx = dma_spec->args[0];
data.transfer_type_rx = dma_spec->args[1];
data.channel = dma_spec->args[2];
} else {
return NULL;
data.transfer_type = dma_spec->args[0];
data.channel = dma_spec->args[1];
}
if (data.channel > -1) {
if (data.channel >= jzdma->soc_data->nb_channels) {
......@@ -822,7 +837,8 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec,
return NULL;
}
jzdma->chan[data.channel].transfer_type = data.transfer_type;
jzdma->chan[data.channel].transfer_type_tx = data.transfer_type_tx;
jzdma->chan[data.channel].transfer_type_rx = data.transfer_type_rx;
return dma_get_slave_channel(
&jzdma->chan[data.channel].vchan.chan);
......@@ -938,6 +954,14 @@ static int jz4780_dma_probe(struct platform_device *pdev)
jzchan->vchan.desc_free = jz4780_dma_desc_free;
}
/*
* On JZ4760, chan0 won't enable properly the first time.
* Enabling then disabling chan1 will magically make chan0 work
* correctly.
*/
jz4780_dma_chan_enable(jzdma, 1);
jz4780_dma_chan_disable(jzdma, 1);
ret = platform_get_irq(pdev, 0);
if (ret < 0)
goto err_disable_clk;
......@@ -1011,12 +1035,36 @@ static const struct jz4780_dma_soc_data jz4760_dma_soc_data = {
.flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
};
static const struct jz4780_dma_soc_data jz4760_mdma_soc_data = {
.nb_channels = 2,
.transfer_ord_max = 6,
.flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
};
static const struct jz4780_dma_soc_data jz4760_bdma_soc_data = {
.nb_channels = 3,
.transfer_ord_max = 6,
.flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
};
static const struct jz4780_dma_soc_data jz4760b_dma_soc_data = {
.nb_channels = 5,
.transfer_ord_max = 6,
.flags = JZ_SOC_DATA_PER_CHAN_PM,
};
static const struct jz4780_dma_soc_data jz4760b_mdma_soc_data = {
.nb_channels = 2,
.transfer_ord_max = 6,
.flags = JZ_SOC_DATA_PER_CHAN_PM,
};
static const struct jz4780_dma_soc_data jz4760b_bdma_soc_data = {
.nb_channels = 3,
.transfer_ord_max = 6,
.flags = JZ_SOC_DATA_PER_CHAN_PM,
};
static const struct jz4780_dma_soc_data jz4770_dma_soc_data = {
.nb_channels = 6,
.transfer_ord_max = 6,
......@@ -1045,7 +1093,11 @@ static const struct of_device_id jz4780_dma_dt_match[] = {
{ .compatible = "ingenic,jz4740-dma", .data = &jz4740_dma_soc_data },
{ .compatible = "ingenic,jz4725b-dma", .data = &jz4725b_dma_soc_data },
{ .compatible = "ingenic,jz4760-dma", .data = &jz4760_dma_soc_data },
{ .compatible = "ingenic,jz4760-mdma", .data = &jz4760_mdma_soc_data },
{ .compatible = "ingenic,jz4760-bdma", .data = &jz4760_bdma_soc_data },
{ .compatible = "ingenic,jz4760b-dma", .data = &jz4760b_dma_soc_data },
{ .compatible = "ingenic,jz4760b-mdma", .data = &jz4760b_mdma_soc_data },
{ .compatible = "ingenic,jz4760b-bdma", .data = &jz4760b_bdma_soc_data },
{ .compatible = "ingenic,jz4770-dma", .data = &jz4770_dma_soc_data },
{ .compatible = "ingenic,jz4780-dma", .data = &jz4780_dma_soc_data },
{ .compatible = "ingenic,x1000-dma", .data = &x1000_dma_soc_data },
......
......@@ -1159,6 +1159,13 @@ int dma_async_device_register(struct dma_device *device)
return -EIO;
}
if (dma_has_cap(DMA_MEMCPY_SG, device->cap_mask) && !device->device_prep_dma_memcpy_sg) {
dev_err(device->dev,
"Device claims capability %s, but op is not defined\n",
"DMA_MEMCPY_SG");
return -EIO;
}
if (dma_has_cap(DMA_XOR, device->cap_mask) && !device->device_prep_dma_xor) {
dev_err(device->dev,
"Device claims capability %s, but op is not defined\n",
......
......@@ -19,30 +19,6 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd);
static void idxd_wq_disable_cleanup(struct idxd_wq *wq);
/* Interrupt control bits */
void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id)
{
struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector);
pci_msi_mask_irq(data);
}
void idxd_mask_msix_vectors(struct idxd_device *idxd)
{
struct pci_dev *pdev = idxd->pdev;
int msixcnt = pci_msix_vec_count(pdev);
int i;
for (i = 0; i < msixcnt; i++)
idxd_mask_msix_vector(idxd, i);
}
void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id)
{
struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector);
pci_msi_unmask_irq(data);
}
void idxd_unmask_error_interrupts(struct idxd_device *idxd)
{
union genctrl_reg genctrl;
......@@ -382,14 +358,24 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
lockdep_assert_held(&wq->wq_lock);
memset(wq->wqcfg, 0, idxd->wqcfg_size);
wq->type = IDXD_WQT_NONE;
wq->size = 0;
wq->group = NULL;
wq->threshold = 0;
wq->priority = 0;
wq->ats_dis = 0;
wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
memset(wq->name, 0, WQ_NAME_SIZE);
wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
wq->max_batch_size = WQ_DEFAULT_MAX_BATCH;
}
static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq)
{
lockdep_assert_held(&wq->wq_lock);
idxd_wq_disable_cleanup(wq);
wq->size = 0;
wq->group = NULL;
}
static void idxd_wq_ref_release(struct percpu_ref *ref)
......@@ -404,19 +390,31 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq)
int rc;
memset(&wq->wq_active, 0, sizeof(wq->wq_active));
rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, 0, GFP_KERNEL);
rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release,
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
if (rc < 0)
return rc;
reinit_completion(&wq->wq_dead);
reinit_completion(&wq->wq_resurrect);
return 0;
}
void idxd_wq_quiesce(struct idxd_wq *wq)
void __idxd_wq_quiesce(struct idxd_wq *wq)
{
lockdep_assert_held(&wq->wq_lock);
reinit_completion(&wq->wq_resurrect);
percpu_ref_kill(&wq->wq_active);
complete_all(&wq->wq_resurrect);
wait_for_completion(&wq->wq_dead);
}
void idxd_wq_quiesce(struct idxd_wq *wq)
{
mutex_lock(&wq->wq_lock);
__idxd_wq_quiesce(wq);
mutex_unlock(&wq->wq_lock);
}
/* Device control bits */
static inline bool idxd_is_enabled(struct idxd_device *idxd)
{
......@@ -572,7 +570,6 @@ void idxd_device_reset(struct idxd_device *idxd)
idxd_device_clear_state(idxd);
idxd->state = IDXD_DEV_DISABLED;
idxd_unmask_error_interrupts(idxd);
idxd_msix_perm_setup(idxd);
spin_unlock(&idxd->dev_lock);
}
......@@ -681,9 +678,9 @@ static void idxd_groups_clear_state(struct idxd_device *idxd)
memset(&group->grpcfg, 0, sizeof(group->grpcfg));
group->num_engines = 0;
group->num_wqs = 0;
group->use_token_limit = false;
group->tokens_allowed = 0;
group->tokens_reserved = 0;
group->use_rdbuf_limit = false;
group->rdbufs_allowed = 0;
group->rdbufs_reserved = 0;
group->tc_a = -1;
group->tc_b = -1;
}
......@@ -699,6 +696,7 @@ static void idxd_device_wqs_clear_state(struct idxd_device *idxd)
if (wq->state == IDXD_WQ_ENABLED) {
idxd_wq_disable_cleanup(wq);
idxd_wq_device_reset_cleanup(wq);
wq->state = IDXD_WQ_DISABLED;
}
}
......@@ -711,36 +709,6 @@ void idxd_device_clear_state(struct idxd_device *idxd)
idxd_device_wqs_clear_state(idxd);
}
void idxd_msix_perm_setup(struct idxd_device *idxd)
{
union msix_perm mperm;
int i, msixcnt;
msixcnt = pci_msix_vec_count(idxd->pdev);
if (msixcnt < 0)
return;
mperm.bits = 0;
mperm.pasid = idxd->pasid;
mperm.pasid_en = device_pasid_enabled(idxd);
for (i = 1; i < msixcnt; i++)
iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
}
void idxd_msix_perm_clear(struct idxd_device *idxd)
{
union msix_perm mperm;
int i, msixcnt;
msixcnt = pci_msix_vec_count(idxd->pdev);
if (msixcnt < 0)
return;
mperm.bits = 0;
for (i = 1; i < msixcnt; i++)
iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
}
static void idxd_group_config_write(struct idxd_group *group)
{
struct idxd_device *idxd = group->idxd;
......@@ -780,10 +748,10 @@ static int idxd_groups_config_write(struct idxd_device *idxd)
int i;
struct device *dev = &idxd->pdev->dev;
/* Setup bandwidth token limit */
if (idxd->hw.gen_cap.config_en && idxd->token_limit) {
/* Setup bandwidth rdbuf limit */
if (idxd->hw.gen_cap.config_en && idxd->rdbuf_limit) {
reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
reg.token_limit = idxd->token_limit;
reg.rdbuf_limit = idxd->rdbuf_limit;
iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
}
......@@ -827,15 +795,12 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
wq->wqcfg->bits[i] = ioread32(idxd->reg_base + wq_offset);
}
if (wq->size == 0 && wq->type != IDXD_WQT_NONE)
wq->size = WQ_DEFAULT_QUEUE_DEPTH;
/* byte 0-3 */
wq->wqcfg->wq_size = wq->size;
if (wq->size == 0) {
idxd->cmd_status = IDXD_SCMD_WQ_NO_SIZE;
dev_warn(dev, "Incorrect work queue size: 0\n");
return -EINVAL;
}
/* bytes 4-7 */
wq->wqcfg->wq_thresh = wq->threshold;
......@@ -924,13 +889,12 @@ static void idxd_group_flags_setup(struct idxd_device *idxd)
group->tc_b = group->grpcfg.flags.tc_b = 1;
else
group->grpcfg.flags.tc_b = group->tc_b;
group->grpcfg.flags.use_token_limit = group->use_token_limit;
group->grpcfg.flags.tokens_reserved = group->tokens_reserved;
if (group->tokens_allowed)
group->grpcfg.flags.tokens_allowed =
group->tokens_allowed;
group->grpcfg.flags.use_rdbuf_limit = group->use_rdbuf_limit;
group->grpcfg.flags.rdbufs_reserved = group->rdbufs_reserved;
if (group->rdbufs_allowed)
group->grpcfg.flags.rdbufs_allowed = group->rdbufs_allowed;
else
group->grpcfg.flags.tokens_allowed = idxd->max_tokens;
group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs;
}
}
......@@ -981,8 +945,6 @@ static int idxd_wqs_setup(struct idxd_device *idxd)
if (!wq->group)
continue;
if (!wq->size)
continue;
if (wq_shared(wq) && !device_swq_supported(idxd)) {
idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT;
......@@ -1123,7 +1085,7 @@ int idxd_device_load_config(struct idxd_device *idxd)
int i, rc;
reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
idxd->token_limit = reg.token_limit;
idxd->rdbuf_limit = reg.rdbuf_limit;
for (i = 0; i < idxd->max_groups; i++) {
struct idxd_group *group = idxd->groups[i];
......@@ -1142,6 +1104,106 @@ int idxd_device_load_config(struct idxd_device *idxd)
return 0;
}
static void idxd_flush_pending_descs(struct idxd_irq_entry *ie)
{
struct idxd_desc *desc, *itr;
struct llist_node *head;
LIST_HEAD(flist);
enum idxd_complete_type ctype;
spin_lock(&ie->list_lock);
head = llist_del_all(&ie->pending_llist);
if (head) {
llist_for_each_entry_safe(desc, itr, head, llnode)
list_add_tail(&desc->list, &ie->work_list);
}
list_for_each_entry_safe(desc, itr, &ie->work_list, list)
list_move_tail(&desc->list, &flist);
spin_unlock(&ie->list_lock);
list_for_each_entry_safe(desc, itr, &flist, list) {
list_del(&desc->list);
ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT;
idxd_dma_complete_txd(desc, ctype, true);
}
}
static void idxd_device_set_perm_entry(struct idxd_device *idxd,
struct idxd_irq_entry *ie)
{
union msix_perm mperm;
if (ie->pasid == INVALID_IOASID)
return;
mperm.bits = 0;
mperm.pasid = ie->pasid;
mperm.pasid_en = 1;
iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8);
}
static void idxd_device_clear_perm_entry(struct idxd_device *idxd,
struct idxd_irq_entry *ie)
{
iowrite32(0, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8);
}
void idxd_wq_free_irq(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
struct idxd_irq_entry *ie = &wq->ie;
synchronize_irq(ie->vector);
free_irq(ie->vector, ie);
idxd_flush_pending_descs(ie);
if (idxd->request_int_handles)
idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX);
idxd_device_clear_perm_entry(idxd, ie);
ie->vector = -1;
ie->int_handle = INVALID_INT_HANDLE;
ie->pasid = INVALID_IOASID;
}
int idxd_wq_request_irq(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
struct pci_dev *pdev = idxd->pdev;
struct device *dev = &pdev->dev;
struct idxd_irq_entry *ie;
int rc;
ie = &wq->ie;
ie->vector = pci_irq_vector(pdev, ie->id);
ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : INVALID_IOASID;
idxd_device_set_perm_entry(idxd, ie);
rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie);
if (rc < 0) {
dev_err(dev, "Failed to request irq %d.\n", ie->vector);
goto err_irq;
}
if (idxd->request_int_handles) {
rc = idxd_device_request_int_handle(idxd, ie->id, &ie->int_handle,
IDXD_IRQ_MSIX);
if (rc < 0)
goto err_int_handle;
} else {
ie->int_handle = ie->id;
}
return 0;
err_int_handle:
ie->int_handle = INVALID_INT_HANDLE;
free_irq(ie->vector, ie);
err_irq:
idxd_device_clear_perm_entry(idxd, ie);
ie->pasid = INVALID_IOASID;
return rc;
}
int __drv_enable_wq(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
......
......@@ -21,20 +21,27 @@ static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c)
}
void idxd_dma_complete_txd(struct idxd_desc *desc,
enum idxd_complete_type comp_type)
enum idxd_complete_type comp_type,
bool free_desc)
{
struct idxd_device *idxd = desc->wq->idxd;
struct dma_async_tx_descriptor *tx;
struct dmaengine_result res;
int complete = 1;
if (desc->completion->status == DSA_COMP_SUCCESS)
if (desc->completion->status == DSA_COMP_SUCCESS) {
res.result = DMA_TRANS_NOERROR;
else if (desc->completion->status)
} else if (desc->completion->status) {
if (idxd->request_int_handles && comp_type != IDXD_COMPLETE_ABORT &&
desc->completion->status == DSA_COMP_INT_HANDLE_INVAL &&
idxd_queue_int_handle_resubmit(desc))
return;
res.result = DMA_TRANS_WRITE_FAILED;
else if (comp_type == IDXD_COMPLETE_ABORT)
} else if (comp_type == IDXD_COMPLETE_ABORT) {
res.result = DMA_TRANS_ABORTED;
else
} else {
complete = 0;
}
tx = &desc->txd;
if (complete && tx->cookie) {
......@@ -44,6 +51,9 @@ void idxd_dma_complete_txd(struct idxd_desc *desc,
tx->callback = NULL;
tx->callback_result = NULL;
}
if (free_desc)
idxd_free_desc(desc->wq, desc);
}
static void op_flag_setup(unsigned long flags, u32 *desc_flags)
......@@ -153,8 +163,10 @@ static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
cookie = dma_cookie_assign(tx);
rc = idxd_submit_desc(wq, desc);
if (rc < 0)
if (rc < 0) {
idxd_free_desc(wq, desc);
return rc;
}
return cookie;
}
......@@ -277,6 +289,14 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev)
mutex_lock(&wq->wq_lock);
wq->type = IDXD_WQT_KERNEL;
rc = idxd_wq_request_irq(wq);
if (rc < 0) {
idxd->cmd_status = IDXD_SCMD_WQ_IRQ_ERR;
dev_dbg(dev, "WQ %d irq setup failed: %d\n", wq->id, rc);
goto err_irq;
}
rc = __drv_enable_wq(wq);
if (rc < 0) {
dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc);
......@@ -310,13 +330,15 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev)
return 0;
err_dma:
idxd_wq_quiesce(wq);
__idxd_wq_quiesce(wq);
percpu_ref_exit(&wq->wq_active);
err_ref:
idxd_wq_free_resources(wq);
err_res_alloc:
__drv_disable_wq(wq);
err:
idxd_wq_free_irq(wq);
err_irq:
wq->type = IDXD_WQT_NONE;
mutex_unlock(&wq->wq_lock);
return rc;
......@@ -327,11 +349,13 @@ static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev)
struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
mutex_lock(&wq->wq_lock);
idxd_wq_quiesce(wq);
__idxd_wq_quiesce(wq);
idxd_unregister_dma_channel(wq);
idxd_wq_free_resources(wq);
__drv_disable_wq(wq);
percpu_ref_exit(&wq->wq_active);
idxd_wq_free_irq(wq);
wq->type = IDXD_WQT_NONE;
mutex_unlock(&wq->wq_lock);
}
......
......@@ -10,6 +10,7 @@
#include <linux/cdev.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <linux/ioasid.h>
#include <linux/perf_event.h>
#include <uapi/linux/idxd.h>
#include "registers.h"
......@@ -51,6 +52,9 @@ enum idxd_type {
#define IDXD_NAME_SIZE 128
#define IDXD_PMU_EVENT_MAX 64
#define IDXD_ENQCMDS_RETRIES 32
#define IDXD_ENQCMDS_MAX_RETRIES 64
struct idxd_device_driver {
const char *name;
enum idxd_dev_type *type;
......@@ -64,8 +68,8 @@ extern struct idxd_device_driver idxd_drv;
extern struct idxd_device_driver idxd_dmaengine_drv;
extern struct idxd_device_driver idxd_user_drv;
#define INVALID_INT_HANDLE -1
struct idxd_irq_entry {
struct idxd_device *idxd;
int id;
int vector;
struct llist_head pending_llist;
......@@ -75,6 +79,8 @@ struct idxd_irq_entry {
* and irq thread processing error descriptor.
*/
spinlock_t list_lock;
int int_handle;
ioasid_t pasid;
};
struct idxd_group {
......@@ -84,9 +90,9 @@ struct idxd_group {
int id;
int num_engines;
int num_wqs;
bool use_token_limit;
u8 tokens_allowed;
u8 tokens_reserved;
bool use_rdbuf_limit;
u8 rdbufs_allowed;
u8 rdbufs_reserved;
int tc_a;
int tc_b;
};
......@@ -145,6 +151,10 @@ struct idxd_cdev {
#define WQ_NAME_SIZE 1024
#define WQ_TYPE_SIZE 10
#define WQ_DEFAULT_QUEUE_DEPTH 16
#define WQ_DEFAULT_MAX_XFER SZ_2M
#define WQ_DEFAULT_MAX_BATCH 32
enum idxd_op_type {
IDXD_OP_BLOCK = 0,
IDXD_OP_NONBLOCK = 1,
......@@ -164,13 +174,16 @@ struct idxd_dma_chan {
struct idxd_wq {
void __iomem *portal;
u32 portal_offset;
unsigned int enqcmds_retries;
struct percpu_ref wq_active;
struct completion wq_dead;
struct completion wq_resurrect;
struct idxd_dev idxd_dev;
struct idxd_cdev *idxd_cdev;
struct wait_queue_head err_queue;
struct idxd_device *idxd;
int id;
struct idxd_irq_entry ie;
enum idxd_wq_type type;
struct idxd_group *group;
int client_count;
......@@ -251,6 +264,7 @@ struct idxd_device {
int id;
int major;
u32 cmd_status;
struct idxd_irq_entry ie; /* misc irq, msix 0 */
struct pci_dev *pdev;
void __iomem *reg_base;
......@@ -266,6 +280,8 @@ struct idxd_device {
unsigned int pasid;
int num_groups;
int irq_cnt;
bool request_int_handles;
u32 msix_perm_offset;
u32 wqcfg_offset;
......@@ -276,24 +292,20 @@ struct idxd_device {
u32 max_batch_size;
int max_groups;
int max_engines;
int max_tokens;
int max_rdbufs;
int max_wqs;
int max_wq_size;
int token_limit;
int nr_tokens; /* non-reserved tokens */
int rdbuf_limit;
int nr_rdbufs; /* non-reserved read buffers */
unsigned int wqcfg_size;
union sw_err_reg sw_err;
wait_queue_head_t cmd_waitq;
int num_wq_irqs;
struct idxd_irq_entry *irq_entries;
struct idxd_dma_dev *idxd_dma;
struct workqueue_struct *wq;
struct work_struct work;
int *int_handles;
struct idxd_pmu *idxd_pmu;
};
......@@ -380,6 +392,21 @@ static inline void idxd_dev_set_type(struct idxd_dev *idev, int type)
idev->type = type;
}
static inline struct idxd_irq_entry *idxd_get_ie(struct idxd_device *idxd, int idx)
{
return (idx == 0) ? &idxd->ie : &idxd->wqs[idx - 1]->ie;
}
static inline struct idxd_wq *ie_to_wq(struct idxd_irq_entry *ie)
{
return container_of(ie, struct idxd_wq, ie);
}
static inline struct idxd_device *ie_to_idxd(struct idxd_irq_entry *ie)
{
return container_of(ie, struct idxd_device, ie);
}
extern struct bus_type dsa_bus_type;
extern bool support_enqcmd;
......@@ -518,17 +545,13 @@ void idxd_unregister_devices(struct idxd_device *idxd);
int idxd_register_driver(void);
void idxd_unregister_driver(void);
void idxd_wqs_quiesce(struct idxd_device *idxd);
bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc);
/* device interrupt control */
void idxd_msix_perm_setup(struct idxd_device *idxd);
void idxd_msix_perm_clear(struct idxd_device *idxd);
irqreturn_t idxd_misc_thread(int vec, void *data);
irqreturn_t idxd_wq_thread(int irq, void *data);
void idxd_mask_error_interrupts(struct idxd_device *idxd);
void idxd_unmask_error_interrupts(struct idxd_device *idxd);
void idxd_mask_msix_vectors(struct idxd_device *idxd);
void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id);
void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id);
/* device control */
int idxd_register_idxd_drv(void);
......@@ -564,13 +587,17 @@ int idxd_wq_map_portal(struct idxd_wq *wq);
void idxd_wq_unmap_portal(struct idxd_wq *wq);
int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid);
int idxd_wq_disable_pasid(struct idxd_wq *wq);
void __idxd_wq_quiesce(struct idxd_wq *wq);
void idxd_wq_quiesce(struct idxd_wq *wq);
int idxd_wq_init_percpu_ref(struct idxd_wq *wq);
void idxd_wq_free_irq(struct idxd_wq *wq);
int idxd_wq_request_irq(struct idxd_wq *wq);
/* submission */
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype);
void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc);
int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc);
/* dmaengine */
int idxd_register_dma_device(struct idxd_device *idxd);
......@@ -579,7 +606,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq);
void idxd_unregister_dma_channel(struct idxd_wq *wq);
void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res);
void idxd_dma_complete_txd(struct idxd_desc *desc,
enum idxd_complete_type comp_type);
enum idxd_complete_type comp_type, bool free_desc);
/* cdev */
int idxd_cdev_register(void);
......@@ -603,10 +630,4 @@ static inline void perfmon_init(void) {}
static inline void perfmon_exit(void) {}
#endif
static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason)
{
idxd_dma_complete_txd(desc, reason);
idxd_free_desc(desc->wq, desc);
}
#endif
......@@ -72,7 +72,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
{
struct pci_dev *pdev = idxd->pdev;
struct device *dev = &pdev->dev;
struct idxd_irq_entry *irq_entry;
struct idxd_irq_entry *ie;
int i, msixcnt;
int rc = 0;
......@@ -81,6 +81,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
dev_err(dev, "Not MSI-X interrupt capable.\n");
return -ENOSPC;
}
idxd->irq_cnt = msixcnt;
rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX);
if (rc != msixcnt) {
......@@ -89,87 +90,34 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
}
dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt);
/*
* We implement 1 completion list per MSI-X entry except for
* entry 0, which is for errors and others.
*/
idxd->irq_entries = kcalloc_node(msixcnt, sizeof(struct idxd_irq_entry),
GFP_KERNEL, dev_to_node(dev));
if (!idxd->irq_entries) {
rc = -ENOMEM;
goto err_irq_entries;
}
for (i = 0; i < msixcnt; i++) {
idxd->irq_entries[i].id = i;
idxd->irq_entries[i].idxd = idxd;
idxd->irq_entries[i].vector = pci_irq_vector(pdev, i);
spin_lock_init(&idxd->irq_entries[i].list_lock);
}
idxd_msix_perm_setup(idxd);
irq_entry = &idxd->irq_entries[0];
rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread,
0, "idxd-misc", irq_entry);
ie = idxd_get_ie(idxd, 0);
ie->vector = pci_irq_vector(pdev, 0);
rc = request_threaded_irq(ie->vector, NULL, idxd_misc_thread, 0, "idxd-misc", ie);
if (rc < 0) {
dev_err(dev, "Failed to allocate misc interrupt.\n");
goto err_misc_irq;
}
dev_dbg(dev, "Requested idxd-misc handler on msix vector %d\n", ie->vector);
dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", irq_entry->vector);
/* first MSI-X entry is not for wq interrupts */
idxd->num_wq_irqs = msixcnt - 1;
for (i = 0; i < idxd->max_wqs; i++) {
int msix_idx = i + 1;
for (i = 1; i < msixcnt; i++) {
irq_entry = &idxd->irq_entries[i];
ie = idxd_get_ie(idxd, msix_idx);
ie->id = msix_idx;
ie->int_handle = INVALID_INT_HANDLE;
ie->pasid = INVALID_IOASID;
init_llist_head(&idxd->irq_entries[i].pending_llist);
INIT_LIST_HEAD(&idxd->irq_entries[i].work_list);
rc = request_threaded_irq(irq_entry->vector, NULL,
idxd_wq_thread, 0, "idxd-portal", irq_entry);
if (rc < 0) {
dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector);
goto err_wq_irqs;
}
dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector);
if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
/*
* The MSIX vector enumeration starts at 1 with vector 0 being the
* misc interrupt that handles non I/O completion events. The
* interrupt handles are for IMS enumeration on guest. The misc
* interrupt vector does not require a handle and therefore we start
* the int_handles at index 0. Since 'i' starts at 1, the first
* int_handles index will be 0.
*/
rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1],
IDXD_IRQ_MSIX);
if (rc < 0) {
free_irq(irq_entry->vector, irq_entry);
goto err_wq_irqs;
}
dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]);
}
spin_lock_init(&ie->list_lock);
init_llist_head(&ie->pending_llist);
INIT_LIST_HEAD(&ie->work_list);
}
idxd_unmask_error_interrupts(idxd);
return 0;
err_wq_irqs:
while (--i >= 0) {
irq_entry = &idxd->irq_entries[i];
free_irq(irq_entry->vector, irq_entry);
if (i != 0)
idxd_device_release_int_handle(idxd,
idxd->int_handles[i], IDXD_IRQ_MSIX);
}
err_misc_irq:
/* Disable error interrupt generation */
idxd_mask_error_interrupts(idxd);
idxd_msix_perm_clear(idxd);
err_irq_entries:
pci_free_irq_vectors(pdev);
dev_err(dev, "No usable interrupts\n");
return rc;
......@@ -178,26 +126,16 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
static void idxd_cleanup_interrupts(struct idxd_device *idxd)
{
struct pci_dev *pdev = idxd->pdev;
struct idxd_irq_entry *irq_entry;
int i, msixcnt;
struct idxd_irq_entry *ie;
int msixcnt;
msixcnt = pci_msix_vec_count(pdev);
if (msixcnt <= 0)
return;
irq_entry = &idxd->irq_entries[0];
free_irq(irq_entry->vector, irq_entry);
for (i = 1; i < msixcnt; i++) {
irq_entry = &idxd->irq_entries[i];
if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE))
idxd_device_release_int_handle(idxd, idxd->int_handles[i],
IDXD_IRQ_MSIX);
free_irq(irq_entry->vector, irq_entry);
}
ie = idxd_get_ie(idxd, 0);
idxd_mask_error_interrupts(idxd);
free_irq(ie->vector, ie);
pci_free_irq_vectors(pdev);
}
......@@ -237,8 +175,10 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
mutex_init(&wq->wq_lock);
init_waitqueue_head(&wq->err_queue);
init_completion(&wq->wq_dead);
wq->max_xfer_bytes = idxd->max_xfer_bytes;
wq->max_batch_size = idxd->max_batch_size;
init_completion(&wq->wq_resurrect);
wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
wq->max_batch_size = WQ_DEFAULT_MAX_BATCH;
wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
if (!wq->wqcfg) {
put_device(conf_dev);
......@@ -379,13 +319,6 @@ static int idxd_setup_internals(struct idxd_device *idxd)
init_waitqueue_head(&idxd->cmd_waitq);
if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
idxd->int_handles = kcalloc_node(idxd->max_wqs, sizeof(int), GFP_KERNEL,
dev_to_node(dev));
if (!idxd->int_handles)
return -ENOMEM;
}
rc = idxd_setup_wqs(idxd);
if (rc < 0)
goto err_wqs;
......@@ -416,7 +349,6 @@ static int idxd_setup_internals(struct idxd_device *idxd)
for (i = 0; i < idxd->max_wqs; i++)
put_device(wq_confdev(idxd->wqs[i]));
err_wqs:
kfree(idxd->int_handles);
return rc;
}
......@@ -451,6 +383,10 @@ static void idxd_read_caps(struct idxd_device *idxd)
dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap);
}
/* reading command capabilities */
if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE))
idxd->request_int_handles = true;
idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift;
......@@ -464,9 +400,9 @@ static void idxd_read_caps(struct idxd_device *idxd)
dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits);
idxd->max_groups = idxd->hw.group_cap.num_groups;
dev_dbg(dev, "max groups: %u\n", idxd->max_groups);
idxd->max_tokens = idxd->hw.group_cap.total_tokens;
dev_dbg(dev, "max tokens: %u\n", idxd->max_tokens);
idxd->nr_tokens = idxd->max_tokens;
idxd->max_rdbufs = idxd->hw.group_cap.total_rdbufs;
dev_dbg(dev, "max read buffers: %u\n", idxd->max_rdbufs);
idxd->nr_rdbufs = idxd->max_rdbufs;
/* read engine capabilities */
idxd->hw.engine_cap.bits =
......@@ -611,8 +547,6 @@ static int idxd_probe(struct idxd_device *idxd)
if (rc)
goto err_config;
dev_dbg(dev, "IDXD interrupt setup complete.\n");
idxd->major = idxd_cdev_get_major(idxd);
rc = perfmon_pmu_init(idxd);
......@@ -708,32 +642,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return rc;
}
static void idxd_flush_pending_llist(struct idxd_irq_entry *ie)
{
struct idxd_desc *desc, *itr;
struct llist_node *head;
head = llist_del_all(&ie->pending_llist);
if (!head)
return;
llist_for_each_entry_safe(desc, itr, head, llnode) {
idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
idxd_free_desc(desc->wq, desc);
}
}
static void idxd_flush_work_list(struct idxd_irq_entry *ie)
{
struct idxd_desc *desc, *iter;
list_for_each_entry_safe(desc, iter, &ie->work_list, list) {
list_del(&desc->list);
idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
idxd_free_desc(desc->wq, desc);
}
}
void idxd_wqs_quiesce(struct idxd_device *idxd)
{
struct idxd_wq *wq;
......@@ -746,47 +654,19 @@ void idxd_wqs_quiesce(struct idxd_device *idxd)
}
}
static void idxd_release_int_handles(struct idxd_device *idxd)
{
struct device *dev = &idxd->pdev->dev;
int i, rc;
for (i = 0; i < idxd->num_wq_irqs; i++) {
if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) {
rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i],
IDXD_IRQ_MSIX);
if (rc < 0)
dev_warn(dev, "irq handle %d release failed\n",
idxd->int_handles[i]);
else
dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i]);
}
}
}
static void idxd_shutdown(struct pci_dev *pdev)
{
struct idxd_device *idxd = pci_get_drvdata(pdev);
int rc, i;
struct idxd_irq_entry *irq_entry;
int msixcnt = pci_msix_vec_count(pdev);
int rc;
rc = idxd_device_disable(idxd);
if (rc)
dev_err(&pdev->dev, "Disabling device failed\n");
dev_dbg(&pdev->dev, "%s called\n", __func__);
idxd_mask_msix_vectors(idxd);
irq_entry = &idxd->ie;
synchronize_irq(irq_entry->vector);
idxd_mask_error_interrupts(idxd);
for (i = 0; i < msixcnt; i++) {
irq_entry = &idxd->irq_entries[i];
synchronize_irq(irq_entry->vector);
if (i == 0)
continue;
idxd_flush_pending_llist(irq_entry);
idxd_flush_work_list(irq_entry);
}
flush_workqueue(idxd->wq);
}
......@@ -794,8 +674,6 @@ static void idxd_remove(struct pci_dev *pdev)
{
struct idxd_device *idxd = pci_get_drvdata(pdev);
struct idxd_irq_entry *irq_entry;
int msixcnt = pci_msix_vec_count(pdev);
int i;
idxd_unregister_devices(idxd);
/*
......@@ -811,12 +689,8 @@ static void idxd_remove(struct pci_dev *pdev)
if (device_pasid_enabled(idxd))
idxd_disable_system_pasid(idxd);
for (i = 0; i < msixcnt; i++) {
irq_entry = &idxd->irq_entries[i];
free_irq(irq_entry->vector, irq_entry);
}
idxd_msix_perm_clear(idxd);
idxd_release_int_handles(idxd);
irq_entry = idxd_get_ie(idxd, 0);
free_irq(irq_entry->vector, irq_entry);
pci_free_irq_vectors(pdev);
pci_iounmap(pdev, idxd->reg_base);
iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
......
......@@ -6,6 +6,7 @@
#include <linux/pci.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <uapi/linux/idxd.h>
#include "../dmaengine.h"
#include "idxd.h"
......@@ -22,6 +23,16 @@ struct idxd_fault {
struct idxd_device *idxd;
};
struct idxd_resubmit {
struct work_struct work;
struct idxd_desc *desc;
};
struct idxd_int_handle_revoke {
struct work_struct work;
struct idxd_device *idxd;
};
static void idxd_device_reinit(struct work_struct *work)
{
struct idxd_device *idxd = container_of(work, struct idxd_device, work);
......@@ -55,6 +66,162 @@ static void idxd_device_reinit(struct work_struct *work)
idxd_device_clear_state(idxd);
}
/*
* The function sends a drain descriptor for the interrupt handle. The drain ensures
* all descriptors with this interrupt handle is flushed and the interrupt
* will allow the cleanup of the outstanding descriptors.
*/
static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie)
{
struct idxd_wq *wq = ie_to_wq(ie);
struct idxd_device *idxd = wq->idxd;
struct device *dev = &idxd->pdev->dev;
struct dsa_hw_desc desc = {};
void __iomem *portal;
int rc;
/* Issue a simple drain operation with interrupt but no completion record */
desc.flags = IDXD_OP_FLAG_RCI;
desc.opcode = DSA_OPCODE_DRAIN;
desc.priv = 1;
if (ie->pasid != INVALID_IOASID)
desc.pasid = ie->pasid;
desc.int_handle = ie->int_handle;
portal = idxd_wq_portal_addr(wq);
/*
* The wmb() makes sure that the descriptor is all there before we
* issue.
*/
wmb();
if (wq_dedicated(wq)) {
iosubmit_cmds512(portal, &desc, 1);
} else {
rc = idxd_enqcmds(wq, portal, &desc);
/* This should not fail unless hardware failed. */
if (rc < 0)
dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id);
}
}
static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie)
{
LIST_HEAD(flist);
struct idxd_desc *d, *t;
struct llist_node *head;
spin_lock(&ie->list_lock);
head = llist_del_all(&ie->pending_llist);
if (head) {
llist_for_each_entry_safe(d, t, head, llnode)
list_add_tail(&d->list, &ie->work_list);
}
list_for_each_entry_safe(d, t, &ie->work_list, list) {
if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL)
list_move_tail(&d->list, &flist);
}
spin_unlock(&ie->list_lock);
list_for_each_entry_safe(d, t, &flist, list) {
list_del(&d->list);
idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true);
}
}
static void idxd_int_handle_revoke(struct work_struct *work)
{
struct idxd_int_handle_revoke *revoke =
container_of(work, struct idxd_int_handle_revoke, work);
struct idxd_device *idxd = revoke->idxd;
struct pci_dev *pdev = idxd->pdev;
struct device *dev = &pdev->dev;
int i, new_handle, rc;
if (!idxd->request_int_handles) {
kfree(revoke);
dev_warn(dev, "Unexpected int handle refresh interrupt.\n");
return;
}
/*
* The loop attempts to acquire new interrupt handle for all interrupt
* vectors that supports a handle. If a new interrupt handle is acquired and the
* wq is kernel type, the driver will kill the percpu_ref to pause all
* ongoing descriptor submissions. The interrupt handle is then changed.
* After change, the percpu_ref is revived and all the pending submissions
* are woken to try again. A drain is sent to for the interrupt handle
* at the end to make sure all invalid int handle descriptors are processed.
*/
for (i = 1; i < idxd->irq_cnt; i++) {
struct idxd_irq_entry *ie = idxd_get_ie(idxd, i);
struct idxd_wq *wq = ie_to_wq(ie);
if (ie->int_handle == INVALID_INT_HANDLE)
continue;
rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX);
if (rc < 0) {
dev_warn(dev, "get int handle %d failed: %d\n", i, rc);
/*
* Failed to acquire new interrupt handle. Kill the WQ
* and release all the pending submitters. The submitters will
* get error return code and handle appropriately.
*/
ie->int_handle = INVALID_INT_HANDLE;
idxd_wq_quiesce(wq);
idxd_abort_invalid_int_handle_descs(ie);
continue;
}
/* No change in interrupt handle, nothing needs to be done */
if (ie->int_handle == new_handle)
continue;
if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) {
/*
* All the MSIX interrupts are allocated at once during probe.
* Therefore we need to update all interrupts even if the WQ
* isn't supporting interrupt operations.
*/
ie->int_handle = new_handle;
continue;
}
mutex_lock(&wq->wq_lock);
reinit_completion(&wq->wq_resurrect);
/* Kill percpu_ref to pause additional descriptor submissions */
percpu_ref_kill(&wq->wq_active);
/* Wait for all submitters quiesce before we change interrupt handle */
wait_for_completion(&wq->wq_dead);
ie->int_handle = new_handle;
/* Revive percpu ref and wake up all the waiting submitters */
percpu_ref_reinit(&wq->wq_active);
complete_all(&wq->wq_resurrect);
mutex_unlock(&wq->wq_lock);
/*
* The delay here is to wait for all possible MOVDIR64B that
* are issued before percpu_ref_kill() has happened to have
* reached the PCIe domain before the drain is issued. The driver
* needs to ensure that the drain descriptor issued does not pass
* all the other issued descriptors that contain the invalid
* interrupt handle in order to ensure that the drain descriptor
* interrupt will allow the cleanup of all the descriptors with
* invalid interrupt handle.
*/
if (wq_dedicated(wq))
udelay(100);
idxd_int_handle_revoke_drain(ie);
}
kfree(revoke);
}
static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
{
struct device *dev = &idxd->pdev->dev;
......@@ -101,6 +268,23 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
err = true;
}
if (cause & IDXD_INTC_INT_HANDLE_REVOKED) {
struct idxd_int_handle_revoke *revoke;
val |= IDXD_INTC_INT_HANDLE_REVOKED;
revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC);
if (revoke) {
revoke->idxd = idxd;
INIT_WORK(&revoke->work, idxd_int_handle_revoke);
queue_work(idxd->wq, &revoke->work);
} else {
dev_err(dev, "Failed to allocate work for int handle revoke\n");
idxd_wqs_quiesce(idxd);
}
}
if (cause & IDXD_INTC_CMD) {
val |= IDXD_INTC_CMD;
complete(idxd->cmd_done);
......@@ -157,7 +341,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
irqreturn_t idxd_misc_thread(int vec, void *data)
{
struct idxd_irq_entry *irq_entry = data;
struct idxd_device *idxd = irq_entry->idxd;
struct idxd_device *idxd = ie_to_idxd(irq_entry);
int rc;
u32 cause;
......@@ -177,6 +361,51 @@ irqreturn_t idxd_misc_thread(int vec, void *data)
return IRQ_HANDLED;
}
static void idxd_int_handle_resubmit_work(struct work_struct *work)
{
struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work);
struct idxd_desc *desc = irw->desc;
struct idxd_wq *wq = desc->wq;
int rc;
desc->completion->status = 0;
rc = idxd_submit_desc(wq, desc);
if (rc < 0) {
dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n",
desc->id, wq->id);
/*
* If the error is not -EAGAIN, it means the submission failed due to wq
* has been killed instead of ENQCMDS failure. Here the driver needs to
* notify the submitter of the failure by reporting abort status.
*
* -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the
* abort.
*/
if (rc != -EAGAIN) {
desc->completion->status = IDXD_COMP_DESC_ABORT;
idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, false);
}
idxd_free_desc(wq, desc);
}
kfree(irw);
}
bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc)
{
struct idxd_wq *wq = desc->wq;
struct idxd_device *idxd = wq->idxd;
struct idxd_resubmit *irw;
irw = kzalloc(sizeof(*irw), GFP_KERNEL);
if (!irw)
return false;
irw->desc = desc;
INIT_WORK(&irw->work, idxd_int_handle_resubmit_work);
queue_work(idxd->wq, &irw->work);
return true;
}
static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry)
{
struct idxd_desc *desc, *t;
......@@ -195,11 +424,11 @@ static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry)
* and 0xff, which DSA_COMP_STATUS_MASK can mask out.
*/
if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
complete_desc(desc, IDXD_COMPLETE_ABORT);
idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
continue;
}
complete_desc(desc, IDXD_COMPLETE_NORMAL);
idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true);
} else {
spin_lock(&irq_entry->list_lock);
list_add_tail(&desc->list,
......@@ -238,11 +467,11 @@ static void irq_process_work_list(struct idxd_irq_entry *irq_entry)
* and 0xff, which DSA_COMP_STATUS_MASK can mask out.
*/
if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
complete_desc(desc, IDXD_COMPLETE_ABORT);
idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
continue;
}
complete_desc(desc, IDXD_COMPLETE_NORMAL);
idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true);
}
}
......
......@@ -64,9 +64,9 @@ union wq_cap_reg {
union group_cap_reg {
struct {
u64 num_groups:8;
u64 total_tokens:8;
u64 token_en:1;
u64 token_limit:1;
u64 total_rdbufs:8; /* formerly total_tokens */
u64 rdbuf_ctrl:1; /* formerly token_en */
u64 rdbuf_limit:1; /* formerly token_limit */
u64 rsvd:46;
};
u64 bits;
......@@ -110,7 +110,7 @@ union offsets_reg {
#define IDXD_GENCFG_OFFSET 0x80
union gencfg_reg {
struct {
u32 token_limit:8;
u32 rdbuf_limit:8;
u32 rsvd:4;
u32 user_int_en:1;
u32 rsvd2:19;
......@@ -158,6 +158,7 @@ enum idxd_device_reset_type {
#define IDXD_INTC_OCCUPY 0x04
#define IDXD_INTC_PERFMON_OVFL 0x08
#define IDXD_INTC_HALT_STATE 0x10
#define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000
#define IDXD_CMD_OFFSET 0xa0
union idxd_command_reg {
......@@ -287,10 +288,10 @@ union group_flags {
u32 tc_a:3;
u32 tc_b:3;
u32 rsvd:1;
u32 use_token_limit:1;
u32 tokens_reserved:8;
u32 use_rdbuf_limit:1;
u32 rdbufs_reserved:8;
u32 rsvd2:4;
u32 tokens_allowed:8;
u32 rdbufs_allowed:8;
u32 rsvd3:4;
};
u32 bits;
......
......@@ -21,15 +21,6 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
if (device_pasid_enabled(idxd))
desc->hw->pasid = idxd->pasid;
/*
* On host, MSIX vecotr 0 is used for misc interrupt. Therefore when we match
* vector 1:1 to the WQ id, we need to add 1
*/
if (!idxd->int_handles)
desc->hw->int_handle = wq->id + 1;
else
desc->hw->int_handle = idxd->int_handles[wq->id];
return desc;
}
......@@ -134,35 +125,58 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
spin_unlock(&ie->list_lock);
if (found)
complete_desc(found, IDXD_COMPLETE_ABORT);
idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);
/*
* complete_desc() will return desc to allocator and the desc can be
* acquired by a different process and the desc->list can be modified.
* Delete desc from list so the list trasversing does not get corrupted
* by the other process.
* completing the descriptor will return desc to allocator and
* the desc can be acquired by a different process and the
* desc->list can be modified. Delete desc from list so the
* list trasversing does not get corrupted by the other process.
*/
list_for_each_entry_safe(d, t, &flist, list) {
list_del_init(&d->list);
complete_desc(d, IDXD_COMPLETE_NORMAL);
idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true);
}
}
/*
* ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
* has better control of number of descriptors being submitted to a shared wq by limiting
* the number of driver allocated descriptors to the wq size. However, when the swq is
* exported to a guest kernel, it may be shared with multiple guest kernels. This means
* the likelihood of getting busy returned on the swq when submitting goes significantly up.
* Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
* up. The sysfs knob can be tuned by the system administrator.
*/
int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
{
int rc, retries = 0;
do {
rc = enqcmds(portal, desc);
if (rc == 0)
break;
cpu_relax();
} while (retries++ < wq->enqcmds_retries);
return rc;
}
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
{
struct idxd_device *idxd = wq->idxd;
struct idxd_irq_entry *ie = NULL;
u32 desc_flags = desc->hw->flags;
void __iomem *portal;
int rc;
if (idxd->state != IDXD_DEV_ENABLED) {
idxd_free_desc(wq, desc);
if (idxd->state != IDXD_DEV_ENABLED)
return -EIO;
}
if (!percpu_ref_tryget_live(&wq->wq_active)) {
idxd_free_desc(wq, desc);
return -ENXIO;
wait_for_completion(&wq->wq_resurrect);
if (!percpu_ref_tryget_live(&wq->wq_active))
return -ENXIO;
}
portal = idxd_wq_portal_addr(wq);
......@@ -178,28 +192,21 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
* Pending the descriptor to the lockless list for the irq_entry
* that we designated the descriptor to.
*/
if (desc->hw->flags & IDXD_OP_FLAG_RCI) {
ie = &idxd->irq_entries[wq->id + 1];
if (desc_flags & IDXD_OP_FLAG_RCI) {
ie = &wq->ie;
desc->hw->int_handle = ie->int_handle;
llist_add(&desc->llnode, &ie->pending_llist);
}
if (wq_dedicated(wq)) {
iosubmit_cmds512(portal, desc->hw, 1);
} else {
/*
* It's not likely that we would receive queue full rejection
* since the descriptor allocation gates at wq size. If we
* receive a -EAGAIN, that means something went wrong such as the
* device is not accepting descriptor at all.
*/
rc = enqcmds(portal, desc->hw);
rc = idxd_enqcmds(wq, portal, desc->hw);
if (rc < 0) {
percpu_ref_put(&wq->wq_active);
/* abort operation frees the descriptor */
if (ie)
llist_abort_desc(wq, ie, desc);
else
idxd_free_desc(wq, desc);
return rc;
}
}
......
This diff is collapsed.
......@@ -158,8 +158,9 @@ static struct attribute *ioat_attrs[] = {
&intr_coalesce_attr.attr,
NULL,
};
ATTRIBUTE_GROUPS(ioat);
struct kobj_type ioat_ktype = {
.sysfs_ops = &ioat_sysfs_ops,
.default_attrs = ioat_attrs,
.default_groups = ioat_groups,
};
......@@ -835,7 +835,7 @@ static int pch_dma_probe(struct pci_dev *pdev,
goto err_disable_pdev;
}
err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "Cannot set proper DMA config\n");
goto err_free_res;
......
......@@ -3240,7 +3240,6 @@ static int ppc440spe_adma_dma2rxor_prep_src(
struct ppc440spe_rxor *cursor, int index,
int src_cnt, u32 addr)
{
int rval = 0;
u32 sign;
struct ppc440spe_adma_desc_slot *desc = hdesc;
int i;
......@@ -3348,7 +3347,7 @@ static int ppc440spe_adma_dma2rxor_prep_src(
break;
}
return rval;
return 0;
}
/**
......
......@@ -2206,10 +2206,8 @@ static int gpi_probe(struct platform_device *pdev)
/* set up irq */
ret = platform_get_irq(pdev, i);
if (ret < 0) {
dev_err(gpi_dev->dev, "platform_get_irq failed for %d:%d\n", i, ret);
if (ret < 0)
return ret;
}
gpii->irq = ret;
/* set up channel specific register info */
......
......@@ -236,7 +236,7 @@ struct rcar_dmac_of_data {
#define RCAR_DMAOR_PRI_ROUND_ROBIN (3 << 8)
#define RCAR_DMAOR_AE (1 << 2)
#define RCAR_DMAOR_DME (1 << 0)
#define RCAR_DMACHCLR 0x0080 /* Not on R-Car V3U */
#define RCAR_DMACHCLR 0x0080 /* Not on R-Car Gen4 */
#define RCAR_DMADPSEC 0x00a0
#define RCAR_DMASAR 0x0000
......@@ -299,8 +299,8 @@ struct rcar_dmac_of_data {
#define RCAR_DMAFIXDAR 0x0014
#define RCAR_DMAFIXDPBASE 0x0060
/* For R-Car V3U */
#define RCAR_V3U_DMACHCLR 0x0100
/* For R-Car Gen4 */
#define RCAR_GEN4_DMACHCLR 0x0100
/* Hardcode the MEMCPY transfer size to 4 bytes. */
#define RCAR_DMAC_MEMCPY_XFER_SIZE 4
......@@ -345,7 +345,7 @@ static void rcar_dmac_chan_clear(struct rcar_dmac *dmac,
struct rcar_dmac_chan *chan)
{
if (dmac->chan_base)
rcar_dmac_chan_write(chan, RCAR_V3U_DMACHCLR, 1);
rcar_dmac_chan_write(chan, RCAR_GEN4_DMACHCLR, 1);
else
rcar_dmac_write(dmac, RCAR_DMACHCLR, BIT(chan->index));
}
......@@ -357,7 +357,7 @@ static void rcar_dmac_chan_clear_all(struct rcar_dmac *dmac)
if (dmac->chan_base) {
for_each_rcar_dmac_chan(i, dmac, chan)
rcar_dmac_chan_write(chan, RCAR_V3U_DMACHCLR, 1);
rcar_dmac_chan_write(chan, RCAR_GEN4_DMACHCLR, 1);
} else {
rcar_dmac_write(dmac, RCAR_DMACHCLR, dmac->channels_mask);
}
......@@ -2009,7 +2009,7 @@ static const struct rcar_dmac_of_data rcar_dmac_data = {
.chan_offset_stride = 0x80,
};
static const struct rcar_dmac_of_data rcar_v3u_dmac_data = {
static const struct rcar_dmac_of_data rcar_gen4_dmac_data = {
.chan_offset_base = 0x0,
.chan_offset_stride = 0x1000,
};
......@@ -2018,9 +2018,12 @@ static const struct of_device_id rcar_dmac_of_ids[] = {
{
.compatible = "renesas,rcar-dmac",
.data = &rcar_dmac_data,
}, {
.compatible = "renesas,rcar-gen4-dmac",
.data = &rcar_gen4_dmac_data,
}, {
.compatible = "renesas,dmac-r8a779a0",
.data = &rcar_v3u_dmac_data,
.data = &rcar_gen4_dmac_data,
},
{ /* Sentinel */ }
};
......
......@@ -1034,9 +1034,7 @@ EXPORT_SYMBOL(shdma_cleanup);
static int __init shdma_enter(void)
{
shdma_slave_used = kcalloc(DIV_ROUND_UP(slave_num, BITS_PER_LONG),
sizeof(long),
GFP_KERNEL);
shdma_slave_used = bitmap_zalloc(slave_num, GFP_KERNEL);
if (!shdma_slave_used)
return -ENOMEM;
return 0;
......@@ -1045,7 +1043,7 @@ module_init(shdma_enter);
static void __exit shdma_exit(void)
{
kfree(shdma_slave_used);
bitmap_free(shdma_slave_used);
}
module_exit(shdma_exit);
......
......@@ -10,6 +10,7 @@
* Inspired by stm32-dma.c and dma-jz4780.c
*/
#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dmaengine.h>
......@@ -32,13 +33,6 @@
#include "virt-dma.h"
/* MDMA Generic getter/setter */
#define STM32_MDMA_SHIFT(n) (ffs(n) - 1)
#define STM32_MDMA_SET(n, mask) (((n) << STM32_MDMA_SHIFT(mask)) & \
(mask))
#define STM32_MDMA_GET(n, mask) (((n) & (mask)) >> \
STM32_MDMA_SHIFT(mask))
#define STM32_MDMA_GISR0 0x0000 /* MDMA Int Status Reg 1 */
#define STM32_MDMA_GISR1 0x0004 /* MDMA Int Status Reg 2 */
......@@ -80,8 +74,7 @@
#define STM32_MDMA_CCR_HEX BIT(13)
#define STM32_MDMA_CCR_BEX BIT(12)
#define STM32_MDMA_CCR_PL_MASK GENMASK(7, 6)
#define STM32_MDMA_CCR_PL(n) STM32_MDMA_SET(n, \
STM32_MDMA_CCR_PL_MASK)
#define STM32_MDMA_CCR_PL(n) FIELD_PREP(STM32_MDMA_CCR_PL_MASK, (n))
#define STM32_MDMA_CCR_TCIE BIT(5)
#define STM32_MDMA_CCR_BTIE BIT(4)
#define STM32_MDMA_CCR_BRTIE BIT(3)
......@@ -99,48 +92,33 @@
#define STM32_MDMA_CTCR_BWM BIT(31)
#define STM32_MDMA_CTCR_SWRM BIT(30)
#define STM32_MDMA_CTCR_TRGM_MSK GENMASK(29, 28)
#define STM32_MDMA_CTCR_TRGM(n) STM32_MDMA_SET((n), \
STM32_MDMA_CTCR_TRGM_MSK)
#define STM32_MDMA_CTCR_TRGM_GET(n) STM32_MDMA_GET((n), \
STM32_MDMA_CTCR_TRGM_MSK)
#define STM32_MDMA_CTCR_TRGM(n) FIELD_PREP(STM32_MDMA_CTCR_TRGM_MSK, (n))
#define STM32_MDMA_CTCR_TRGM_GET(n) FIELD_GET(STM32_MDMA_CTCR_TRGM_MSK, (n))
#define STM32_MDMA_CTCR_PAM_MASK GENMASK(27, 26)
#define STM32_MDMA_CTCR_PAM(n) STM32_MDMA_SET(n, \
STM32_MDMA_CTCR_PAM_MASK)
#define STM32_MDMA_CTCR_PAM(n) FIELD_PREP(STM32_MDMA_CTCR_PAM_MASK, (n))
#define STM32_MDMA_CTCR_PKE BIT(25)
#define STM32_MDMA_CTCR_TLEN_MSK GENMASK(24, 18)
#define STM32_MDMA_CTCR_TLEN(n) STM32_MDMA_SET((n), \
STM32_MDMA_CTCR_TLEN_MSK)
#define STM32_MDMA_CTCR_TLEN_GET(n) STM32_MDMA_GET((n), \
STM32_MDMA_CTCR_TLEN_MSK)
#define STM32_MDMA_CTCR_TLEN(n) FIELD_PREP(STM32_MDMA_CTCR_TLEN_MSK, (n))
#define STM32_MDMA_CTCR_TLEN_GET(n) FIELD_GET(STM32_MDMA_CTCR_TLEN_MSK, (n))
#define STM32_MDMA_CTCR_LEN2_MSK GENMASK(25, 18)
#define STM32_MDMA_CTCR_LEN2(n) STM32_MDMA_SET((n), \
STM32_MDMA_CTCR_LEN2_MSK)
#define STM32_MDMA_CTCR_LEN2_GET(n) STM32_MDMA_GET((n), \
STM32_MDMA_CTCR_LEN2_MSK)
#define STM32_MDMA_CTCR_LEN2(n) FIELD_PREP(STM32_MDMA_CTCR_LEN2_MSK, (n))
#define STM32_MDMA_CTCR_LEN2_GET(n) FIELD_GET(STM32_MDMA_CTCR_LEN2_MSK, (n))
#define STM32_MDMA_CTCR_DBURST_MASK GENMASK(17, 15)
#define STM32_MDMA_CTCR_DBURST(n) STM32_MDMA_SET(n, \
STM32_MDMA_CTCR_DBURST_MASK)
#define STM32_MDMA_CTCR_DBURST(n) FIELD_PREP(STM32_MDMA_CTCR_DBURST_MASK, (n))
#define STM32_MDMA_CTCR_SBURST_MASK GENMASK(14, 12)
#define STM32_MDMA_CTCR_SBURST(n) STM32_MDMA_SET(n, \
STM32_MDMA_CTCR_SBURST_MASK)
#define STM32_MDMA_CTCR_SBURST(n) FIELD_PREP(STM32_MDMA_CTCR_SBURST_MASK, (n))
#define STM32_MDMA_CTCR_DINCOS_MASK GENMASK(11, 10)
#define STM32_MDMA_CTCR_DINCOS(n) STM32_MDMA_SET((n), \
STM32_MDMA_CTCR_DINCOS_MASK)
#define STM32_MDMA_CTCR_DINCOS(n) FIELD_PREP(STM32_MDMA_CTCR_DINCOS_MASK, (n))
#define STM32_MDMA_CTCR_SINCOS_MASK GENMASK(9, 8)
#define STM32_MDMA_CTCR_SINCOS(n) STM32_MDMA_SET((n), \
STM32_MDMA_CTCR_SINCOS_MASK)
#define STM32_MDMA_CTCR_SINCOS(n) FIELD_PREP(STM32_MDMA_CTCR_SINCOS_MASK, (n))
#define STM32_MDMA_CTCR_DSIZE_MASK GENMASK(7, 6)
#define STM32_MDMA_CTCR_DSIZE(n) STM32_MDMA_SET(n, \
STM32_MDMA_CTCR_DSIZE_MASK)
#define STM32_MDMA_CTCR_DSIZE(n) FIELD_PREP(STM32_MDMA_CTCR_DSIZE_MASK, (n))
#define STM32_MDMA_CTCR_SSIZE_MASK GENMASK(5, 4)
#define STM32_MDMA_CTCR_SSIZE(n) STM32_MDMA_SET(n, \
STM32_MDMA_CTCR_SSIZE_MASK)
#define STM32_MDMA_CTCR_SSIZE(n) FIELD_PREP(STM32_MDMA_CTCR_SSIZE_MASK, (n))
#define STM32_MDMA_CTCR_DINC_MASK GENMASK(3, 2)
#define STM32_MDMA_CTCR_DINC(n) STM32_MDMA_SET((n), \
STM32_MDMA_CTCR_DINC_MASK)
#define STM32_MDMA_CTCR_DINC(n) FIELD_PREP(STM32_MDMA_CTCR_DINC_MASK, (n))
#define STM32_MDMA_CTCR_SINC_MASK GENMASK(1, 0)
#define STM32_MDMA_CTCR_SINC(n) STM32_MDMA_SET((n), \
STM32_MDMA_CTCR_SINC_MASK)
#define STM32_MDMA_CTCR_SINC(n) FIELD_PREP(STM32_MDMA_CTCR_SINC_MASK, (n))
#define STM32_MDMA_CTCR_CFG_MASK (STM32_MDMA_CTCR_SINC_MASK \
| STM32_MDMA_CTCR_DINC_MASK \
| STM32_MDMA_CTCR_SINCOS_MASK \
......@@ -151,16 +129,13 @@
/* MDMA Channel x block number of data register */
#define STM32_MDMA_CBNDTR(x) (0x54 + 0x40 * (x))
#define STM32_MDMA_CBNDTR_BRC_MK GENMASK(31, 20)
#define STM32_MDMA_CBNDTR_BRC(n) STM32_MDMA_SET(n, \
STM32_MDMA_CBNDTR_BRC_MK)
#define STM32_MDMA_CBNDTR_BRC_GET(n) STM32_MDMA_GET((n), \
STM32_MDMA_CBNDTR_BRC_MK)
#define STM32_MDMA_CBNDTR_BRC(n) FIELD_PREP(STM32_MDMA_CBNDTR_BRC_MK, (n))
#define STM32_MDMA_CBNDTR_BRC_GET(n) FIELD_GET(STM32_MDMA_CBNDTR_BRC_MK, (n))
#define STM32_MDMA_CBNDTR_BRDUM BIT(19)
#define STM32_MDMA_CBNDTR_BRSUM BIT(18)
#define STM32_MDMA_CBNDTR_BNDT_MASK GENMASK(16, 0)
#define STM32_MDMA_CBNDTR_BNDT(n) STM32_MDMA_SET(n, \
STM32_MDMA_CBNDTR_BNDT_MASK)
#define STM32_MDMA_CBNDTR_BNDT(n) FIELD_PREP(STM32_MDMA_CBNDTR_BNDT_MASK, (n))
/* MDMA Channel x source address register */
#define STM32_MDMA_CSAR(x) (0x58 + 0x40 * (x))
......@@ -171,11 +146,9 @@
/* MDMA Channel x block repeat address update register */
#define STM32_MDMA_CBRUR(x) (0x60 + 0x40 * (x))
#define STM32_MDMA_CBRUR_DUV_MASK GENMASK(31, 16)
#define STM32_MDMA_CBRUR_DUV(n) STM32_MDMA_SET(n, \
STM32_MDMA_CBRUR_DUV_MASK)
#define STM32_MDMA_CBRUR_DUV(n) FIELD_PREP(STM32_MDMA_CBRUR_DUV_MASK, (n))
#define STM32_MDMA_CBRUR_SUV_MASK GENMASK(15, 0)
#define STM32_MDMA_CBRUR_SUV(n) STM32_MDMA_SET(n, \
STM32_MDMA_CBRUR_SUV_MASK)
#define STM32_MDMA_CBRUR_SUV(n) FIELD_PREP(STM32_MDMA_CBRUR_SUV_MASK, (n))
/* MDMA Channel x link address register */
#define STM32_MDMA_CLAR(x) (0x64 + 0x40 * (x))
......@@ -184,9 +157,8 @@
#define STM32_MDMA_CTBR(x) (0x68 + 0x40 * (x))
#define STM32_MDMA_CTBR_DBUS BIT(17)
#define STM32_MDMA_CTBR_SBUS BIT(16)
#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(7, 0)
#define STM32_MDMA_CTBR_TSEL(n) STM32_MDMA_SET(n, \
STM32_MDMA_CTBR_TSEL_MASK)
#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(5, 0)
#define STM32_MDMA_CTBR_TSEL(n) FIELD_PREP(STM32_MDMA_CTBR_TSEL_MASK, (n))
/* MDMA Channel x mask address register */
#define STM32_MDMA_CMAR(x) (0x70 + 0x40 * (x))
......@@ -1279,7 +1251,7 @@ static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan,
u32 curr_hwdesc)
{
struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
struct stm32_mdma_hwdesc *hwdesc = desc->node[0].hwdesc;
struct stm32_mdma_hwdesc *hwdesc;
u32 cbndtr, residue, modulo, burst_size;
int i;
......
......@@ -8,5 +8,6 @@ obj-$(CONFIG_TI_K3_PSIL) += k3-psil.o \
k3-psil-am654.o \
k3-psil-j721e.o \
k3-psil-j7200.o \
k3-psil-am64.o
k3-psil-am64.o \
k3-psil-j721s2.o
obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o
......@@ -1681,8 +1681,7 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
dev_dbg(ecc->dev, "EMR%d 0x%08x\n", j, val);
emr = val;
for (i = find_next_bit(&emr, 32, 0); i < 32;
i = find_next_bit(&emr, 32, i + 1)) {
for_each_set_bit(i, &emr, 32) {
int k = (j << 5) + i;
/* Clear the corresponding EMR bits */
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2021 Texas Instruments Incorporated - https://www.ti.com
*/
#include <linux/kernel.h>
#include "k3-psil-priv.h"
#define PSIL_PDMA_XY_TR(x) \
{ \
.thread_id = x, \
.ep_config = { \
.ep_type = PSIL_EP_PDMA_XY, \
}, \
}
#define PSIL_PDMA_XY_PKT(x) \
{ \
.thread_id = x, \
.ep_config = { \
.ep_type = PSIL_EP_PDMA_XY, \
.pkt_mode = 1, \
}, \
}
#define PSIL_PDMA_MCASP(x) \
{ \
.thread_id = x, \
.ep_config = { \
.ep_type = PSIL_EP_PDMA_XY, \
.pdma_acc32 = 1, \
.pdma_burst = 1, \
}, \
}
#define PSIL_ETHERNET(x) \
{ \
.thread_id = x, \
.ep_config = { \
.ep_type = PSIL_EP_NATIVE, \
.pkt_mode = 1, \
.needs_epib = 1, \
.psd_size = 16, \
}, \
}
#define PSIL_SA2UL(x, tx) \
{ \
.thread_id = x, \
.ep_config = { \
.ep_type = PSIL_EP_NATIVE, \
.pkt_mode = 1, \
.needs_epib = 1, \
.psd_size = 64, \
.notdpkt = tx, \
}, \
}
/* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
static struct psil_ep j721s2_src_ep_map[] = {
/* PDMA_MCASP - McASP0-4 */
PSIL_PDMA_MCASP(0x4400),
PSIL_PDMA_MCASP(0x4401),
PSIL_PDMA_MCASP(0x4402),
PSIL_PDMA_MCASP(0x4403),
PSIL_PDMA_MCASP(0x4404),
/* PDMA_SPI_G0 - SPI0-3 */
PSIL_PDMA_XY_PKT(0x4600),
PSIL_PDMA_XY_PKT(0x4601),
PSIL_PDMA_XY_PKT(0x4602),
PSIL_PDMA_XY_PKT(0x4603),
PSIL_PDMA_XY_PKT(0x4604),
PSIL_PDMA_XY_PKT(0x4605),
PSIL_PDMA_XY_PKT(0x4606),
PSIL_PDMA_XY_PKT(0x4607),
PSIL_PDMA_XY_PKT(0x4608),
PSIL_PDMA_XY_PKT(0x4609),
PSIL_PDMA_XY_PKT(0x460a),
PSIL_PDMA_XY_PKT(0x460b),
PSIL_PDMA_XY_PKT(0x460c),
PSIL_PDMA_XY_PKT(0x460d),
PSIL_PDMA_XY_PKT(0x460e),
PSIL_PDMA_XY_PKT(0x460f),
/* PDMA_SPI_G1 - SPI4-7 */
PSIL_PDMA_XY_PKT(0x4610),
PSIL_PDMA_XY_PKT(0x4611),
PSIL_PDMA_XY_PKT(0x4612),
PSIL_PDMA_XY_PKT(0x4613),
PSIL_PDMA_XY_PKT(0x4614),
PSIL_PDMA_XY_PKT(0x4615),
PSIL_PDMA_XY_PKT(0x4616),
PSIL_PDMA_XY_PKT(0x4617),
PSIL_PDMA_XY_PKT(0x4618),
PSIL_PDMA_XY_PKT(0x4619),
PSIL_PDMA_XY_PKT(0x461a),
PSIL_PDMA_XY_PKT(0x461b),
PSIL_PDMA_XY_PKT(0x461c),
PSIL_PDMA_XY_PKT(0x461d),
PSIL_PDMA_XY_PKT(0x461e),
PSIL_PDMA_XY_PKT(0x461f),
/* PDMA_USART_G0 - UART0-1 */
PSIL_PDMA_XY_PKT(0x4700),
PSIL_PDMA_XY_PKT(0x4701),
/* PDMA_USART_G1 - UART2-3 */
PSIL_PDMA_XY_PKT(0x4702),
PSIL_PDMA_XY_PKT(0x4703),
/* PDMA_USART_G2 - UART4-9 */
PSIL_PDMA_XY_PKT(0x4704),
PSIL_PDMA_XY_PKT(0x4705),
PSIL_PDMA_XY_PKT(0x4706),
PSIL_PDMA_XY_PKT(0x4707),
PSIL_PDMA_XY_PKT(0x4708),
PSIL_PDMA_XY_PKT(0x4709),
/* CPSW0 */
PSIL_ETHERNET(0x7000),
/* MCU_PDMA0 (MCU_PDMA_MISC_G0) - SPI0 */
PSIL_PDMA_XY_PKT(0x7100),
PSIL_PDMA_XY_PKT(0x7101),
PSIL_PDMA_XY_PKT(0x7102),
PSIL_PDMA_XY_PKT(0x7103),
/* MCU_PDMA1 (MCU_PDMA_MISC_G1) - SPI1-2 */
PSIL_PDMA_XY_PKT(0x7200),
PSIL_PDMA_XY_PKT(0x7201),
PSIL_PDMA_XY_PKT(0x7202),
PSIL_PDMA_XY_PKT(0x7203),
PSIL_PDMA_XY_PKT(0x7204),
PSIL_PDMA_XY_PKT(0x7205),
PSIL_PDMA_XY_PKT(0x7206),
PSIL_PDMA_XY_PKT(0x7207),
/* MCU_PDMA2 (MCU_PDMA_MISC_G2) - UART0 */
PSIL_PDMA_XY_PKT(0x7300),
/* MCU_PDMA_ADC - ADC0-1 */
PSIL_PDMA_XY_TR(0x7400),
PSIL_PDMA_XY_TR(0x7401),
PSIL_PDMA_XY_TR(0x7402),
PSIL_PDMA_XY_TR(0x7403),
/* SA2UL */
PSIL_SA2UL(0x7500, 0),
PSIL_SA2UL(0x7501, 0),
PSIL_SA2UL(0x7502, 0),
PSIL_SA2UL(0x7503, 0),
};
/* PSI-L destination thread IDs, used for TX (DMA_MEM_TO_DEV) */
static struct psil_ep j721s2_dst_ep_map[] = {
/* CPSW0 */
PSIL_ETHERNET(0xf000),
PSIL_ETHERNET(0xf001),
PSIL_ETHERNET(0xf002),
PSIL_ETHERNET(0xf003),
PSIL_ETHERNET(0xf004),
PSIL_ETHERNET(0xf005),
PSIL_ETHERNET(0xf006),
PSIL_ETHERNET(0xf007),
/* SA2UL */
PSIL_SA2UL(0xf500, 1),
PSIL_SA2UL(0xf501, 1),
};
struct psil_ep_map j721s2_ep_map = {
.name = "j721s2",
.src = j721s2_src_ep_map,
.src_count = ARRAY_SIZE(j721s2_src_ep_map),
.dst = j721s2_dst_ep_map,
.dst_count = ARRAY_SIZE(j721s2_dst_ep_map),
};
......@@ -41,5 +41,6 @@ extern struct psil_ep_map am654_ep_map;
extern struct psil_ep_map j721e_ep_map;
extern struct psil_ep_map j7200_ep_map;
extern struct psil_ep_map am64_ep_map;
extern struct psil_ep_map j721s2_ep_map;
#endif /* K3_PSIL_PRIV_H_ */
......@@ -21,6 +21,7 @@ static const struct soc_device_attribute k3_soc_devices[] = {
{ .family = "J721E", .data = &j721e_ep_map },
{ .family = "J7200", .data = &j7200_ep_map },
{ .family = "AM64X", .data = &am64_ep_map },
{ .family = "J721S2", .data = &j721s2_ep_map },
{ /* sentinel */ }
};
......
......@@ -4374,6 +4374,7 @@ static const struct soc_device_attribute k3_soc_devices[] = {
{ .family = "J721E", .data = &j721e_soc_data },
{ .family = "J7200", .data = &j7200_soc_data },
{ .family = "AM64X", .data = &am64_soc_data },
{ .family = "J721S2", .data = &j721e_soc_data},
{ /* sentinel */ }
};
......
......@@ -131,8 +131,9 @@ uniphier_xdmac_next_desc(struct uniphier_xdmac_chan *xc)
static void uniphier_xdmac_chan_start(struct uniphier_xdmac_chan *xc,
struct uniphier_xdmac_desc *xd)
{
u32 src_mode, src_addr, src_width;
u32 dst_mode, dst_addr, dst_width;
u32 src_mode, src_width;
u32 dst_mode, dst_width;
dma_addr_t src_addr, dst_addr;
u32 val, its, tnum;
enum dma_slave_buswidth buswidth;
......
......@@ -2127,6 +2127,126 @@ xilinx_cdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst,
return NULL;
}
/**
* xilinx_cdma_prep_memcpy_sg - prepare descriptors for a memcpy_sg transaction
* @dchan: DMA channel
* @dst_sg: Destination scatter list
* @dst_sg_len: Number of entries in destination scatter list
* @src_sg: Source scatter list
* @src_sg_len: Number of entries in source scatter list
* @flags: transfer ack flags
*
* Return: Async transaction descriptor on success and NULL on failure
*/
static struct dma_async_tx_descriptor *xilinx_cdma_prep_memcpy_sg(
struct dma_chan *dchan, struct scatterlist *dst_sg,
unsigned int dst_sg_len, struct scatterlist *src_sg,
unsigned int src_sg_len, unsigned long flags)
{
struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
struct xilinx_dma_tx_descriptor *desc;
struct xilinx_cdma_tx_segment *segment, *prev = NULL;
struct xilinx_cdma_desc_hw *hw;
size_t len, dst_avail, src_avail;
dma_addr_t dma_dst, dma_src;
if (unlikely(dst_sg_len == 0 || src_sg_len == 0))
return NULL;
if (unlikely(!dst_sg || !src_sg))
return NULL;
desc = xilinx_dma_alloc_tx_descriptor(chan);
if (!desc)
return NULL;
dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
desc->async_tx.tx_submit = xilinx_dma_tx_submit;
dst_avail = sg_dma_len(dst_sg);
src_avail = sg_dma_len(src_sg);
/*
* loop until there is either no more source or no more destination
* scatterlist entry
*/
while (true) {
len = min_t(size_t, src_avail, dst_avail);
len = min_t(size_t, len, chan->xdev->max_buffer_len);
if (len == 0)
goto fetch;
/* Allocate the link descriptor from DMA pool */
segment = xilinx_cdma_alloc_tx_segment(chan);
if (!segment)
goto error;
dma_dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) -
dst_avail;
dma_src = sg_dma_address(src_sg) + sg_dma_len(src_sg) -
src_avail;
hw = &segment->hw;
hw->control = len;
hw->src_addr = dma_src;
hw->dest_addr = dma_dst;
if (chan->ext_addr) {
hw->src_addr_msb = upper_32_bits(dma_src);
hw->dest_addr_msb = upper_32_bits(dma_dst);
}
if (prev) {
prev->hw.next_desc = segment->phys;
if (chan->ext_addr)
prev->hw.next_desc_msb =
upper_32_bits(segment->phys);
}
prev = segment;
dst_avail -= len;
src_avail -= len;
list_add_tail(&segment->node, &desc->segments);
fetch:
/* Fetch the next dst scatterlist entry */
if (dst_avail == 0) {
if (dst_sg_len == 0)
break;
dst_sg = sg_next(dst_sg);
if (dst_sg == NULL)
break;
dst_sg_len--;
dst_avail = sg_dma_len(dst_sg);
}
/* Fetch the next src scatterlist entry */
if (src_avail == 0) {
if (src_sg_len == 0)
break;
src_sg = sg_next(src_sg);
if (src_sg == NULL)
break;
src_sg_len--;
src_avail = sg_dma_len(src_sg);
}
}
if (list_empty(&desc->segments)) {
dev_err(chan->xdev->dev,
"%s: Zero-size SG transfer requested\n", __func__);
goto error;
}
/* Link the last hardware descriptor with the first. */
segment = list_first_entry(&desc->segments,
struct xilinx_cdma_tx_segment, node);
desc->async_tx.phys = segment->phys;
prev->hw.next_desc = segment->phys;
return &desc->async_tx;
error:
xilinx_dma_free_tx_descriptor(chan, desc);
return NULL;
}
/**
* xilinx_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
* @dchan: DMA channel
......@@ -2860,7 +2980,9 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
}
/* Request the interrupt */
chan->irq = irq_of_parse_and_map(node, chan->tdest);
chan->irq = of_irq_get(node, chan->tdest);
if (chan->irq < 0)
return dev_err_probe(xdev->dev, chan->irq, "failed to get irq\n");
err = request_irq(chan->irq, xdev->dma_config->irq_handler,
IRQF_SHARED, "xilinx-dma-controller", chan);
if (err) {
......@@ -2934,8 +3056,11 @@ static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev,
if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA && ret < 0)
dev_warn(xdev->dev, "missing dma-channels property\n");
for (i = 0; i < nr_channels; i++)
xilinx_dma_chan_probe(xdev, node);
for (i = 0; i < nr_channels; i++) {
ret = xilinx_dma_chan_probe(xdev, node);
if (ret)
return ret;
}
return 0;
}
......@@ -3115,7 +3240,9 @@ static int xilinx_dma_probe(struct platform_device *pdev)
DMA_RESIDUE_GRANULARITY_SEGMENT;
} else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
dma_cap_set(DMA_MEMCPY, xdev->common.cap_mask);
dma_cap_set(DMA_MEMCPY_SG, xdev->common.cap_mask);
xdev->common.device_prep_dma_memcpy = xilinx_cdma_prep_memcpy;
xdev->common.device_prep_dma_memcpy_sg = xilinx_cdma_prep_memcpy_sg;
/* Residue calculation is supported by only AXI DMA and CDMA */
xdev->common.residue_granularity =
DMA_RESIDUE_GRANULARITY_SEGMENT;
......
// SPDX-License-Identifier: GPL-2.0
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_DMA_XILINX_DPDMA_H
#define __LINUX_DMA_XILINX_DPDMA_H
......
......@@ -50,6 +50,7 @@ enum dma_status {
*/
enum dma_transaction_type {
DMA_MEMCPY,
DMA_MEMCPY_SG,
DMA_XOR,
DMA_PQ,
DMA_XOR_VAL,
......@@ -887,6 +888,11 @@ struct dma_device {
struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
size_t len, unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_dma_memcpy_sg)(
struct dma_chan *chan,
struct scatterlist *dst_sg, unsigned int dst_nents,
struct scatterlist *src_sg, unsigned int src_nents,
unsigned long flags);
struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
unsigned int src_cnt, size_t len, unsigned long flags);
......@@ -1047,6 +1053,20 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy(
len, flags);
}
static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy_sg(
struct dma_chan *chan,
struct scatterlist *dst_sg, unsigned int dst_nents,
struct scatterlist *src_sg, unsigned int src_nents,
unsigned long flags)
{
if (!chan || !chan->device || !chan->device->device_prep_dma_memcpy_sg)
return NULL;
return chan->device->device_prep_dma_memcpy_sg(chan, dst_sg, dst_nents,
src_sg, src_nents,
flags);
}
static inline bool dmaengine_is_metadata_mode_supported(struct dma_chan *chan,
enum dma_desc_metadata_mode mode)
{
......
......@@ -28,6 +28,7 @@ enum idxd_scmd_stat {
IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000,
IDXD_SCMD_WQ_NO_SIZE = 0x800e0000,
IDXD_SCMD_WQ_NO_PRIV = 0x800f0000,
IDXD_SCMD_WQ_IRQ_ERR = 0x80100000,
};
#define IDXD_SCMD_SOFTERR_MASK 0x80000000
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment