Commit 78611565 authored by Mark Brown's avatar Mark Brown

spi: add support for pre-cooking messages

Merge series from David Lechner <dlechner@baylibre.com>:

This is a follow-up to [1] where it was suggested to break down the
proposed SPI offload support into smaller series.

This takes on the first suggested task of introducing an API to
"pre-cook" SPI messages. This idea was first discussed extensively in
2013 [2][3] and revisited more briefly in 2022 [4].

The goal here is to be able to improve performance (higher throughput,
and reduced CPU usage) by allowing peripheral drivers that use the
same struct spi_message repeatedly to "pre-cook" the message once to
avoid repeating the same validation, and possibly other operations each
time the message is sent.

This series includes __spi_validate() and the automatic splitting of
xfers in the optimizations. Another frequently suggested optimization
is doing DMA mapping only once. This is not included in this series, but
can be added later (preferably by someone with a real use case for it).

To show how this all works and get some real-world measurements, this
series includes the core changes, optimization of a SPI controller
driver, and optimization of an ADC driver. This test case was only able
to take advantage of the single validation optimization, since it didn't
require splitting transfers. With these changes, CPU usage of the
threaded interrupt handler, which calls spi_sync(), was reduced from
83% to 73% while at the same time the sample rate (frequency of SPI
xfers) was increased from 20kHz to 25kHz.

[1]: https://lore.kernel.org/linux-spi/20240109-axi-spi-engine-series-3-v1-1-e42c6a986580@baylibre.com/T/
[2]: https://lore.kernel.org/linux-spi/E81F4810-48DD-41EE-B110-D0D848B8A510@martin.sperl.org/T/
[3]: https://lore.kernel.org/linux-spi/39DEC004-10A1-47EF-9D77-276188D2580C@martin.sperl.org/T/
[4]: https://lore.kernel.org/linux-spi/20220525163946.48ea40c9@erd992/T/
parents e63aef9c 7dba2adb
...@@ -109,6 +109,7 @@ struct spi_engine { ...@@ -109,6 +109,7 @@ struct spi_engine {
spinlock_t lock; spinlock_t lock;
void __iomem *base; void __iomem *base;
struct spi_engine_message_state msg_state;
struct completion msg_complete; struct completion msg_complete;
unsigned int int_enable; unsigned int int_enable;
}; };
...@@ -499,17 +500,11 @@ static irqreturn_t spi_engine_irq(int irq, void *devid) ...@@ -499,17 +500,11 @@ static irqreturn_t spi_engine_irq(int irq, void *devid)
return IRQ_HANDLED; return IRQ_HANDLED;
} }
static int spi_engine_prepare_message(struct spi_controller *host, static int spi_engine_optimize_message(struct spi_message *msg)
struct spi_message *msg)
{ {
struct spi_engine_program p_dry, *p; struct spi_engine_program p_dry, *p;
struct spi_engine_message_state *st;
size_t size; size_t size;
st = kzalloc(sizeof(*st), GFP_KERNEL);
if (!st)
return -ENOMEM;
spi_engine_precompile_message(msg); spi_engine_precompile_message(msg);
p_dry.length = 0; p_dry.length = 0;
...@@ -517,31 +512,22 @@ static int spi_engine_prepare_message(struct spi_controller *host, ...@@ -517,31 +512,22 @@ static int spi_engine_prepare_message(struct spi_controller *host,
size = sizeof(*p->instructions) * (p_dry.length + 1); size = sizeof(*p->instructions) * (p_dry.length + 1);
p = kzalloc(sizeof(*p) + size, GFP_KERNEL); p = kzalloc(sizeof(*p) + size, GFP_KERNEL);
if (!p) { if (!p)
kfree(st);
return -ENOMEM; return -ENOMEM;
}
spi_engine_compile_message(msg, false, p); spi_engine_compile_message(msg, false, p);
spi_engine_program_add_cmd(p, false, SPI_ENGINE_CMD_SYNC( spi_engine_program_add_cmd(p, false, SPI_ENGINE_CMD_SYNC(
AXI_SPI_ENGINE_CUR_MSG_SYNC_ID)); AXI_SPI_ENGINE_CUR_MSG_SYNC_ID));
st->p = p; msg->opt_state = p;
st->cmd_buf = p->instructions;
st->cmd_length = p->length;
msg->state = st;
return 0; return 0;
} }
static int spi_engine_unprepare_message(struct spi_controller *host, static int spi_engine_unoptimize_message(struct spi_message *msg)
struct spi_message *msg)
{ {
struct spi_engine_message_state *st = msg->state; kfree(msg->opt_state);
kfree(st->p);
kfree(st);
return 0; return 0;
} }
...@@ -550,10 +536,18 @@ static int spi_engine_transfer_one_message(struct spi_controller *host, ...@@ -550,10 +536,18 @@ static int spi_engine_transfer_one_message(struct spi_controller *host,
struct spi_message *msg) struct spi_message *msg)
{ {
struct spi_engine *spi_engine = spi_controller_get_devdata(host); struct spi_engine *spi_engine = spi_controller_get_devdata(host);
struct spi_engine_message_state *st = msg->state; struct spi_engine_message_state *st = &spi_engine->msg_state;
struct spi_engine_program *p = msg->opt_state;
unsigned int int_enable = 0; unsigned int int_enable = 0;
unsigned long flags; unsigned long flags;
/* reinitialize message state for this transfer */
memset(st, 0, sizeof(*st));
st->p = p;
st->cmd_buf = p->instructions;
st->cmd_length = p->length;
msg->state = st;
reinit_completion(&spi_engine->msg_complete); reinit_completion(&spi_engine->msg_complete);
spin_lock_irqsave(&spi_engine->lock, flags); spin_lock_irqsave(&spi_engine->lock, flags);
...@@ -658,8 +652,8 @@ static int spi_engine_probe(struct platform_device *pdev) ...@@ -658,8 +652,8 @@ static int spi_engine_probe(struct platform_device *pdev)
host->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32); host->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
host->max_speed_hz = clk_get_rate(spi_engine->ref_clk) / 2; host->max_speed_hz = clk_get_rate(spi_engine->ref_clk) / 2;
host->transfer_one_message = spi_engine_transfer_one_message; host->transfer_one_message = spi_engine_transfer_one_message;
host->prepare_message = spi_engine_prepare_message; host->optimize_message = spi_engine_optimize_message;
host->unprepare_message = spi_engine_unprepare_message; host->unoptimize_message = spi_engine_unoptimize_message;
host->num_chipselect = 8; host->num_chipselect = 8;
if (host->max_speed_hz == 0) if (host->max_speed_hz == 0)
......
...@@ -1118,6 +1118,21 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id) ...@@ -1118,6 +1118,21 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id)
return IRQ_HANDLED; return IRQ_HANDLED;
} }
/*
 * stm32_spi_optimize_message - one-time (pre-cooking) optimization hook
 * @msg: SPI message to optimize
 *
 * Called once per message via the controller's ->optimize_message callback
 * so the splitting work is not repeated on every transfer of a reused
 * message.
 *
 * Returns 0 on success or a negative errno from
 * spi_split_transfers_maxwords().
 */
static int stm32_spi_optimize_message(struct spi_message *msg)
{
	struct spi_controller *ctrl = msg->spi->controller;
	struct stm32_spi *spi = spi_controller_get_devdata(ctrl);

	/* On STM32H7, messages should not exceed a maximum size set
	 * later via the set_number_of_data function. In order to
	 * ensure that, split large messages into several messages
	 */
	if (spi->cfg->set_number_of_data)
		return spi_split_transfers_maxwords(ctrl, msg, spi->t_size_max);

	return 0;
}
/** /**
* stm32_spi_prepare_msg - set up the controller to transfer a single message * stm32_spi_prepare_msg - set up the controller to transfer a single message
* @ctrl: controller interface * @ctrl: controller interface
...@@ -1163,18 +1178,6 @@ static int stm32_spi_prepare_msg(struct spi_controller *ctrl, ...@@ -1163,18 +1178,6 @@ static int stm32_spi_prepare_msg(struct spi_controller *ctrl,
!!(spi_dev->mode & SPI_LSB_FIRST), !!(spi_dev->mode & SPI_LSB_FIRST),
!!(spi_dev->mode & SPI_CS_HIGH)); !!(spi_dev->mode & SPI_CS_HIGH));
/* On STM32H7, messages should not exceed a maximum size setted
* afterward via the set_number_of_data function. In order to
* ensure that, split large messages into several messages
*/
if (spi->cfg->set_number_of_data) {
int ret;
ret = spi_split_transfers_maxwords(ctrl, msg, spi->t_size_max);
if (ret)
return ret;
}
spin_lock_irqsave(&spi->lock, flags); spin_lock_irqsave(&spi->lock, flags);
/* CPOL, CPHA and LSB FIRST bits have common register */ /* CPOL, CPHA and LSB FIRST bits have common register */
...@@ -2180,6 +2183,7 @@ static int stm32_spi_probe(struct platform_device *pdev) ...@@ -2180,6 +2183,7 @@ static int stm32_spi_probe(struct platform_device *pdev)
ctrl->max_speed_hz = spi->clk_rate / spi->cfg->baud_rate_div_min; ctrl->max_speed_hz = spi->clk_rate / spi->cfg->baud_rate_div_min;
ctrl->min_speed_hz = spi->clk_rate / spi->cfg->baud_rate_div_max; ctrl->min_speed_hz = spi->clk_rate / spi->cfg->baud_rate_div_max;
ctrl->use_gpio_descriptors = true; ctrl->use_gpio_descriptors = true;
ctrl->optimize_message = stm32_spi_optimize_message;
ctrl->prepare_message = stm32_spi_prepare_msg; ctrl->prepare_message = stm32_spi_prepare_msg;
ctrl->transfer_one = stm32_spi_transfer_one; ctrl->transfer_one = stm32_spi_transfer_one;
ctrl->unprepare_message = stm32_spi_unprepare_msg; ctrl->unprepare_message = stm32_spi_unprepare_msg;
......
This diff is collapsed.
...@@ -475,6 +475,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch ...@@ -475,6 +475,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
* *
* @set_cs: set the logic level of the chip select line. May be called * @set_cs: set the logic level of the chip select line. May be called
* from interrupt context. * from interrupt context.
* @optimize_message: optimize the message for reuse
* @unoptimize_message: release resources allocated by optimize_message
* @prepare_message: set up the controller to transfer a single message, * @prepare_message: set up the controller to transfer a single message,
* for example doing DMA mapping. Called from threaded * for example doing DMA mapping. Called from threaded
* context. * context.
...@@ -715,6 +717,8 @@ struct spi_controller { ...@@ -715,6 +717,8 @@ struct spi_controller {
struct completion xfer_completion; struct completion xfer_completion;
size_t max_dma_len; size_t max_dma_len;
int (*optimize_message)(struct spi_message *msg);
int (*unoptimize_message)(struct spi_message *msg);
int (*prepare_transfer_hardware)(struct spi_controller *ctlr); int (*prepare_transfer_hardware)(struct spi_controller *ctlr);
int (*transfer_one_message)(struct spi_controller *ctlr, int (*transfer_one_message)(struct spi_controller *ctlr,
struct spi_message *mesg); struct spi_message *mesg);
...@@ -1111,6 +1115,8 @@ struct spi_transfer { ...@@ -1111,6 +1115,8 @@ struct spi_transfer {
* @spi: SPI device to which the transaction is queued * @spi: SPI device to which the transaction is queued
* @is_dma_mapped: if true, the caller provided both DMA and CPU virtual * @is_dma_mapped: if true, the caller provided both DMA and CPU virtual
* addresses for each transfer buffer * addresses for each transfer buffer
* @pre_optimized: peripheral driver pre-optimized the message
* @optimized: the message is in the optimized state
 * @prepared: spi_prepare_message() was called for this message * @prepared: spi_prepare_message() was called for this message
* @status: zero for success, else negative errno * @status: zero for success, else negative errno
* @complete: called to report transaction completions * @complete: called to report transaction completions
...@@ -1120,6 +1126,7 @@ struct spi_transfer { ...@@ -1120,6 +1126,7 @@ struct spi_transfer {
* successful segments * successful segments
* @queue: for use by whichever driver currently owns the message * @queue: for use by whichever driver currently owns the message
* @state: for use by whichever driver currently owns the message * @state: for use by whichever driver currently owns the message
* @opt_state: for use by whichever driver currently owns the message
* @resources: for resource management when the SPI message is processed * @resources: for resource management when the SPI message is processed
* *
* A @spi_message is used to execute an atomic sequence of data transfers, * A @spi_message is used to execute an atomic sequence of data transfers,
...@@ -1143,6 +1150,11 @@ struct spi_message { ...@@ -1143,6 +1150,11 @@ struct spi_message {
unsigned is_dma_mapped:1; unsigned is_dma_mapped:1;
/* spi_optimize_message() was called for this message */
bool pre_optimized;
/* __spi_optimize_message() was called for this message */
bool optimized;
/* spi_prepare_message() was called for this message */ /* spi_prepare_message() was called for this message */
bool prepared; bool prepared;
...@@ -1172,6 +1184,11 @@ struct spi_message { ...@@ -1172,6 +1184,11 @@ struct spi_message {
*/ */
struct list_head queue; struct list_head queue;
void *state; void *state;
/*
* Optional state for use by controller driver between calls to
* __spi_optimize_message() and __spi_unoptimize_message().
*/
void *opt_state;
/* List of spi_res resources when the SPI message is processed */ /* List of spi_res resources when the SPI message is processed */
struct list_head resources; struct list_head resources;
...@@ -1255,6 +1272,9 @@ static inline void spi_message_free(struct spi_message *m) ...@@ -1255,6 +1272,9 @@ static inline void spi_message_free(struct spi_message *m)
kfree(m); kfree(m);
} }
extern int spi_optimize_message(struct spi_device *spi, struct spi_message *msg);
extern void spi_unoptimize_message(struct spi_message *msg);
extern int spi_setup(struct spi_device *spi); extern int spi_setup(struct spi_device *spi);
extern int spi_async(struct spi_device *spi, struct spi_message *message); extern int spi_async(struct spi_device *spi, struct spi_message *message);
extern int spi_slave_abort(struct spi_device *spi); extern int spi_slave_abort(struct spi_device *spi);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment