Commit 78611565 authored by Mark Brown's avatar Mark Brown

spi: add support for pre-cooking messages

Merge series from David Lechner <dlechner@baylibre.com>:

This is a follow-up to [1] where it was suggested to break down the
proposed SPI offload support into smaller series.

This takes on the first suggested task of introducing an API to
"pre-cook" SPI messages. This idea was first discussed extensively in
2013 [2][3] and revisited more briefly in 2022 [4].

The goal here is to be able to improve performance (higher throughput,
and reduced CPU usage) by allowing peripheral drivers that use the
same struct spi_message repeatedly to "pre-cook" the message once to
avoid repeating the same validation, and possibly other operations each
time the message is sent.

This series includes moving __spi_validate() and the automatic splitting
of xfers into the optimization phase. Another frequently suggested optimization
is doing DMA mapping only once. This is not included in this series, but
can be added later (preferably by someone with a real use case for it).

To show how this all works and get some real-world measurements, this
series includes the core changes, optimization of a SPI controller
driver, and optimization of an ADC driver. This test case was only able
to take advantage of the single validation optimization, since it didn't
require splitting transfers. With these changes, CPU usage of the
threaded interrupt handler, which calls spi_sync(), was reduced from
83% to 73% while at the same time the sample rate (frequency of SPI
xfers) was increased from 20kHz to 25kHz.

[1]: https://lore.kernel.org/linux-spi/20240109-axi-spi-engine-series-3-v1-1-e42c6a986580@baylibre.com/T/
[2]: https://lore.kernel.org/linux-spi/E81F4810-48DD-41EE-B110-D0D848B8A510@martin.sperl.org/T/
[3]: https://lore.kernel.org/linux-spi/39DEC004-10A1-47EF-9D77-276188D2580C@martin.sperl.org/T/
[4]: https://lore.kernel.org/linux-spi/20220525163946.48ea40c9@erd992/T/
parents e63aef9c 7dba2adb
......@@ -109,6 +109,7 @@ struct spi_engine {
spinlock_t lock;
void __iomem *base;
struct spi_engine_message_state msg_state;
struct completion msg_complete;
unsigned int int_enable;
};
......@@ -499,17 +500,11 @@ static irqreturn_t spi_engine_irq(int irq, void *devid)
return IRQ_HANDLED;
}
static int spi_engine_prepare_message(struct spi_controller *host,
struct spi_message *msg)
static int spi_engine_optimize_message(struct spi_message *msg)
{
struct spi_engine_program p_dry, *p;
struct spi_engine_message_state *st;
size_t size;
st = kzalloc(sizeof(*st), GFP_KERNEL);
if (!st)
return -ENOMEM;
spi_engine_precompile_message(msg);
p_dry.length = 0;
......@@ -517,31 +512,22 @@ static int spi_engine_prepare_message(struct spi_controller *host,
size = sizeof(*p->instructions) * (p_dry.length + 1);
p = kzalloc(sizeof(*p) + size, GFP_KERNEL);
if (!p) {
kfree(st);
if (!p)
return -ENOMEM;
}
spi_engine_compile_message(msg, false, p);
spi_engine_program_add_cmd(p, false, SPI_ENGINE_CMD_SYNC(
AXI_SPI_ENGINE_CUR_MSG_SYNC_ID));
st->p = p;
st->cmd_buf = p->instructions;
st->cmd_length = p->length;
msg->state = st;
msg->opt_state = p;
return 0;
}
static int spi_engine_unprepare_message(struct spi_controller *host,
struct spi_message *msg)
static int spi_engine_unoptimize_message(struct spi_message *msg)
{
struct spi_engine_message_state *st = msg->state;
kfree(st->p);
kfree(st);
kfree(msg->opt_state);
return 0;
}
......@@ -550,10 +536,18 @@ static int spi_engine_transfer_one_message(struct spi_controller *host,
struct spi_message *msg)
{
struct spi_engine *spi_engine = spi_controller_get_devdata(host);
struct spi_engine_message_state *st = msg->state;
struct spi_engine_message_state *st = &spi_engine->msg_state;
struct spi_engine_program *p = msg->opt_state;
unsigned int int_enable = 0;
unsigned long flags;
/* reinitialize message state for this transfer */
memset(st, 0, sizeof(*st));
st->p = p;
st->cmd_buf = p->instructions;
st->cmd_length = p->length;
msg->state = st;
reinit_completion(&spi_engine->msg_complete);
spin_lock_irqsave(&spi_engine->lock, flags);
......@@ -658,8 +652,8 @@ static int spi_engine_probe(struct platform_device *pdev)
host->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
host->max_speed_hz = clk_get_rate(spi_engine->ref_clk) / 2;
host->transfer_one_message = spi_engine_transfer_one_message;
host->prepare_message = spi_engine_prepare_message;
host->unprepare_message = spi_engine_unprepare_message;
host->optimize_message = spi_engine_optimize_message;
host->unoptimize_message = spi_engine_unoptimize_message;
host->num_chipselect = 8;
if (host->max_speed_hz == 0)
......
......@@ -1118,6 +1118,21 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id)
return IRQ_HANDLED;
}
/*
 * stm32_spi_optimize_message - one-time preparation of a SPI message
 * @msg: message being optimized for reuse
 *
 * On STM32H7 a single transfer must not exceed the maximum size that is
 * programmed later through the set_number_of_data function, so oversized
 * transfers are split here once, at optimize time, instead of on every
 * transfer of the message.
 */
static int stm32_spi_optimize_message(struct spi_message *msg)
{
	struct spi_controller *host = msg->spi->controller;
	struct stm32_spi *priv = spi_controller_get_devdata(host);

	/* Only the variants with a programmable data count need splitting. */
	if (!priv->cfg->set_number_of_data)
		return 0;

	return spi_split_transfers_maxwords(host, msg, priv->t_size_max);
}
/**
* stm32_spi_prepare_msg - set up the controller to transfer a single message
* @ctrl: controller interface
......@@ -1163,18 +1178,6 @@ static int stm32_spi_prepare_msg(struct spi_controller *ctrl,
!!(spi_dev->mode & SPI_LSB_FIRST),
!!(spi_dev->mode & SPI_CS_HIGH));
/* On STM32H7, messages should not exceed a maximum size setted
* afterward via the set_number_of_data function. In order to
* ensure that, split large messages into several messages
*/
if (spi->cfg->set_number_of_data) {
int ret;
ret = spi_split_transfers_maxwords(ctrl, msg, spi->t_size_max);
if (ret)
return ret;
}
spin_lock_irqsave(&spi->lock, flags);
/* CPOL, CPHA and LSB FIRST bits have common register */
......@@ -2180,6 +2183,7 @@ static int stm32_spi_probe(struct platform_device *pdev)
ctrl->max_speed_hz = spi->clk_rate / spi->cfg->baud_rate_div_min;
ctrl->min_speed_hz = spi->clk_rate / spi->cfg->baud_rate_div_max;
ctrl->use_gpio_descriptors = true;
ctrl->optimize_message = stm32_spi_optimize_message;
ctrl->prepare_message = stm32_spi_prepare_msg;
ctrl->transfer_one = stm32_spi_transfer_one;
ctrl->unprepare_message = stm32_spi_unprepare_msg;
......
......@@ -1747,38 +1747,6 @@ static int __spi_pump_transfer_message(struct spi_controller *ctlr,
trace_spi_message_start(msg);
/*
* If an SPI controller does not support toggling the CS line on each
* transfer (indicated by the SPI_CS_WORD flag) or we are using a GPIO
* for the CS line, we can emulate the CS-per-word hardware function by
* splitting transfers into one-word transfers and ensuring that
* cs_change is set for each transfer.
*/
if ((msg->spi->mode & SPI_CS_WORD) && (!(ctlr->mode_bits & SPI_CS_WORD) ||
spi_is_csgpiod(msg->spi))) {
ret = spi_split_transfers_maxwords(ctlr, msg, 1);
if (ret) {
msg->status = ret;
spi_finalize_current_message(ctlr);
return ret;
}
list_for_each_entry(xfer, &msg->transfers, transfer_list) {
/* Don't change cs_change on the last entry in the list */
if (list_is_last(&xfer->transfer_list, &msg->transfers))
break;
xfer->cs_change = 1;
}
} else {
ret = spi_split_transfers_maxsize(ctlr, msg,
spi_max_transfer_size(msg->spi));
if (ret) {
msg->status = ret;
spi_finalize_current_message(ctlr);
return ret;
}
}
if (ctlr->prepare_message) {
ret = ctlr->prepare_message(ctlr, msg);
if (ret) {
......@@ -2106,6 +2074,43 @@ struct spi_message *spi_get_next_queued_message(struct spi_controller *ctlr)
}
EXPORT_SYMBOL_GPL(spi_get_next_queued_message);
/*
 * __spi_unoptimize_message - shared implementation of spi_unoptimize_message()
 * and spi_maybe_unoptimize_message()
 * @msg: the message to unoptimize
 *
 * Peripheral drivers should use spi_unoptimize_message() and callers inside
 * core should use spi_maybe_unoptimize_message() rather than calling this
 * function directly.
 *
 * It is not valid to call this on a message that is not currently optimized.
 */
static void __spi_unoptimize_message(struct spi_message *msg)
{
	struct spi_controller *host = msg->spi->controller;

	/* Let the controller driver release its own optimize-time state. */
	if (host->unoptimize_message)
		host->unoptimize_message(msg);

	/* Drop resources (e.g. split transfers) claimed at optimize time. */
	spi_res_release(host, msg);

	msg->opt_state = NULL;
	msg->optimized = false;
}
/*
 * spi_maybe_unoptimize_message - unoptimize msg not managed by a peripheral
 * @msg: the message to unoptimize
 *
 * Tears down an optimization only when the core applied it itself (via
 * spi_maybe_optimize_message()). Messages pre-optimized by a peripheral
 * driver are left alone until spi_unoptimize_message() is called.
 */
static void spi_maybe_unoptimize_message(struct spi_message *msg)
{
	/* The peripheral driver owns pre-optimized messages. */
	if (msg->pre_optimized)
		return;

	if (msg->optimized)
		__spi_unoptimize_message(msg);
}
/**
* spi_finalize_current_message() - the current message is complete
* @ctlr: the controller to return the message to
......@@ -2134,15 +2139,6 @@ void spi_finalize_current_message(struct spi_controller *ctlr)
spi_unmap_msg(ctlr, mesg);
/*
* In the prepare_messages callback the SPI bus has the opportunity
* to split a transfer to smaller chunks.
*
* Release the split transfers here since spi_map_msg() is done on
* the split transfers.
*/
spi_res_release(ctlr, mesg);
if (mesg->prepared && ctlr->unprepare_message) {
ret = ctlr->unprepare_message(ctlr, mesg);
if (ret) {
......@@ -2153,6 +2149,8 @@ void spi_finalize_current_message(struct spi_controller *ctlr)
mesg->prepared = false;
spi_maybe_unoptimize_message(mesg);
WRITE_ONCE(ctlr->cur_msg_incomplete, false);
smp_mb(); /* See __spi_pump_transfer_message()... */
if (READ_ONCE(ctlr->cur_msg_need_completion))
......@@ -3782,6 +3780,10 @@ static int __spi_split_transfer_maxsize(struct spi_controller *ctlr,
* @msg: the @spi_message to transform
* @maxsize: the maximum when to apply this
*
* This function allocates resources that are automatically freed during the
* spi message unoptimize phase so this function should only be called from
* optimize_message callbacks.
*
* Return: status of transformation
*/
int spi_split_transfers_maxsize(struct spi_controller *ctlr,
......@@ -3820,6 +3822,10 @@ EXPORT_SYMBOL_GPL(spi_split_transfers_maxsize);
* @msg: the @spi_message to transform
* @maxwords: the number of words to limit each transfer to
*
* This function allocates resources that are automatically freed during the
* spi message unoptimize phase so this function should only be called from
* optimize_message callbacks.
*
* Return: status of transformation
*/
int spi_split_transfers_maxwords(struct spi_controller *ctlr,
......@@ -4194,6 +4200,167 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
return 0;
}
/*
* spi_split_transfers - generic handling of transfer splitting
* @msg: the message to split
*
* Under certain conditions, a SPI controller may not support arbitrary
* transfer sizes or other features required by a peripheral. This function
* will split the transfers in the message into smaller transfers that are
* supported by the controller.
*
* Controllers with special requirements not covered here can also split
* transfers in the optimize_message() callback.
*
* Context: can sleep
* Return: zero on success, else a negative error code
*/
static int spi_split_transfers(struct spi_message *msg)
{
struct spi_controller *ctlr = msg->spi->controller;
struct spi_transfer *xfer;
int ret;
/*
* If an SPI controller does not support toggling the CS line on each
* transfer (indicated by the SPI_CS_WORD flag) or we are using a GPIO
* for the CS line, we can emulate the CS-per-word hardware function by
* splitting transfers into one-word transfers and ensuring that
* cs_change is set for each transfer.
*/
if ((msg->spi->mode & SPI_CS_WORD) &&
(!(ctlr->mode_bits & SPI_CS_WORD) || spi_is_csgpiod(msg->spi))) {
ret = spi_split_transfers_maxwords(ctlr, msg, 1);
if (ret)
return ret;
list_for_each_entry(xfer, &msg->transfers, transfer_list) {
/* Don't change cs_change on the last entry in the list */
if (list_is_last(&xfer->transfer_list, &msg->transfers))
break;
xfer->cs_change = 1;
}
} else {
ret = spi_split_transfers_maxsize(ctlr, msg,
spi_max_transfer_size(msg->spi));
if (ret)
return ret;
}
return 0;
}
/*
 * __spi_optimize_message - shared implementation for spi_optimize_message()
 * and spi_maybe_optimize_message()
 * @spi: the device that will be used for the message
 * @msg: the message to optimize
 *
 * Peripheral drivers will call spi_optimize_message() and the spi core will
 * call spi_maybe_optimize_message() instead of calling this directly.
 *
 * It is not valid to call this on a message that has already been optimized.
 *
 * Return: zero on success, else a negative error code
 */
static int __spi_optimize_message(struct spi_device *spi,
				  struct spi_message *msg)
{
	struct spi_controller *host = spi->controller;
	int status;

	/* Validation is done once here instead of on every transfer. */
	status = __spi_validate(spi, msg);
	if (status)
		return status;

	status = spi_split_transfers(msg);
	if (status)
		return status;

	if (host->optimize_message) {
		status = host->optimize_message(msg);
		if (status) {
			/*
			 * Resources claimed while splitting transfers must be
			 * returned when the controller rejects the message.
			 */
			spi_res_release(host, msg);
			return status;
		}
	}

	msg->optimized = true;

	return 0;
}
/*
 * spi_maybe_optimize_message - optimize message if it isn't already pre-optimized
 * @spi: the device that will be used for the message
 * @msg: the message to optimize
 *
 * A message already pre-optimized by the peripheral driver is passed through
 * unchanged; otherwise the core optimizes it for this one submission.
 *
 * Return: zero on success, else a negative error code
 */
static int spi_maybe_optimize_message(struct spi_device *spi,
				      struct spi_message *msg)
{
	return msg->pre_optimized ? 0 : __spi_optimize_message(spi, msg);
}
/**
 * spi_optimize_message - do any one-time validation and setup for a SPI message
 * @spi: the device that will be used for the message
 * @msg: the message to optimize
 *
 * Peripheral drivers that reuse the same message repeatedly may call this to
 * perform as much message prep as possible once, rather than repeating it each
 * time a message transfer is performed to improve throughput and reduce CPU
 * usage.
 *
 * Once a message has been optimized, it cannot be modified with the exception
 * of updating the contents of any xfer->tx_buf (the pointer can't be changed,
 * only the data in the memory it points to).
 *
 * Calls to this function must be balanced with calls to spi_unoptimize_message()
 * to avoid leaking resources.
 *
 * Context: can sleep
 * Return: zero on success, else a negative error code
 */
int spi_optimize_message(struct spi_device *spi, struct spi_message *msg)
{
	int status = __spi_optimize_message(spi, msg);

	if (status)
		return status;

	/*
	 * Mark the optimization as owned by the peripheral driver: the core
	 * must not tear it down when finalizing the message; only an explicit
	 * spi_unoptimize_message() call from the peripheral driver may.
	 */
	msg->pre_optimized = true;

	return 0;
}
EXPORT_SYMBOL_GPL(spi_optimize_message);
/**
 * spi_unoptimize_message - releases any resources allocated by spi_optimize_message()
 * @msg: the message to unoptimize
 *
 * Undoes a prior spi_optimize_message() and returns the message to the state
 * where it may be modified or freed. Calls to this function must be balanced
 * with calls to spi_optimize_message().
 *
 * Context: can sleep
 */
void spi_unoptimize_message(struct spi_message *msg)
{
	__spi_unoptimize_message(msg);
	/* The peripheral driver no longer owns an optimization of this msg. */
	msg->pre_optimized = false;
}
EXPORT_SYMBOL_GPL(spi_unoptimize_message);
static int __spi_async(struct spi_device *spi, struct spi_message *message)
{
struct spi_controller *ctlr = spi->controller;
......@@ -4258,8 +4425,8 @@ int spi_async(struct spi_device *spi, struct spi_message *message)
int ret;
unsigned long flags;
ret = __spi_validate(spi, message);
if (ret != 0)
ret = spi_maybe_optimize_message(spi, message);
if (ret)
return ret;
spin_lock_irqsave(&ctlr->bus_lock_spinlock, flags);
......@@ -4271,6 +4438,8 @@ int spi_async(struct spi_device *spi, struct spi_message *message)
spin_unlock_irqrestore(&ctlr->bus_lock_spinlock, flags);
spi_maybe_unoptimize_message(message);
return ret;
}
EXPORT_SYMBOL_GPL(spi_async);
......@@ -4331,8 +4500,8 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message)
return -ESHUTDOWN;
}
status = __spi_validate(spi, message);
if (status != 0)
status = spi_maybe_optimize_message(spi, message);
if (status)
return status;
SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, spi_sync);
......
......@@ -475,6 +475,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch
*
* @set_cs: set the logic level of the chip select line. May be called
* from interrupt context.
* @optimize_message: optimize the message for reuse
* @unoptimize_message: release resources allocated by optimize_message
* @prepare_message: set up the controller to transfer a single message,
* for example doing DMA mapping. Called from threaded
* context.
......@@ -715,6 +717,8 @@ struct spi_controller {
struct completion xfer_completion;
size_t max_dma_len;
int (*optimize_message)(struct spi_message *msg);
int (*unoptimize_message)(struct spi_message *msg);
int (*prepare_transfer_hardware)(struct spi_controller *ctlr);
int (*transfer_one_message)(struct spi_controller *ctlr,
struct spi_message *mesg);
......@@ -1111,6 +1115,8 @@ struct spi_transfer {
* @spi: SPI device to which the transaction is queued
* @is_dma_mapped: if true, the caller provided both DMA and CPU virtual
* addresses for each transfer buffer
* @pre_optimized: peripheral driver pre-optimized the message
* @optimized: the message is in the optimized state
 * @prepared: spi_prepare_message was called for this message
* @status: zero for success, else negative errno
* @complete: called to report transaction completions
......@@ -1120,6 +1126,7 @@ struct spi_transfer {
* successful segments
* @queue: for use by whichever driver currently owns the message
* @state: for use by whichever driver currently owns the message
* @opt_state: for use by whichever driver currently owns the message
* @resources: for resource management when the SPI message is processed
*
* A @spi_message is used to execute an atomic sequence of data transfers,
......@@ -1143,6 +1150,11 @@ struct spi_message {
unsigned is_dma_mapped:1;
/* spi_optimize_message() was called for this message */
bool pre_optimized;
/* __spi_optimize_message() was called for this message */
bool optimized;
/* spi_prepare_message() was called for this message */
bool prepared;
......@@ -1172,6 +1184,11 @@ struct spi_message {
*/
struct list_head queue;
void *state;
/*
* Optional state for use by controller driver between calls to
* __spi_optimize_message() and __spi_unoptimize_message().
*/
void *opt_state;
/* List of spi_res resources when the SPI message is processed */
struct list_head resources;
......@@ -1255,6 +1272,9 @@ static inline void spi_message_free(struct spi_message *m)
kfree(m);
}
extern int spi_optimize_message(struct spi_device *spi, struct spi_message *msg);
extern void spi_unoptimize_message(struct spi_message *msg);
extern int spi_setup(struct spi_device *spi);
extern int spi_async(struct spi_device *spi, struct spi_message *message);
extern int spi_slave_abort(struct spi_device *spi);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment