Commit 295711fa authored by David S. Miller

Merge branch 'dpaa2-irq-coalescing'

Ioana Ciornei says:

====================
dpaa2-eth: add support for IRQ coalescing

This patch set adds support for interrupt coalescing in dpaa2-eth.
The first patches add support for the hardware-level configuration of
IRQ coalescing in the dpio driver, while the ones that touch the
dpaa2-eth driver are responsible for the ethtool user interaction.

With adaptive IRQ coalescing in place and enabled, we have observed
the following changes in interrupt rates on one A72 core @2.2GHz
(LX2160A) while running an Rx TCP flow. The TCP stream is sent on a
10Gbit link and the only CPU doing Rx is fully utilized.

                Rx throughput      IRQ rate (irqs / sec)
before:        4.59 Gbits/sec                 24k
after:         5.67 Gbits/sec                 1.3k
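
The new knobs are exposed through the standard ethtool coalescing
interface, for example (with eth0 as a hypothetical dpaa2-eth
interface):

  ethtool -C eth0 rx-usecs 64      # static Rx IRQ holdoff of 64us
  ethtool -C eth0 adaptive-rx on   # let Net DIM adapt the holdoff
  ethtool -c eth0                  # read back the current settings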
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents f3fafbcb fc398bec
@@ -533,6 +533,7 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
percpu_stats->rx_packets++;
percpu_stats->rx_bytes += dpaa2_fd_get_len(fd);
ch->stats.bytes_per_cdan += dpaa2_fd_get_len(fd);
list_add_tail(&skb->list, ch->rx_list);
@@ -641,6 +642,7 @@ static int dpaa2_eth_consume_frames(struct dpaa2_eth_channel *ch,
fq->stats.frames += cleaned;
ch->stats.frames += cleaned;
ch->stats.frames_per_cdan += cleaned;
/* A dequeue operation only pulls frames from a single queue
* into the store. Return the frame queue as an out param.
@@ -1264,7 +1266,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
/* Tx confirmation frame processing routine */
static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv,
-struct dpaa2_eth_channel *ch __always_unused,
+struct dpaa2_eth_channel *ch,
const struct dpaa2_fd *fd,
struct dpaa2_eth_fq *fq)
{
@@ -1279,6 +1281,7 @@ static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv,
percpu_extras = this_cpu_ptr(priv->percpu_extras);
percpu_extras->tx_conf_frames++;
percpu_extras->tx_conf_bytes += fd_len;
ch->stats.bytes_per_cdan += fd_len;
/* Check frame errors in the FD field */
fd_errors = dpaa2_fd_get_ctrl(fd) & DPAA2_FD_TX_ERR_MASK;
@@ -1601,6 +1604,12 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
}
} while (store_cleaned);
/* Update NET DIM with the values for this CDAN */
dpaa2_io_update_net_dim(ch->dpio, ch->stats.frames_per_cdan,
ch->stats.bytes_per_cdan);
ch->stats.frames_per_cdan = 0;
ch->stats.bytes_per_cdan = 0;
/* We didn't consume the entire budget, so finish napi and
* re-enable data availability notifications
*/
......
@@ -384,6 +384,8 @@ struct dpaa2_eth_ch_stats {
__u64 xdp_redirect;
/* Must be last, these do not show up in ethtool stats */
__u64 frames;
__u64 frames_per_cdan;
__u64 bytes_per_cdan;
};
/* Maximum number of queues associated with a DPNI */
......
@@ -820,7 +820,63 @@ static int dpaa2_eth_set_tunable(struct net_device *net_dev,
return err;
}
static int dpaa2_eth_get_coalesce(struct net_device *dev,
struct ethtool_coalesce *ic,
struct kernel_ethtool_coalesce *kernel_coal,
struct netlink_ext_ack *extack)
{
struct dpaa2_eth_priv *priv = netdev_priv(dev);
struct dpaa2_io *dpio = priv->channel[0]->dpio;
dpaa2_io_get_irq_coalescing(dpio, &ic->rx_coalesce_usecs);
ic->use_adaptive_rx_coalesce = dpaa2_io_get_adaptive_coalescing(dpio);
return 0;
}
static int dpaa2_eth_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *ic,
struct kernel_ethtool_coalesce *kernel_coal,
struct netlink_ext_ack *extack)
{
struct dpaa2_eth_priv *priv = netdev_priv(dev);
struct dpaa2_io *dpio;
int prev_adaptive;
u32 prev_rx_usecs;
int i, j, err;
/* Keep track of the previous value, just in case we fail */
dpio = priv->channel[0]->dpio;
dpaa2_io_get_irq_coalescing(dpio, &prev_rx_usecs);
prev_adaptive = dpaa2_io_get_adaptive_coalescing(dpio);
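/* All channels are programmed with the same coalescing settings, so
 * reading back channel 0 is enough to know what to restore on error.
 */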
/* Set up the new values for Rx coalescing */
for (i = 0; i < priv->num_channels; i++) {
dpio = priv->channel[i]->dpio;
dpaa2_io_set_adaptive_coalescing(dpio,
ic->use_adaptive_rx_coalesce);
err = dpaa2_io_set_irq_coalescing(dpio, ic->rx_coalesce_usecs);
if (err)
goto restore_rx_usecs;
}
return 0;
restore_rx_usecs:
for (j = 0; j < i; j++) {
dpio = priv->channel[j]->dpio;
dpaa2_io_set_irq_coalescing(dpio, prev_rx_usecs);
dpaa2_io_set_adaptive_coalescing(dpio, prev_adaptive);
}
return err;
}
const struct ethtool_ops dpaa2_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS |
ETHTOOL_COALESCE_USE_ADAPTIVE_RX,
.get_drvinfo = dpaa2_eth_get_drvinfo,
.nway_reset = dpaa2_eth_nway_reset,
.get_link = ethtool_op_get_link,
@@ -836,4 +892,6 @@ const struct ethtool_ops dpaa2_ethtool_ops = {
.get_ts_info = dpaa2_eth_get_ts_info,
.get_tunable = dpaa2_eth_get_tunable,
.set_tunable = dpaa2_eth_set_tunable,
.get_coalesce = dpaa2_eth_get_coalesce,
.set_coalesce = dpaa2_eth_set_coalesce,
};
@@ -24,6 +24,7 @@ config FSL_MC_DPIO
tristate "QorIQ DPAA2 DPIO driver"
depends on FSL_MC_BUS
select SOC_BUS
select DIMLIB
help
Driver for the DPAA2 DPIO object. A DPIO provides queue and
buffer management facilities for software to interact with
......
@@ -46,6 +46,9 @@ struct dpio_rsp_get_attr {
__le64 qbman_portal_ci_addr;
/* cmd word 3 */
__le32 qbman_version;
__le32 pad1;
/* cmd word 4 */
__le32 clk;
};
struct dpio_stashing_dest {
......
@@ -162,6 +162,7 @@ static int dpaa2_dpio_probe(struct fsl_mc_device *dpio_dev)
goto err_get_attr;
}
desc.qman_version = dpio_attrs.qbman_version;
desc.qman_clk = dpio_attrs.clk;
err = dpio_enable(dpio_dev->mc_io, 0, dpio_dev->mc_handle);
if (err) {
......
@@ -12,6 +12,7 @@
#include <linux/platform_device.h>
#include <linux/interrupt.h>
#include <linux/dma-mapping.h>
#include <linux/dim.h>
#include <linux/slab.h>
#include "dpio.h"
@@ -28,6 +29,14 @@ struct dpaa2_io {
spinlock_t lock_notifications;
struct list_head notifications;
struct device *dev;
/* Net DIM */
struct dim rx_dim;
/* protect against concurrent Net DIM updates */
spinlock_t dim_lock;
u16 event_ctr;
u64 bytes;
u64 frames;
};
struct dpaa2_io_store {
@@ -100,6 +109,17 @@ struct dpaa2_io *dpaa2_io_service_select(int cpu)
}
EXPORT_SYMBOL_GPL(dpaa2_io_service_select);
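/* Deferred work scheduled by net_dim(): apply the Rx moderation profile
 * picked by the DIM algorithm to this portal, then re-arm DIM for the
 * next measurement window.
 */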
static void dpaa2_io_dim_work(struct work_struct *w)
{
struct dim *dim = container_of(w, struct dim, work);
struct dim_cq_moder moder =
net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
struct dpaa2_io *d = container_of(dim, struct dpaa2_io, rx_dim);
dpaa2_io_set_irq_coalescing(d, moder.usec);
dim->state = DIM_START_MEASURE;
}
/**
* dpaa2_io_create() - create a dpaa2_io object.
* @desc: the dpaa2_io descriptor
@@ -114,6 +134,7 @@ struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc,
struct device *dev)
{
struct dpaa2_io *obj = kmalloc(sizeof(*obj), GFP_KERNEL);
u32 qman_256_cycles_per_ns;
if (!obj)
return NULL;
@@ -127,7 +148,15 @@ struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc,
obj->dpio_desc = *desc;
obj->swp_desc.cena_bar = obj->dpio_desc.regs_cena;
obj->swp_desc.cinh_bar = obj->dpio_desc.regs_cinh;
obj->swp_desc.qman_clk = obj->dpio_desc.qman_clk;
obj->swp_desc.qman_version = obj->dpio_desc.qman_version;
/* Compute the duration, in ns, of one increment of 256 QBMAN clock
 * cycles. This is needed because the interrupt timeout period
 * register must be specified in QBMAN clock cycles, in increments
 * of 256.
 */
qman_256_cycles_per_ns = 256000 / (obj->swp_desc.qman_clk / 1000000);
obj->swp_desc.qman_256_cycles_per_ns = qman_256_cycles_per_ns;
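/* Example (assuming a hypothetical 500 MHz QBMAN clock):
 * 256000 / (500000000 / 1000000) = 512, i.e. each 256-cycle
 * increment of the timeout register lasts 512 ns.
 */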
obj->swp = qbman_swp_init(&obj->swp_desc);
if (!obj->swp) {
@@ -138,6 +167,7 @@ struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc,
INIT_LIST_HEAD(&obj->node);
spin_lock_init(&obj->lock_mgmt_cmd);
spin_lock_init(&obj->lock_notifications);
spin_lock_init(&obj->dim_lock);
INIT_LIST_HEAD(&obj->notifications);
/* For now only enable DQRR interrupts */
@@ -155,6 +185,12 @@ struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc,
obj->dev = dev;
memset(&obj->rx_dim, 0, sizeof(obj->rx_dim));
INIT_WORK(&obj->rx_dim.work, dpaa2_io_dim_work);
obj->event_ctr = 0;
obj->bytes = 0;
obj->frames = 0;
return obj;
}
@@ -194,6 +230,8 @@ irqreturn_t dpaa2_io_irq(struct dpaa2_io *obj)
struct qbman_swp *swp;
u32 status;
obj->event_ctr++;
swp = obj->swp;
status = qbman_swp_interrupt_read_status(swp);
if (!status)
@@ -779,3 +817,82 @@ int dpaa2_io_query_bp_count(struct dpaa2_io *d, u16 bpid, u32 *num)
return 0;
}
EXPORT_SYMBOL_GPL(dpaa2_io_query_bp_count);
/**
* dpaa2_io_set_irq_coalescing() - Set new IRQ coalescing values
* @d: the given DPIO object
* @irq_holdoff: interrupt holdoff (timeout) period in us
*
* Return 0 for success, or negative error code on error.
*/
int dpaa2_io_set_irq_coalescing(struct dpaa2_io *d, u32 irq_holdoff)
{
struct qbman_swp *swp = d->swp;
return qbman_swp_set_irq_coalescing(swp, swp->dqrr.dqrr_size - 1,
irq_holdoff);
}
EXPORT_SYMBOL(dpaa2_io_set_irq_coalescing);
/**
* dpaa2_io_get_irq_coalescing() - Get the current IRQ coalescing parameters
* @d: the given DPIO object
* @irq_holdoff: interrupt holdoff (timeout) period in us
*/
void dpaa2_io_get_irq_coalescing(struct dpaa2_io *d, u32 *irq_holdoff)
{
struct qbman_swp *swp = d->swp;
qbman_swp_get_irq_coalescing(swp, NULL, irq_holdoff);
}
EXPORT_SYMBOL(dpaa2_io_get_irq_coalescing);
/**
* dpaa2_io_set_adaptive_coalescing() - Enable/disable adaptive coalescing
* @d: the given DPIO object
* @use_adaptive_rx_coalesce: adaptive coalescing state
*/
void dpaa2_io_set_adaptive_coalescing(struct dpaa2_io *d,
int use_adaptive_rx_coalesce)
{
d->swp->use_adaptive_rx_coalesce = use_adaptive_rx_coalesce;
}
EXPORT_SYMBOL(dpaa2_io_set_adaptive_coalescing);
/**
* dpaa2_io_get_adaptive_coalescing() - Query adaptive coalescing state
* @d: the given DPIO object
*
* Return 1 when adaptive coalescing is enabled on the DPIO object and 0
* otherwise.
*/
int dpaa2_io_get_adaptive_coalescing(struct dpaa2_io *d)
{
return d->swp->use_adaptive_rx_coalesce;
}
EXPORT_SYMBOL(dpaa2_io_get_adaptive_coalescing);
/**
* dpaa2_io_update_net_dim() - Update Net DIM
* @d: the given DPIO object
* @frames: how many frames have been dequeued by the user since the last call
* @bytes: how many bytes have been dequeued by the user since the last call
*/
void dpaa2_io_update_net_dim(struct dpaa2_io *d, __u64 frames, __u64 bytes)
{
struct dim_sample dim_sample = {};
if (!d->swp->use_adaptive_rx_coalesce)
return;
spin_lock(&d->dim_lock);
d->bytes += bytes;
d->frames += frames;
dim_update_sample(d->event_ctr, d->frames, d->bytes, &dim_sample);
net_dim(&d->rx_dim, dim_sample);
spin_unlock(&d->dim_lock);
}
EXPORT_SYMBOL(dpaa2_io_update_net_dim);
@@ -162,6 +162,7 @@ int dpio_get_attributes(struct fsl_mc_io *mc_io,
attr->qbman_portal_ci_offset =
le64_to_cpu(dpio_rsp->qbman_portal_ci_addr);
attr->qbman_version = le32_to_cpu(dpio_rsp->qbman_version);
attr->clk = le32_to_cpu(dpio_rsp->clk);
return 0;
}
......
@@ -59,6 +59,7 @@ int dpio_disable(struct fsl_mc_io *mc_io,
* @num_priorities: Number of priorities for the notification channel (1-8);
* relevant only if 'channel_mode = DPIO_LOCAL_CHANNEL'
* @qbman_version: QBMAN version
* @clk: QBMAN clock frequency value in Hz
*/
struct dpio_attr {
int id;
@@ -68,6 +69,7 @@ struct dpio_attr {
enum dpio_channel_mode channel_mode;
u8 num_priorities;
u32 qbman_version;
u32 clk;
};
int dpio_get_attributes(struct fsl_mc_io *mc_io,
......
@@ -29,6 +29,7 @@
#define QBMAN_CINH_SWP_EQCR_AM_RT 0x980
#define QBMAN_CINH_SWP_RCR_AM_RT 0x9c0
#define QBMAN_CINH_SWP_DQPI 0xa00
#define QBMAN_CINH_SWP_DQRR_ITR 0xa80
#define QBMAN_CINH_SWP_DCAP 0xac0
#define QBMAN_CINH_SWP_SDQCR 0xb00
#define QBMAN_CINH_SWP_EQCR_AM_RT2 0xb40
@@ -38,6 +39,7 @@
#define QBMAN_CINH_SWP_IER 0xe40
#define QBMAN_CINH_SWP_ISDR 0xe80
#define QBMAN_CINH_SWP_IIR 0xec0
#define QBMAN_CINH_SWP_ITPR 0xf40
/* CENA register offsets */
#define QBMAN_CENA_SWP_EQCR(n) (0x000 + ((u32)(n) << 6))
@@ -355,6 +357,9 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
& p->eqcr.pi_ci_mask;
p->eqcr.available = p->eqcr.pi_ring_size;
/* Initialize the software portal with an IRQ timeout period of 0us, i.e. coalescing disabled */
qbman_swp_set_irq_coalescing(p, p->dqrr.dqrr_size - 1, 0);
return p;
}
@@ -1796,3 +1801,57 @@ u32 qbman_bp_info_num_free_bufs(struct qbman_bp_query_rslt *a)
{
return le32_to_cpu(a->fill);
}
/**
* qbman_swp_set_irq_coalescing() - Set new IRQ coalescing values
* @p: the software portal object
* @irq_threshold: interrupt threshold
* @irq_holdoff: interrupt holdoff (timeout) period in us
*
* Return 0 for success, or negative error code on error.
*/
int qbman_swp_set_irq_coalescing(struct qbman_swp *p, u32 irq_threshold,
u32 irq_holdoff)
{
u32 itp, max_holdoff;
/* Convert the irq_holdoff value from usecs to increments of 256 QBMAN
 * clock cycles. This depends on the QBMAN internal frequency.
 */
itp = (irq_holdoff * 1000) / p->desc->qman_256_cycles_per_ns;
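/* e.g. with the hypothetical 512 ns per 256-cycle increment of a
 * 500 MHz QBMAN clock, a 64 us holdoff maps to
 * (64 * 1000) / 512 = 125 increments
 */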
if (itp > 4096) {
max_holdoff = (p->desc->qman_256_cycles_per_ns * 4096) / 1000;
pr_err("irq_holdoff must be between 0..%dus\n", max_holdoff);
return -EINVAL;
}
if (irq_threshold >= p->dqrr.dqrr_size) {
pr_err("irq_threshold must be between 0..%d\n",
p->dqrr.dqrr_size - 1);
return -EINVAL;
}
p->irq_threshold = irq_threshold;
p->irq_holdoff = irq_holdoff;
qbman_write_register(p, QBMAN_CINH_SWP_DQRR_ITR, irq_threshold);
qbman_write_register(p, QBMAN_CINH_SWP_ITPR, itp);
return 0;
}
/**
* qbman_swp_get_irq_coalescing() - Get the current IRQ coalescing parameters
* @p: the software portal object
* @irq_threshold: interrupt threshold (an IRQ is generated when there are more
* DQRR entries in the portal than the threshold)
* @irq_holdoff: interrupt holdoff (timeout) period in us
*/
void qbman_swp_get_irq_coalescing(struct qbman_swp *p, u32 *irq_threshold,
u32 *irq_holdoff)
{
if (irq_threshold)
*irq_threshold = p->irq_threshold;
if (irq_holdoff)
*irq_holdoff = p->irq_holdoff;
}
@@ -24,6 +24,8 @@ struct qbman_swp_desc {
void *cena_bar; /* Cache-enabled portal base address */
void __iomem *cinh_bar; /* Cache-inhibited portal base address */
u32 qman_version;
u32 qman_clk;
u32 qman_256_cycles_per_ns;
};
#define QBMAN_SWP_INTERRUPT_EQRI 0x01
@@ -156,6 +158,11 @@ struct qbman_swp {
} eqcr;
spinlock_t access_spinlock;
/* Interrupt coalescing */
u32 irq_threshold;
u32 irq_holdoff;
int use_adaptive_rx_coalesce;
};
/* Function pointers */
@@ -648,4 +655,10 @@ static inline const struct dpaa2_dq *qbman_swp_dqrr_next(struct qbman_swp *s)
return qbman_swp_dqrr_next_ptr(s);
}
int qbman_swp_set_irq_coalescing(struct qbman_swp *p, u32 irq_threshold,
u32 irq_holdoff);
void qbman_swp_get_irq_coalescing(struct qbman_swp *p, u32 *irq_threshold,
u32 *irq_holdoff);
#endif /* __FSL_QBMAN_PORTAL_H */
@@ -44,6 +44,7 @@ struct device;
* @regs_cinh: The cache inhibited regs
* @dpio_id: The dpio index
* @qman_version: The qman version
* @qman_clk: The qman clock frequency in Hz
*
* Describes the attributes and features of the DPIO object.
*/
@@ -55,6 +56,7 @@ struct dpaa2_io_desc {
void __iomem *regs_cinh;
int dpio_id;
u32 qman_version;
u32 qman_clk;
};
struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc,
@@ -129,4 +131,11 @@ int dpaa2_io_query_fq_count(struct dpaa2_io *d, u32 fqid,
u32 *fcnt, u32 *bcnt);
int dpaa2_io_query_bp_count(struct dpaa2_io *d, u16 bpid,
u32 *num);
int dpaa2_io_set_irq_coalescing(struct dpaa2_io *d, u32 irq_holdoff);
void dpaa2_io_get_irq_coalescing(struct dpaa2_io *d, u32 *irq_holdoff);
void dpaa2_io_set_adaptive_coalescing(struct dpaa2_io *d,
int use_adaptive_rx_coalesce);
int dpaa2_io_get_adaptive_coalescing(struct dpaa2_io *d);
void dpaa2_io_update_net_dim(struct dpaa2_io *d, __u64 frames, __u64 bytes);
#endif /* __FSL_DPAA2_IO_H */