Commit cc26a490 authored by Tariq Toukan's avatar Tariq Toukan Committed by David S. Miller

net/mlx4_en: Improve transmit CQ polling

Several small performance improvements in TX CQ polling,
including:
- Compiler branch predictor hints.
- Minimize variables scope.
- More proper check of cq type.
- Use boolean instead of int for a binary indication.

Performance tests:
Tested on ConnectX3Pro, Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz

Packet-rate tests for both regular stack and XDP use cases:
No noticeable gain, no degradation.
Signed-off-by: default avatarTariq Toukan <tariqt@mellanox.com>
Reviewed-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
Cc: kernel-team@fb.com
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 9bcee89a
......@@ -402,8 +402,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
struct mlx4_cq *mcq = &cq->mcq;
struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring];
struct mlx4_cqe *cqe;
u16 index;
u16 new_index, ring_index, stamp_index;
u16 index, ring_index, stamp_index;
u32 txbbs_skipped = 0;
u32 txbbs_stamp = 0;
u32 cons_index = mcq->cons_index;
......@@ -418,7 +417,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
u32 last_nr_txbb;
u32 ring_cons;
if (!priv->port_up)
if (unlikely(!priv->port_up))
return true;
netdev_txq_bql_complete_prefetchw(ring->tx_queue);
......@@ -433,6 +432,8 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
/* Process all completed CQEs */
while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
cons_index & size) && (done < budget)) {
u16 new_index;
/*
* make sure we read the CQE after we read the
* ownership bit
......@@ -479,7 +480,6 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
}
/*
* To prevent CQ overflow we first update CQ consumer and only then
* the ring consumer.
......@@ -492,7 +492,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb;
ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped;
if (ring->free_tx_desc == mlx4_en_recycle_tx_desc)
if (cq->type == TX_XDP)
return done < budget;
netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
......@@ -504,6 +504,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
netif_tx_wake_queue(ring->tx_queue);
ring->wake_queue++;
}
return done < budget;
}
......@@ -524,7 +525,7 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
struct net_device *dev = cq->dev;
struct mlx4_en_priv *priv = netdev_priv(dev);
int clean_complete;
bool clean_complete;
clean_complete = mlx4_en_process_tx_cq(dev, cq, budget);
if (!clean_complete)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment