Commit 117b07e6 authored by David S. Miller

Merge branch 'mlx4-XDP-performance-improvements'

Tariq Toukan says:

====================
mlx4 XDP performance improvements

This patchset contains data-path improvements, mainly for XDP_DROP
and XDP_TX cases.

Main patches:
* Patch 2 by Saeed allows enabling optimized A0 RX steering (in HW) when
  setting a single RX ring.
  With this configuration, HW packet-rate dramatically improves,
  reaching 28.1 Mpps in XDP_DROP case for both IPv4 (37% gain)
  and IPv6 (53% gain).
* Patch 6 enhances the XDP xmit function. Among other changes, now we
  ring one doorbell per NAPI. Patch gives 17% gain in XDP_TX case.
* Patch 7 obsoletes the NAPI of XDP_TX completion queue and integrates its
  poll into the respective RX NAPI. Patch gives 15% gain in XDP_TX case.

Series generated against net-next commit:
f7aec129 rxrpc: Cache the congestion window setting
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 1492a3a7 4c07c132
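
The central change in patches 6 and 7 above (ring the XDP-TX doorbell once per NAPI poll rather than once per forwarded frame, and have the RX NAPI reap the XDP-TX completion queue when a flag marks it busy) can be sketched outside the kernel. The toy C model below is only an illustration of that batching pattern; names such as struct xdp_tx_ring, xmit_frame() and ring_doorbell() are invented for the example and are not the driver's API.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of an XDP-TX send ring: a producer index plus a "doorbell"
 * counter standing in for the write that tells HW how far prod advanced.
 */
struct xdp_tx_ring {
	unsigned int prod;		/* descriptors posted by software */
	unsigned int db;		/* last prod value made visible to HW */
	unsigned int doorbells;		/* number of doorbell writes issued */
};

/* Post one frame: only advance the producer; do not ring the doorbell.
 * The caller records that a doorbell is still owed for this batch.
 */
static void xmit_frame(struct xdp_tx_ring *ring, bool *doorbell_pending)
{
	ring->prod++;
	*doorbell_pending = true;
}

/* Make the whole batch visible with a single doorbell write. */
static void ring_doorbell(struct xdp_tx_ring *ring)
{
	ring->db = ring->prod;
	ring->doorbells++;
}

/* One NAPI poll: forward a burst of frames, then ring the doorbell at
 * most once, mirroring the "one doorbell per NAPI" behaviour.
 */
static void napi_poll(struct xdp_tx_ring *ring, int budget)
{
	bool doorbell_pending = false;
	int i;

	for (i = 0; i < budget; i++)
		xmit_frame(ring, &doorbell_pending);

	if (doorbell_pending)
		ring_doorbell(ring);
}

int main(void)
{
	struct xdp_tx_ring ring = { 0 };
	int poll;

	for (poll = 0; poll < 4; poll++)
		napi_poll(&ring, 64);

	/* 256 frames posted, only 4 doorbell writes (one per poll) */
	printf("posted %u frames with %u doorbells\n", ring.prod, ring.doorbells);
	return 0;
}

In the series itself the pending state is carried by *doorbell_pending in mlx4_en_xmit_frame() and by cq->xdp_busy, which mlx4_en_poll_rx_cq() checks before polling the XDP-TX CQ, as the hunks below show.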
@@ -146,16 +146,25 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
 	if (err)
 		goto free_eq;
 
-	cq->mcq.comp = cq->type != RX ? mlx4_en_tx_irq : mlx4_en_rx_irq;
 	cq->mcq.event = mlx4_en_cq_event;
 
-	if (cq->type != RX)
+	switch (cq->type) {
+	case TX:
+		cq->mcq.comp = mlx4_en_tx_irq;
 		netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq,
 				  NAPI_POLL_WEIGHT);
-	else
+		napi_enable(&cq->napi);
+		break;
+	case RX:
+		cq->mcq.comp = mlx4_en_rx_irq;
 		netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64);
-
-	napi_enable(&cq->napi);
+		napi_enable(&cq->napi);
+		break;
+	case TX_XDP:
+		/* nothing regarding napi, it's shared with rx ring */
+		cq->xdp_busy = false;
+		break;
+	}
 
 	return 0;
@@ -184,8 +193,10 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
 
 void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
 {
-	napi_disable(&cq->napi);
-	netif_napi_del(&cq->napi);
+	if (cq->type != TX_XDP) {
+		napi_disable(&cq->napi);
+		netif_napi_del(&cq->napi);
+	}
 	mlx4_cq_free(priv->mdev->dev, &cq->mcq);
 }
...
@@ -125,9 +125,9 @@ void mlx4_en_update_loopback_state(struct net_device *dev,
 		priv->flags |= MLX4_EN_FLAG_ENABLE_HW_LOOPBACK;
 
 	mutex_lock(&priv->mdev->state_lock);
-	if (priv->mdev->dev->caps.flags2 &
-	    MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB &&
-	    priv->rss_map.indir_qp.qpn) {
+	if ((priv->mdev->dev->caps.flags2 &
+	     MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB) &&
+	    priv->rss_map.indir_qp && priv->rss_map.indir_qp->qpn) {
 		int i;
 		int err = 0;
 		int loopback = !!(features & NETIF_F_LOOPBACK);
...
@@ -596,6 +596,8 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv)
 		return err;
 	}
 
+	en_info(priv, "Steering Mode %d\n", dev->caps.steering_mode);
+
 	if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) {
 		int base_qpn = mlx4_get_base_qpn(dev, priv->port);
 		*qpn = base_qpn + index;
@@ -1010,7 +1012,7 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv,
 			memcpy(&mc_list[10], mclist->addr, ETH_ALEN);
 			mc_list[5] = priv->port;
 			err = mlx4_multicast_detach(mdev->dev,
-						    &priv->rss_map.indir_qp,
+						    priv->rss_map.indir_qp,
 						    mc_list,
 						    MLX4_PROT_ETH,
 						    mclist->reg_id);
@@ -1032,7 +1034,7 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv,
 			/* needed for B0 steering support */
 			mc_list[5] = priv->port;
 			err = mlx4_multicast_attach(mdev->dev,
-						    &priv->rss_map.indir_qp,
+						    priv->rss_map.indir_qp,
 						    mc_list,
 						    priv->port, 0,
 						    MLX4_PROT_ETH,
@@ -1677,13 +1679,15 @@ int mlx4_en_start_port(struct net_device *dev)
 			if (t != TX_XDP) {
 				tx_ring->tx_queue = netdev_get_tx_queue(dev, i);
 				tx_ring->recycle_ring = NULL;
+
+				/* Arm CQ for TX completions */
+				mlx4_en_arm_cq(priv, cq);
 			} else {
 				mlx4_en_init_recycle_ring(priv, i);
+
+				/* XDP TX CQ should never be armed */
 			}
 
-			/* Arm CQ for TX completions */
-			mlx4_en_arm_cq(priv, cq);
-
 			/* Set initial ownership of all Tx TXBBs to SW (1) */
 			for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE)
 				*((u32 *)(tx_ring->buf + j)) = 0xffffffff;
@@ -1742,7 +1746,7 @@ int mlx4_en_start_port(struct net_device *dev)
 	/* Attach rx QP to bradcast address */
 	eth_broadcast_addr(&mc_list[10]);
 	mc_list[5] = priv->port; /* needed for B0 steering support */
-	if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list,
+	if (mlx4_multicast_attach(mdev->dev, priv->rss_map.indir_qp, mc_list,
 				  priv->port, 0, MLX4_PROT_ETH,
 				  &priv->broadcast_id))
 		mlx4_warn(mdev, "Failed Attaching Broadcast\n");
@@ -1866,12 +1870,12 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 	/* Detach All multicasts */
 	eth_broadcast_addr(&mc_list[10]);
 	mc_list[5] = priv->port; /* needed for B0 steering support */
-	mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list,
+	mlx4_multicast_detach(mdev->dev, priv->rss_map.indir_qp, mc_list,
 			      MLX4_PROT_ETH, priv->broadcast_id);
 	list_for_each_entry(mclist, &priv->curr_list, list) {
 		memcpy(&mc_list[10], mclist->addr, ETH_ALEN);
 		mc_list[5] = priv->port;
-		mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp,
+		mlx4_multicast_detach(mdev->dev, priv->rss_map.indir_qp,
 				      mc_list, MLX4_PROT_ETH, mclist->reg_id);
 		if (mclist->tunnel_reg_id)
 			mlx4_flow_detach(mdev->dev, mclist->tunnel_reg_id);
...
@@ -134,10 +134,11 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
 				   struct mlx4_en_rx_ring *ring, int index,
 				   gfp_t gfp)
 {
-	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
+	struct mlx4_en_rx_desc *rx_desc = ring->buf +
+		(index << ring->log_stride);
 	struct mlx4_en_rx_alloc *frags = ring->rx_info +
 					(index << priv->log_rx_info);
-	if (ring->page_cache.index > 0) {
+	if (likely(ring->page_cache.index > 0)) {
 		/* XDP uses a single page per frame */
 		if (!frags->page) {
 			ring->page_cache.index--;
@@ -178,6 +179,7 @@ static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv,
 	}
 }
 
+/* Function not in fast-path */
 static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
 {
 	struct mlx4_en_rx_ring *ring;
@@ -539,14 +541,14 @@ static void validate_loopback(struct mlx4_en_priv *priv, void *va)
 	priv->loopback_ok = 1;
 }
 
-static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
+static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
 				      struct mlx4_en_rx_ring *ring)
 {
 	u32 missing = ring->actual_size - (ring->prod - ring->cons);
 
 	/* Try to batch allocations, but not too much. */
 	if (missing < 8)
-		return false;
+		return;
 	do {
 		if (mlx4_en_prepare_rx_desc(priv, ring,
 					    ring->prod & ring->size_mask,
@@ -554,9 +556,9 @@ static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
 					    __GFP_MEMALLOC))
 			break;
 		ring->prod++;
-	} while (--missing);
+	} while (likely(--missing));
 
-	return true;
+	mlx4_en_update_rx_prod_db(ring);
 }
 
 /* When hardware doesn't strip the vlan, we need to calculate the checksum
@@ -637,21 +639,14 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
 int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
-	struct mlx4_en_dev *mdev = priv->mdev;
-	struct mlx4_cqe *cqe;
-	struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
-	struct mlx4_en_rx_alloc *frags;
+	int factor = priv->cqe_factor;
+	struct mlx4_en_rx_ring *ring;
 	struct bpf_prog *xdp_prog;
-	int doorbell_pending;
-	struct sk_buff *skb;
-	int index;
-	int nr;
-	unsigned int length;
+	int cq_ring = cq->ring;
+	bool doorbell_pending;
+	struct mlx4_cqe *cqe;
 	int polled = 0;
-	int ip_summed;
-	int factor = priv->cqe_factor;
-	u64 timestamp;
-	bool l2_tunnel;
+	int index;
 
 	if (unlikely(!priv->port_up))
 		return 0;
@@ -659,6 +654,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	if (unlikely(budget <= 0))
 		return polled;
 
+	ring = priv->rx_ring[cq_ring];
+
 	/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(ring->xdp_prog);
@@ -673,10 +670,17 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	/* Process all completed CQEs */
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
 		    cq->mcq.cons_index & cq->size)) {
+		struct mlx4_en_rx_alloc *frags;
+		enum pkt_hash_types hash_type;
+		struct sk_buff *skb;
+		unsigned int length;
+		int ip_summed;
 		void *va;
+		int nr;
 
 		frags = ring->rx_info + (index << priv->log_rx_info);
 		va = page_address(frags[0].page) + frags[0].page_offset;
+		prefetchw(va);
 		/*
 		 * make sure we read the CQE after we read the ownership bit
 		 */
@@ -768,7 +772,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 			break;
 		case XDP_TX:
 			if (likely(!mlx4_en_xmit_frame(ring, frags, dev,
-							length, cq->ring,
+							length, cq_ring,
 							&doorbell_pending))) {
 				frags[0].page = NULL;
 				goto next;
@@ -790,24 +794,27 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 		ring->packets++;
 
 		skb = napi_get_frags(&cq->napi);
-		if (!skb)
+		if (unlikely(!skb))
 			goto next;
 
 		if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) {
-			timestamp = mlx4_en_get_cqe_ts(cqe);
-			mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb),
+			u64 timestamp = mlx4_en_get_cqe_ts(cqe);
+
+			mlx4_en_fill_hwtstamps(priv->mdev, skb_hwtstamps(skb),
 					       timestamp);
 		}
 
-		skb_record_rx_queue(skb, cq->ring);
+		skb_record_rx_queue(skb, cq_ring);
 
 		if (likely(dev->features & NETIF_F_RXCSUM)) {
 			if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
 						      MLX4_CQE_STATUS_UDP)) {
 				if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
 				    cqe->checksum == cpu_to_be16(0xffff)) {
-					ip_summed = CHECKSUM_UNNECESSARY;
-					l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
+					bool l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
 						(cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
+
+					ip_summed = CHECKSUM_UNNECESSARY;
+					hash_type = PKT_HASH_TYPE_L4;
 					if (l2_tunnel)
 						skb->csum_level = 1;
 					ring->csum_ok++;
@@ -822,6 +829,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 					goto csum_none;
 				} else {
 					ip_summed = CHECKSUM_COMPLETE;
+					hash_type = PKT_HASH_TYPE_L3;
 					ring->csum_complete++;
 				}
 			} else {
@@ -831,16 +839,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 		} else {
 csum_none:
 			ip_summed = CHECKSUM_NONE;
+			hash_type = PKT_HASH_TYPE_L3;
 			ring->csum_none++;
 		}
 
 		skb->ip_summed = ip_summed;
 		if (dev->features & NETIF_F_RXHASH)
 			skb_set_hash(skb,
 				     be32_to_cpu(cqe->immed_rss_invalid),
-				     (ip_summed == CHECKSUM_UNNECESSARY) ?
-					PKT_HASH_TYPE_L4 :
-					PKT_HASH_TYPE_L3);
+				     hash_type);
 
 		if ((cqe->vlan_my_qpn &
 		    cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) &&
@@ -867,15 +873,17 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 		++cq->mcq.cons_index;
 		index = (cq->mcq.cons_index) & ring->size_mask;
 		cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
-		if (++polled == budget)
+		if (unlikely(++polled == budget))
 			break;
 	}
 
 	rcu_read_unlock();
 
-	if (polled) {
-		if (doorbell_pending)
-			mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
+	if (likely(polled)) {
+		if (doorbell_pending) {
+			priv->tx_cq[TX_XDP][cq_ring]->xdp_busy = true;
+			mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq_ring]);
+		}
 
 		mlx4_cq_set_ci(&cq->mcq);
 		wmb(); /* ensure HW sees CQ consumer before we post new buffers */
@@ -883,8 +891,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	}
 
 	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
-	if (mlx4_en_refill_rx_buffers(priv, ring))
-		mlx4_en_update_rx_prod_db(ring);
+	mlx4_en_refill_rx_buffers(priv, ring);
 
 	return polled;
 }
@@ -907,16 +914,30 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
 	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
 	struct net_device *dev = cq->dev;
 	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_cq *xdp_tx_cq = NULL;
+	bool clean_complete = true;
 	int done;
 
+	if (priv->tx_ring_num[TX_XDP]) {
+		xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring];
+		if (xdp_tx_cq->xdp_busy) {
+			clean_complete = mlx4_en_process_tx_cq(dev, xdp_tx_cq,
+							       budget);
+			xdp_tx_cq->xdp_busy = !clean_complete;
+		}
+	}
+
 	done = mlx4_en_process_rx_cq(dev, cq, budget);
 
 	/* If we used up all the quota - we're probably not done yet... */
-	if (done == budget) {
+	if (done == budget || !clean_complete) {
 		const struct cpumask *aff;
 		struct irq_data *idata;
 		int cpu_curr;
 
+		/* in case we got here because of !clean_complete */
+		done = budget;
+
 		INC_PERF_COUNTER(priv->pstats.napi_quota);
 
 		cpu_curr = smp_processor_id();
@@ -936,7 +957,7 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
 			done--;
 	}
 	/* Done for now */
-	if (napi_complete_done(napi, done))
+	if (likely(napi_complete_done(napi, done)))
 		mlx4_en_arm_cq(priv, cq);
 	return done;
 }
@@ -1099,11 +1120,14 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
 	int i, qpn;
 	int err = 0;
 	int good_qps = 0;
+	u8 flags;
 
 	en_dbg(DRV, priv, "Configuring rss steering\n");
+
+	flags = priv->rx_ring_num == 1 ? MLX4_RESERVE_A0_QP : 0;
 	err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
 				    priv->rx_ring_num,
-				    &rss_map->base_qpn, 0);
+				    &rss_map->base_qpn, flags);
 	if (err) {
 		en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
 		return err;
@@ -1120,13 +1144,28 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
 		++good_qps;
 	}
 
+	if (priv->rx_ring_num == 1) {
+		rss_map->indir_qp = &rss_map->qps[0];
+		priv->base_qpn = rss_map->indir_qp->qpn;
+		en_info(priv, "Optimized Non-RSS steering\n");
+		return 0;
+	}
+
+	rss_map->indir_qp = kzalloc(sizeof(*rss_map->indir_qp), GFP_KERNEL);
+	if (!rss_map->indir_qp) {
+		err = -ENOMEM;
+		goto rss_err;
+	}
+
 	/* Configure RSS indirection qp */
-	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL);
+	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, rss_map->indir_qp,
+			    GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed to allocate RSS indirection QP\n");
 		goto rss_err;
 	}
-	rss_map->indir_qp.event = mlx4_en_sqp_event;
+
+	rss_map->indir_qp->event = mlx4_en_sqp_event;
 	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
 				priv->rx_ring[0]->cqn, -1, &context);
@@ -1164,8 +1203,9 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
 		err = -EINVAL;
 		goto indir_err;
 	}
+
 	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
-			       &rss_map->indir_qp, &rss_map->indir_state);
+			       rss_map->indir_qp, &rss_map->indir_state);
 	if (err)
 		goto indir_err;
@@ -1173,9 +1213,11 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
 indir_err:
 	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
-		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
-	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
-	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
+		       MLX4_QP_STATE_RST, NULL, 0, 0, rss_map->indir_qp);
+	mlx4_qp_remove(mdev->dev, rss_map->indir_qp);
+	mlx4_qp_free(mdev->dev, rss_map->indir_qp);
+	kfree(rss_map->indir_qp);
+	rss_map->indir_qp = NULL;
 rss_err:
 	for (i = 0; i < good_qps; i++) {
 		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
@@ -1193,10 +1235,15 @@ void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
 	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
 	int i;
 
-	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
-		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
-	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
-	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
+	if (priv->rx_ring_num > 1) {
+		mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
+			       MLX4_QP_STATE_RST, NULL, 0, 0,
+			       rss_map->indir_qp);
+		mlx4_qp_remove(mdev->dev, rss_map->indir_qp);
+		mlx4_qp_free(mdev->dev, rss_map->indir_qp);
+		kfree(rss_map->indir_qp);
+		rss_map->indir_qp = NULL;
+	}
 
 	for (i = 0; i < priv->rx_ring_num; i++) {
 		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
...
@@ -234,23 +234,24 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
 			      u8 owner)
 {
 	__be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
-	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
+	struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
 	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
 	void *end = ring->buf + ring->buf_size;
 	__be32 *ptr = (__be32 *)tx_desc;
 	int i;
 
 	/* Optimize the common case when there are no wraparounds */
-	if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
+	if (likely((void *)tx_desc +
+		   (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) {
 		/* Stamp the freed descriptor */
-		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE;
+		for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE;
 		     i += STAMP_STRIDE) {
 			*ptr = stamp;
 			ptr += STAMP_DWORDS;
 		}
 	} else {
 		/* Stamp the freed descriptor */
-		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE;
+		for (i = 0; i < tx_info->nr_txbb << LOG_TXBB_SIZE;
 		     i += STAMP_STRIDE) {
 			*ptr = stamp;
 			ptr += STAMP_DWORDS;
@@ -265,11 +266,11 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
 
 u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 			 struct mlx4_en_tx_ring *ring,
-			 int index, u8 owner, u64 timestamp,
+			 int index, u64 timestamp,
 			 int napi_mode)
 {
 	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
-	struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
+	struct mlx4_en_tx_desc *tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
 	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
 	void *end = ring->buf + ring->buf_size;
 	struct sk_buff *skb = tx_info->skb;
@@ -288,19 +289,20 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 		skb_tstamp_tx(skb, &hwts);
 	}
 
-	/* Optimize the common case when there are no wraparounds */
-	if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
-		if (!tx_info->inl) {
-			if (tx_info->linear)
-				dma_unmap_single(priv->ddev,
-						 tx_info->map0_dma,
-						 tx_info->map0_byte_count,
-						 PCI_DMA_TODEVICE);
-			else
-				dma_unmap_page(priv->ddev,
-					       tx_info->map0_dma,
-					       tx_info->map0_byte_count,
-					       PCI_DMA_TODEVICE);
+	if (!tx_info->inl) {
+		if (tx_info->linear)
+			dma_unmap_single(priv->ddev,
+					 tx_info->map0_dma,
+					 tx_info->map0_byte_count,
+					 PCI_DMA_TODEVICE);
+		else
+			dma_unmap_page(priv->ddev,
+				       tx_info->map0_dma,
+				       tx_info->map0_byte_count,
+				       PCI_DMA_TODEVICE);
+		/* Optimize the common case when there are no wraparounds */
+		if (likely((void *)tx_desc +
+			   (tx_info->nr_txbb << LOG_TXBB_SIZE) <= end)) {
 			for (i = 1; i < nr_maps; i++) {
 				data++;
 				dma_unmap_page(priv->ddev,
@@ -308,23 +310,10 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 					be32_to_cpu(data->byte_count),
 					PCI_DMA_TODEVICE);
 			}
-		}
-	} else {
-		if (!tx_info->inl) {
-			if ((void *) data >= end) {
+		} else {
+			if ((void *)data >= end)
 				data = ring->buf + ((void *)data - end);
-			}
 
-			if (tx_info->linear)
-				dma_unmap_single(priv->ddev,
-						 tx_info->map0_dma,
-						 tx_info->map0_byte_count,
-						 PCI_DMA_TODEVICE);
-			else
-				dma_unmap_page(priv->ddev,
-					       tx_info->map0_dma,
-					       tx_info->map0_byte_count,
-					       PCI_DMA_TODEVICE);
 			for (i = 1; i < nr_maps; i++) {
 				data++;
 				/* Check for wraparound before unmapping */
@@ -344,7 +333,7 @@ u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 
 u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
 			    struct mlx4_en_tx_ring *ring,
-			    int index, u8 owner, u64 timestamp,
+			    int index, u64 timestamp,
 			    int napi_mode)
 {
 	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
@@ -381,8 +370,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
 	while (ring->cons != ring->prod) {
 		ring->last_nr_txbb = ring->free_tx_desc(priv, ring,
 						ring->cons & ring->size_mask,
-						!!(ring->cons & ring->size), 0,
-						0 /* Non-NAPI caller */);
+						0, 0 /* Non-NAPI caller */);
 		ring->cons += ring->last_nr_txbb;
 		cnt++;
 	}
@@ -396,15 +384,14 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
 	return cnt;
 }
 
-static bool mlx4_en_process_tx_cq(struct net_device *dev,
-				  struct mlx4_en_cq *cq, int napi_budget)
+bool mlx4_en_process_tx_cq(struct net_device *dev,
+			   struct mlx4_en_cq *cq, int napi_budget)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_cq *mcq = &cq->mcq;
 	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring];
 	struct mlx4_cqe *cqe;
-	u16 index;
-	u16 new_index, ring_index, stamp_index;
+	u16 index, ring_index, stamp_index;
 	u32 txbbs_skipped = 0;
 	u32 txbbs_stamp = 0;
 	u32 cons_index = mcq->cons_index;
@@ -419,7 +406,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 	u32 last_nr_txbb;
 	u32 ring_cons;
 
-	if (!priv->port_up)
+	if (unlikely(!priv->port_up))
 		return true;
 
 	netdev_txq_bql_complete_prefetchw(ring->tx_queue);
@@ -434,6 +421,8 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 	/* Process all completed CQEs */
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
 			cons_index & size) && (done < budget)) {
+		u16 new_index;
+
 		/*
 		 * make sure we read the CQE after we read the
 		 * ownership bit
@@ -464,8 +453,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 			/* free next descriptor */
 			last_nr_txbb = ring->free_tx_desc(
 					priv, ring, ring_index,
-					!!((ring_cons + txbbs_skipped) &
-					ring->size), timestamp, napi_budget);
+					timestamp, napi_budget);
 
 			mlx4_en_stamp_wqe(priv, ring, stamp_index,
 					  !!((ring_cons + txbbs_stamp) &
@@ -481,7 +469,6 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 		cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
 	}
 
-
 	/*
 	 * To prevent CQ overflow we first update CQ consumer and only then
 	 * the ring consumer.
@@ -494,7 +481,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 	ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb;
 	ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped;
 
-	if (ring->free_tx_desc == mlx4_en_recycle_tx_desc)
+	if (cq->type == TX_XDP)
 		return done < budget;
 
 	netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
@@ -506,6 +493,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 		netif_tx_wake_queue(ring->tx_queue);
 		ring->wake_queue++;
 	}
+
 	return done < budget;
 }
@@ -526,7 +514,7 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
 	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
 	struct net_device *dev = cq->dev;
 	struct mlx4_en_priv *priv = netdev_priv(dev);
-	int clean_complete;
+	bool clean_complete;
 
 	clean_complete = mlx4_en_process_tx_cq(dev, cq, budget);
 	if (!clean_complete)
@@ -543,7 +531,7 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
 						      u32 index,
 						      unsigned int desc_size)
 {
-	u32 copy = (ring->size - index) * TXBB_SIZE;
+	u32 copy = (ring->size - index) << LOG_TXBB_SIZE;
 	int i;
 
 	for (i = desc_size - copy - 4; i >= 0; i -= 4) {
@@ -558,12 +546,12 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
 		if ((i & (TXBB_SIZE - 1)) == 0)
 			wmb();
 
-		*((u32 *) (ring->buf + index * TXBB_SIZE + i)) =
+		*((u32 *)(ring->buf + (index << LOG_TXBB_SIZE) + i)) =
 			*((u32 *) (ring->bounce_buf + i));
 	}
 
 	/* Return real descriptor location */
-	return ring->buf + index * TXBB_SIZE;
+	return ring->buf + (index << LOG_TXBB_SIZE);
 }
 
 /* Decide if skb can be inlined in tx descriptor to avoid dma mapping
@@ -775,37 +763,101 @@ static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring,
 	}
 }
 
+static bool mlx4_en_build_dma_wqe(struct mlx4_en_priv *priv,
+				  struct skb_shared_info *shinfo,
+				  struct mlx4_wqe_data_seg *data,
+				  struct sk_buff *skb,
+				  int lso_header_size,
+				  __be32 mr_key,
+				  struct mlx4_en_tx_info *tx_info)
+{
+	struct device *ddev = priv->ddev;
+	dma_addr_t dma = 0;
+	u32 byte_count = 0;
+	int i_frag;
+
+	/* Map fragments if any */
+	for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) {
+		const struct skb_frag_struct *frag;
+
+		frag = &shinfo->frags[i_frag];
+		byte_count = skb_frag_size(frag);
+		dma = skb_frag_dma_map(ddev, frag,
+				       0, byte_count,
+				       DMA_TO_DEVICE);
+		if (dma_mapping_error(ddev, dma))
+			goto tx_drop_unmap;
+
+		data->addr = cpu_to_be64(dma);
+		data->lkey = mr_key;
+		dma_wmb();
+		data->byte_count = cpu_to_be32(byte_count);
+		--data;
+	}
+
+	/* Map linear part if needed */
+	if (tx_info->linear) {
+		byte_count = skb_headlen(skb) - lso_header_size;
+
+		dma = dma_map_single(ddev, skb->data +
+				     lso_header_size, byte_count,
+				     PCI_DMA_TODEVICE);
+		if (dma_mapping_error(ddev, dma))
+			goto tx_drop_unmap;
+
+		data->addr = cpu_to_be64(dma);
+		data->lkey = mr_key;
+		dma_wmb();
+		data->byte_count = cpu_to_be32(byte_count);
+	}
+	/* tx completion can avoid cache line miss for common cases */
+	tx_info->map0_dma = dma;
+	tx_info->map0_byte_count = byte_count;
+
+	return true;
+
+tx_drop_unmap:
+	en_err(priv, "DMA mapping error\n");
+
+	while (++i_frag < shinfo->nr_frags) {
+		++data;
+		dma_unmap_page(ddev, (dma_addr_t)be64_to_cpu(data->addr),
+			       be32_to_cpu(data->byte_count),
+			       PCI_DMA_TODEVICE);
+	}
+
+	return false;
+}
+
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	union mlx4_wqe_qpn_vlan qpn_vlan = {};
-	struct device *ddev = priv->ddev;
 	struct mlx4_en_tx_ring *ring;
 	struct mlx4_en_tx_desc *tx_desc;
 	struct mlx4_wqe_data_seg *data;
 	struct mlx4_en_tx_info *tx_info;
-	int tx_ind = 0;
+	int tx_ind;
 	int nr_txbb;
 	int desc_size;
 	int real_size;
 	u32 index, bf_index;
 	__be32 op_own;
-	u16 vlan_proto = 0;
-	int i_frag;
 	int lso_header_size;
 	void *fragptr = NULL;
 	bool bounce = false;
 	bool send_doorbell;
 	bool stop_queue;
 	bool inline_ok;
+	u8 data_offset;
 	u32 ring_cons;
 	bool bf_ok;
 
 	tx_ind = skb_get_queue_mapping(skb);
 	ring = priv->tx_ring[TX][tx_ind];
 
-	if (!priv->port_up)
+	if (unlikely(!priv->port_up))
 		goto tx_drop;
 
 	/* fetch ring->cons far ahead before needing it to avoid stall */
@@ -818,7 +870,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* Align descriptor to TXBB size */
 	desc_size = ALIGN(real_size, TXBB_SIZE);
-	nr_txbb = desc_size / TXBB_SIZE;
+	nr_txbb = desc_size >> LOG_TXBB_SIZE;
 	if (unlikely(nr_txbb > MAX_DESC_TXBBS)) {
 		if (netif_msg_tx_err(priv))
 			en_warn(priv, "Oversized header or SG list\n");
@@ -827,6 +879,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	bf_ok = ring->bf_enabled;
 	if (skb_vlan_tag_present(skb)) {
+		u16 vlan_proto;
+
 		qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb));
 		vlan_proto = be16_to_cpu(skb->vlan_proto);
 		if (vlan_proto == ETH_P_8021AD)
@@ -851,7 +905,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* See if we have enough space for whole descriptor TXBB for setting
 	 * SW ownership on next descriptor; if not, use a bounce buffer. */
 	if (likely(index + nr_txbb <= ring->size))
-		tx_desc = ring->buf + index * TXBB_SIZE;
+		tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
 	else {
 		tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf;
 		bounce = true;
@@ -863,64 +917,31 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	tx_info->skb = skb;
 	tx_info->nr_txbb = nr_txbb;
 
-	data = &tx_desc->data;
-	if (lso_header_size)
-		data = ((void *)&tx_desc->lso + ALIGN(lso_header_size + 4,
-						      DS_SIZE));
+	if (!lso_header_size) {
+		data = &tx_desc->data;
+		data_offset = offsetof(struct mlx4_en_tx_desc, data);
+	} else {
+		int lso_align = ALIGN(lso_header_size + 4, DS_SIZE);
+
+		data = (void *)&tx_desc->lso + lso_align;
+		data_offset = offsetof(struct mlx4_en_tx_desc, lso) + lso_align;
+	}
 
 	/* valid only for none inline segments */
-	tx_info->data_offset = (void *)data - (void *)tx_desc;
+	tx_info->data_offset = data_offset;
 
 	tx_info->inl = inline_ok;
 
-	tx_info->linear = (lso_header_size < skb_headlen(skb) &&
-			   !inline_ok) ? 1 : 0;
+	tx_info->linear = lso_header_size < skb_headlen(skb) && !inline_ok;
 
 	tx_info->nr_maps = shinfo->nr_frags + tx_info->linear;
 	data += tx_info->nr_maps - 1;
 
-	if (!tx_info->inl) {
-		dma_addr_t dma = 0;
-		u32 byte_count = 0;
-
-		/* Map fragments if any */
-		for (i_frag = shinfo->nr_frags - 1; i_frag >= 0; i_frag--) {
-			const struct skb_frag_struct *frag;
-
-			frag = &shinfo->frags[i_frag];
-			byte_count = skb_frag_size(frag);
-			dma = skb_frag_dma_map(ddev, frag,
-					       0, byte_count,
-					       DMA_TO_DEVICE);
-			if (dma_mapping_error(ddev, dma))
-				goto tx_drop_unmap;
-
-			data->addr = cpu_to_be64(dma);
-			data->lkey = ring->mr_key;
-			dma_wmb();
-			data->byte_count = cpu_to_be32(byte_count);
-			--data;
-		}
-
-		/* Map linear part if needed */
-		if (tx_info->linear) {
-			byte_count = skb_headlen(skb) - lso_header_size;
-
-			dma = dma_map_single(ddev, skb->data +
-					     lso_header_size, byte_count,
-					     PCI_DMA_TODEVICE);
-			if (dma_mapping_error(ddev, dma))
-				goto tx_drop_unmap;
-
-			data->addr = cpu_to_be64(dma);
-			data->lkey = ring->mr_key;
-			dma_wmb();
-			data->byte_count = cpu_to_be32(byte_count);
-		}
-		/* tx completion can avoid cache line miss for common cases */
-		tx_info->map0_dma = dma;
-		tx_info->map0_byte_count = byte_count;
-	}
+	if (!tx_info->inl)
+		if (!mlx4_en_build_dma_wqe(priv, shinfo, data, skb,
+					   lso_header_size, ring->mr_key,
+					   tx_info))
+			goto tx_drop_count;
 
 	/*
 	 * For timestamping add flag to skb_shinfo and
@@ -1056,16 +1077,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 	return NETDEV_TX_OK;
 
-tx_drop_unmap:
-	en_err(priv, "DMA mapping error\n");
-
-	while (++i_frag < shinfo->nr_frags) {
-		++data;
-		dma_unmap_page(ddev, (dma_addr_t) be64_to_cpu(data->addr),
-			       be32_to_cpu(data->byte_count),
-			       PCI_DMA_TODEVICE);
-	}
-
 tx_drop_count:
 	ring->tx_dropped++;
 tx_drop:
@@ -1073,52 +1084,41 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
+#define MLX4_EN_XDP_TX_NRTXBB  1
+#define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \
+				 / 16) & 0x3f)
+
 netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 			       struct mlx4_en_rx_alloc *frame,
 			       struct net_device *dev, unsigned int length,
-			       int tx_ind, int *doorbell_pending)
+			       int tx_ind, bool *doorbell_pending)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	union mlx4_wqe_qpn_vlan qpn_vlan = {};
-	struct mlx4_en_tx_ring *ring;
 	struct mlx4_en_tx_desc *tx_desc;
-	struct mlx4_wqe_data_seg *data;
 	struct mlx4_en_tx_info *tx_info;
-	int index, bf_index;
-	bool send_doorbell;
-	int nr_txbb = 1;
-	bool stop_queue;
+	struct mlx4_wqe_data_seg *data;
+	struct mlx4_en_tx_ring *ring;
 	dma_addr_t dma;
-	int real_size;
 	__be32 op_own;
-	u32 ring_cons;
-	bool bf_ok;
+	int index;
 
-	BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE,
-			 "mlx4_en_xmit_frame requires minimum size tx desc");
+	if (unlikely(!priv->port_up))
+		goto tx_drop;
 
 	ring = priv->tx_ring[TX_XDP][tx_ind];
 
-	if (!priv->port_up)
-		goto tx_drop;
-
-	if (mlx4_en_is_tx_ring_full(ring))
+	if (unlikely(mlx4_en_is_tx_ring_full(ring)))
 		goto tx_drop_count;
 
-	/* fetch ring->cons far ahead before needing it to avoid stall */
-	ring_cons = READ_ONCE(ring->cons);
-
 	index = ring->prod & ring->size_mask;
 	tx_info = &ring->tx_info[index];
 
-	bf_ok = ring->bf_enabled;
-
 	/* Track current inflight packets for performance analysis */
 	AVG_PERF_COUNTER(priv->pstats.inflight_avg,
-			 (u32)(ring->prod - ring_cons - 1));
+			 (u32)(ring->prod - READ_ONCE(ring->cons) - 1));
 
-	bf_index = ring->prod;
-	tx_desc = ring->buf + index * TXBB_SIZE;
+	tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
 	data = &tx_desc->data;
 
 	dma = frame->dma;
@@ -1127,9 +1127,9 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 	frame->page = NULL;
 	tx_info->map0_dma = dma;
 	tx_info->map0_byte_count = PAGE_SIZE;
-	tx_info->nr_txbb = nr_txbb;
+	tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
 	tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
-	tx_info->data_offset = (void *)data - (void *)tx_desc;
+	tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
 	tx_info->ts_requested = 0;
 	tx_info->nr_maps = 1;
 	tx_info->linear = 1;
@@ -1153,28 +1153,19 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 	rx_ring->xdp_tx++;
 	AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length);
 
-	ring->prod += nr_txbb;
+	ring->prod += MLX4_EN_XDP_TX_NRTXBB;
 
-	stop_queue = mlx4_en_is_tx_ring_full(ring);
-	send_doorbell = stop_queue ||
-			*doorbell_pending > MLX4_EN_DOORBELL_BUDGET;
-	bf_ok &= send_doorbell;
+	qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
 
-	real_size = ((CTRL_SIZE + nr_txbb * DS_SIZE) / 16) & 0x3f;
-
-	if (bf_ok)
-		qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size);
-	else
-		qpn_vlan.fence_size = real_size;
-
-	mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, bf_index,
-			      op_own, bf_ok, send_doorbell);
-	*doorbell_pending = send_doorbell ? 0 : *doorbell_pending + 1;
+	mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0,
+			      op_own, false, false);
+	*doorbell_pending = true;
 
 	return NETDEV_TX_OK;
 
 tx_drop_count:
 	rx_ring->xdp_tx_full++;
+	*doorbell_pending = true;
 tx_drop:
 	return NETDEV_TX_BUSY;
 }
@@ -2356,8 +2356,8 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
 					MLX4_A0_STEERING_TABLE_SIZE;
 			}
 
-			mlx4_dbg(dev, "DMFS high rate steer mode is: %s\n",
+			mlx4_info(dev, "DMFS high rate steer mode is: %s\n",
 				 dmfs_high_rate_steering_mode_str(
 					dev->caps.dmfs_high_steer_mode));
 		}
 	} else {
...
@@ -72,7 +72,8 @@
 #define DEF_RX_RINGS 16
 #define MAX_RX_RINGS 128
 #define MIN_RX_RINGS 4
-#define TXBB_SIZE 64
+#define LOG_TXBB_SIZE 6
+#define TXBB_SIZE BIT(LOG_TXBB_SIZE)
 #define HEADROOM (2048 / TXBB_SIZE + 1)
 #define STAMP_STRIDE 64
 #define STAMP_DWORDS (STAMP_STRIDE / 4)
@@ -115,13 +116,12 @@
 #define MLX4_EN_MIN_TX_RING_P_UP 1
 #define MLX4_EN_MAX_TX_RING_P_UP 32
 #define MLX4_EN_NUM_UP 8
-#define MLX4_EN_DEF_TX_RING_SIZE 512
 #define MLX4_EN_DEF_RX_RING_SIZE 1024
+#define MLX4_EN_DEF_TX_RING_SIZE MLX4_EN_DEF_RX_RING_SIZE
 #define MAX_TX_RINGS (MLX4_EN_MAX_TX_RING_P_UP * \
 		      MLX4_EN_NUM_UP)
 #define MLX4_EN_DEFAULT_TX_WORK 256
-#define MLX4_EN_DOORBELL_BUDGET 8
 
 /* Target number of packets to coalesce with interrupt moderation */
 #define MLX4_EN_RX_COAL_TARGET 44
@@ -276,7 +276,7 @@ struct mlx4_en_tx_ring {
 	struct netdev_queue *tx_queue;
 	u32 (*free_tx_desc)(struct mlx4_en_priv *priv,
 			    struct mlx4_en_tx_ring *ring,
-			    int index, u8 owner,
+			    int index,
 			    u64 timestamp, int napi_mode);
 	struct mlx4_en_rx_ring *recycle_ring;
@@ -359,7 +359,10 @@ struct mlx4_en_cq {
 	struct mlx4_hwq_resources wqres;
 	int ring;
 	struct net_device *dev;
-	struct napi_struct napi;
+	union {
+		struct napi_struct napi;
+		bool xdp_busy;
+	};
 	int size;
 	int buf_size;
 	int vector;
@@ -431,7 +434,7 @@ struct mlx4_en_rss_map {
 	int base_qpn;
 	struct mlx4_qp qps[MAX_RX_RINGS];
 	enum mlx4_qp_state state[MAX_RX_RINGS];
-	struct mlx4_qp indir_qp;
+	struct mlx4_qp *indir_qp;
 	enum mlx4_qp_state indir_state;
 };
@@ -689,7 +692,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 			       struct mlx4_en_rx_alloc *frame,
 			       struct net_device *dev, unsigned int length,
-			       int tx_ind, int *doorbell_pending);
+			       int tx_ind, bool *doorbell_pending);
 void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
 bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
 			struct mlx4_en_rx_alloc *frame);
@@ -721,13 +724,15 @@ int mlx4_en_process_rx_cq(struct net_device *dev,
 			  int budget);
 int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget);
 int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget);
+bool mlx4_en_process_tx_cq(struct net_device *dev,
+			   struct mlx4_en_cq *cq, int napi_budget);
 u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 			 struct mlx4_en_tx_ring *ring,
-			 int index, u8 owner, u64 timestamp,
+			 int index, u64 timestamp,
 			 int napi_mode);
 u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
 			    struct mlx4_en_tx_ring *ring,
-			    int index, u8 owner, u64 timestamp,
+			    int index, u64 timestamp,
 			    int napi_mode);
 void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
 			     int is_tx, int rss, int qpn, int cqn, int user_prio,
...