Commit b3226184 authored by Roland Dreier

IB/mlx4: Micro-optimize mlx4_ib_poll_one()

Rather than byte-swapping cqe->g_mlpath_rqpn each time we extract a
field from it, byte-swap it once into a temporary variable.  This
results in smaller, better code -- e.g., on 32-bit x86:

add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-5 (-5)
function                                     old     new   delta
mlx4_ib_poll_cq                             1188    1183      -5
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent e57895d3
...@@ -313,6 +313,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, ...@@ -313,6 +313,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_ib_srq *srq; struct mlx4_ib_srq *srq;
int is_send; int is_send;
int is_error; int is_error;
u32 g_mlpath_rqpn;
u16 wqe_ctr; u16 wqe_ctr;
cqe = next_cqe_sw(cq); cqe = next_cqe_sw(cq);
...@@ -426,10 +427,10 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, ...@@ -426,10 +427,10 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
wc->slid = be16_to_cpu(cqe->rlid); wc->slid = be16_to_cpu(cqe->rlid);
wc->sl = cqe->sl >> 4; wc->sl = cqe->sl >> 4;
wc->src_qp = be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff; g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
wc->dlid_path_bits = (be32_to_cpu(cqe->g_mlpath_rqpn) >> 24) & 0x7f; wc->src_qp = g_mlpath_rqpn & 0xffffff;
wc->wc_flags |= be32_to_cpu(cqe->g_mlpath_rqpn) & 0x80000000 ? wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
IB_WC_GRH : 0; wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment