Commit 00a59d30 authored by Xi Wang, committed by Jason Gunthorpe

RDMA/hns: Optimize wqe buffer filling process for post send

Encapsulate the wqe buffer filling details for the datagram seg, fast mr seg
and atomic seg.

Link: https://lore.kernel.org/r/1583839084-31579-3-git-send-email-liweihang@huawei.com
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent 6c6e3921
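
For readers skimming the diff below, the main structural change is that set_atomic_seg() now receives the raw wqe buffer and lays out the data seg and the atomic seg back to back itself, instead of the post send loop doing that pointer arithmetic inline. A minimal, self-contained sketch of that buffer-layout idea follows; the demo_* structs and names are hypothetical simplifications, not the driver's real hns_roce types:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical, simplified stand-ins for the driver's wqe segment layouts. */
struct demo_data_seg {
	uint32_t len;
	uint32_t lkey;
	uint64_t addr;
};

struct demo_atomic_seg {
	uint64_t fetchadd_swap_data;
	uint64_t cmp_data;
};

/*
 * Fill the data seg and the atomic seg that sits right behind it in the same
 * wqe buffer - the caller only hands over the raw buffer pointer, mirroring
 * what the reworked set_atomic_seg() does in the diff below.
 */
static void demo_set_atomic_seg(void *wqe, uint64_t swap, uint64_t cmp)
{
	struct demo_data_seg *dseg = wqe;
	struct demo_atomic_seg *aseg = (struct demo_atomic_seg *)(dseg + 1);

	dseg->len = 8;			/* RDMA atomics operate on 8 bytes */
	aseg->fetchadd_swap_data = swap;
	aseg->cmp_data = cmp;
}

int main(void)
{
	uint64_t wqe[8] = { 0 };	/* 64-byte, 8-byte-aligned demo buffer */

	demo_set_atomic_seg(wqe, 0x1234, 0x5678);

	const struct demo_atomic_seg *aseg =
		(const struct demo_atomic_seg *)((const char *)wqe +
						 sizeof(struct demo_data_seg));
	printf("swap=%#llx cmp=%#llx\n",
	       (unsigned long long)aseg->fetchadd_swap_data,
	       (unsigned long long)aseg->cmp_data);
	return 0;
}

Keeping the segment layout inside the helper means the post send loop no longer needs to know how many bytes the data seg occupies before the atomic seg.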
@@ -57,10 +57,10 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
 }
 
 static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
-			 struct hns_roce_wqe_frmr_seg *fseg,
-			 const struct ib_reg_wr *wr)
+			 void *wqe, const struct ib_reg_wr *wr)
 {
 	struct hns_roce_mr *mr = to_hr_mr(wr->mr);
+	struct hns_roce_wqe_frmr_seg *fseg = wqe;
 
 	/* use ib_access_flags */
 	roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
@@ -92,16 +92,26 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
 		     V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
 }
 
-static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
-			   const struct ib_atomic_wr *wr)
+static void set_atomic_seg(const struct ib_send_wr *wr, void *wqe,
+			   struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+			   int valid_num_sge)
 {
-	if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-		aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
-		aseg->cmp_data = cpu_to_le64(wr->compare_add);
+	struct hns_roce_wqe_atomic_seg *aseg;
+
+	set_data_seg_v2(wqe, wr->sg_list);
+	aseg = wqe + sizeof(struct hns_roce_v2_wqe_data_seg);
+
+	if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+		aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->swap);
+		aseg->cmp_data = cpu_to_le64(atomic_wr(wr)->compare_add);
 	} else {
-		aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
+		aseg->fetchadd_swap_data =
+			cpu_to_le64(atomic_wr(wr)->compare_add);
 		aseg->cmp_data = 0;
 	}
+
+	roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
+		       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
 }
 
 static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
@@ -154,11 +164,11 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
 static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 			     struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
 			     void *wqe, unsigned int *sge_ind,
-			     int valid_num_sge,
-			     const struct ib_send_wr **bad_wr)
+			     int valid_num_sge)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_v2_wqe_data_seg *dseg = wqe;
+	struct ib_device *ibdev = &hr_dev->ib_dev;
 	struct hns_roce_qp *qp = to_hr_qp(ibqp);
 	int j = 0;
 	int i;
@@ -166,15 +176,14 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 	if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) {
 		if (le32_to_cpu(rc_sq_wqe->msg_len) >
 		    hr_dev->caps.max_sq_inline) {
-			*bad_wr = wr;
-			dev_err(hr_dev->dev, "inline len(1-%d)=%d, illegal",
-				rc_sq_wqe->msg_len, hr_dev->caps.max_sq_inline);
+			ibdev_err(ibdev, "inline len(1-%d)=%d, illegal",
+				  rc_sq_wqe->msg_len,
+				  hr_dev->caps.max_sq_inline);
 			return -EINVAL;
 		}
 
 		if (wr->opcode == IB_WR_RDMA_READ) {
-			*bad_wr = wr;
-			dev_err(hr_dev->dev, "Not support inline data!\n");
+			ibdev_err(ibdev, "Not support inline data!\n");
 			return -EINVAL;
 		}
 
@@ -285,7 +294,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 	struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
 	struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
 	struct hns_roce_qp *qp = to_hr_qp(ibqp);
-	struct hns_roce_wqe_frmr_seg *fseg;
 	struct device *dev = hr_dev->dev;
 	unsigned int owner_bit;
 	unsigned int sge_idx;
@@ -547,8 +555,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 			break;
 		case IB_WR_REG_MR:
 			hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR;
-			fseg = wqe;
-			set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr));
+			set_frmr_seg(rc_sq_wqe, wqe, reg_wr(wr));
 			break;
 		case IB_WR_ATOMIC_CMP_AND_SWP:
 			hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
@@ -582,24 +589,18 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 			       V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op);
 
 		if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		    wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
-			struct hns_roce_v2_wqe_data_seg *dseg;
-
-			dseg = wqe;
-			set_data_seg_v2(dseg, wr->sg_list);
-			wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
-			set_atomic_seg(wqe, atomic_wr(wr));
-			roce_set_field(rc_sq_wqe->byte_16,
-				       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
-				       V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
+		    wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+			set_atomic_seg(wr, wqe, rc_sq_wqe,
 				       valid_num_sge);
-		} else if (wr->opcode != IB_WR_REG_MR) {
+		else if (wr->opcode != IB_WR_REG_MR) {
 			ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
 						wqe, &sge_idx,
-						valid_num_sge, bad_wr);
-			if (ret)
+						valid_num_sge);
+			if (ret) {
+				*bad_wr = wr;
 				goto out;
+			}
 		}
 	} else {
 		dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
 		spin_unlock_irqrestore(&qp->sq.lock, flags);
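
A second pattern visible in the last hunk: set_rwqe_data_seg() no longer takes bad_wr, so the helper only returns an error code and the post send loop records the failing wr itself, keeping wr-list bookkeeping out of the segment-filling helpers. A rough, self-contained sketch of that calling convention, using hypothetical demo_* types rather than the driver's or the verbs layer's real ones:

#include <errno.h>
#include <stdio.h>

/* Hypothetical stand-in for a posted work request. */
struct demo_send_wr {
	int num_sge;
	struct demo_send_wr *next;
};

/* The helper just reports failure; it knows nothing about bad_wr. */
static int demo_fill_data_seg(const struct demo_send_wr *wr)
{
	return wr->num_sge > 2 ? -EINVAL : 0;
}

static int demo_post_send(struct demo_send_wr *wr_list,
			  const struct demo_send_wr **bad_wr)
{
	struct demo_send_wr *wr;
	int ret = 0;

	for (wr = wr_list; wr; wr = wr->next) {
		ret = demo_fill_data_seg(wr);
		if (ret) {
			/* the caller, not the helper, records the bad wr */
			*bad_wr = wr;
			break;
		}
	}
	return ret;
}

int main(void)
{
	struct demo_send_wr b = { .num_sge = 5, .next = NULL };
	struct demo_send_wr a = { .num_sge = 1, .next = &b };
	const struct demo_send_wr *bad = NULL;

	int ret = demo_post_send(&a, &bad);
	printf("ret=%d, bad_wr is second wr: %s\n", ret,
	       bad == &b ? "yes" : "no");
	return 0;
}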