Commit 9b2cf76c authored by Xi Wang's avatar Xi Wang Committed by Jason Gunthorpe

RDMA/hns: Optimize PBL buffer allocation process

PBL table has its own implementation for multi-hop addressing currently,
but for the hardware, all table's addressing use the same logic, there is
no need to implement repeatedly. So optimize the PBL buffer allocation
process by using the mtr's interfaces.

Link: https://lore.kernel.org/r/1588071823-40200-2-git-send-email-liweihang@huawei.comSigned-off-by: default avatarXi Wang <wangxi11@huawei.com>
Signed-off-by: default avatarLang Cheng <chenglang@huawei.com>
Signed-off-by: default avatarWeihang Li <liweihang@huawei.com>
Reported-by: default avatarkbuild test robot <lkp@intel.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent 5ac55dfc
......@@ -403,30 +403,17 @@ struct hns_roce_mw {
struct hns_roce_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
u64 iova; /* MR's virtual orignal addr */
u64 size; /* Address range of MR */
u32 key; /* Key of MR */
u32 pd; /* PD num of MR */
u32 access; /* Access permission of MR */
u32 npages;
int enabled; /* MR's active status */
int type; /* MR's register type */
u64 *pbl_buf; /* MR's PBL space */
dma_addr_t pbl_dma_addr; /* MR's PBL space PA */
u32 pbl_size; /* PA number in the PBL */
u64 pbl_ba; /* page table address */
u32 l0_chunk_last_num; /* L0 last number */
u32 l1_chunk_last_num; /* L1 last number */
u64 **pbl_bt_l2; /* PBL BT L2 */
u64 **pbl_bt_l1; /* PBL BT L1 */
u64 *pbl_bt_l0; /* PBL BT L0 */
dma_addr_t *pbl_l2_dma_addr; /* PBL BT L2 dma addr */
dma_addr_t *pbl_l1_dma_addr; /* PBL BT L1 dma addr */
dma_addr_t pbl_l0_dma_addr; /* PBL BT L0 dma addr */
u32 pbl_ba_pg_sz; /* BT chunk page size */
u32 pbl_buf_pg_sz; /* buf chunk page size */
u32 pbl_hop_num; /* multi-hop number */
struct hns_roce_mtr pbl_mtr;
u32 npages;
dma_addr_t *page_list;
};
struct hns_roce_mr_table {
......
......@@ -1099,7 +1099,6 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev,
struct completion comp;
long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS;
unsigned long start = jiffies;
int npages;
int ret = 0;
priv = (struct hns_roce_v1_priv *)hr_dev->priv;
......@@ -1146,17 +1145,9 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev,
dev_dbg(dev, "Free mr 0x%x use 0x%x us.\n",
mr->key, jiffies_to_usecs(jiffies) - jiffies_to_usecs(start));
if (mr->size != ~0ULL) {
npages = ib_umem_page_count(mr->umem);
dma_free_coherent(dev, npages * 8, mr->pbl_buf,
mr->pbl_dma_addr);
}
hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
key_to_hw_index(mr->key), 0);
ib_umem_release(mr->umem);
hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
kfree(mr);
return ret;
......@@ -1826,9 +1817,12 @@ static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port,
static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
unsigned long mtpt_idx)
{
struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device);
u64 pages[HNS_ROCE_MAX_INNER_MTPT_NUM] = { 0 };
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_v1_mpt_entry *mpt_entry;
struct sg_dma_page_iter sg_iter;
u64 *pages;
dma_addr_t pbl_ba;
int count;
int i;
/* MPT filled into mailbox buf */
......@@ -1878,22 +1872,15 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
if (mr->type == MR_TYPE_DMA)
return 0;
pages = (u64 *) __get_free_page(GFP_KERNEL);
if (!pages)
return -ENOMEM;
i = 0;
for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) {
pages[i] = ((u64)sg_page_iter_dma_address(&sg_iter)) >> 12;
/* Directly record to MTPT table firstly 7 entry */
if (i >= HNS_ROCE_MAX_INNER_MTPT_NUM)
break;
i++;
count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
ARRAY_SIZE(pages), &pbl_ba);
if (count < 1) {
ibdev_err(ibdev, "failed to find PBL mtr, count = %d.", count);
return -ENOBUFS;
}
/* Register user mr */
for (i = 0; i < HNS_ROCE_MAX_INNER_MTPT_NUM; i++) {
for (i = 0; i < count; i++) {
switch (i) {
case 0:
mpt_entry->pa0_l = cpu_to_le32((u32)(pages[i]));
......@@ -1959,13 +1946,9 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
}
}
free_page((unsigned long) pages);
mpt_entry->pbl_addr_l = cpu_to_le32((u32)(mr->pbl_dma_addr));
mpt_entry->pbl_addr_l = cpu_to_le32(pbl_ba);
roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M,
MPT_BYTE_12_PBL_ADDR_H_S,
((u32)(mr->pbl_dma_addr >> 32)));
MPT_BYTE_12_PBL_ADDR_H_S, upper_32_bits(pbl_ba));
return 0;
}
......
......@@ -95,6 +95,7 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
{
struct hns_roce_mr *mr = to_hr_mr(wr->mr);
struct hns_roce_wqe_frmr_seg *fseg = wqe;
u64 pbl_ba;
/* use ib_access_flags */
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
......@@ -109,19 +110,20 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
/* Data structure reuse may lead to confusion */
rc_sq_wqe->msg_len = cpu_to_le32(mr->pbl_ba & 0xffffffff);
rc_sq_wqe->inv_key = cpu_to_le32(mr->pbl_ba >> 32);
pbl_ba = mr->pbl_mtr.hem_cfg.root_ba;
rc_sq_wqe->msg_len = cpu_to_le32(lower_32_bits(pbl_ba));
rc_sq_wqe->inv_key = cpu_to_le32(upper_32_bits(pbl_ba));
rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff);
rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32);
rc_sq_wqe->rkey = cpu_to_le32(wr->key);
rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
fseg->pbl_size = cpu_to_le32(mr->pbl_size);
fseg->pbl_size = cpu_to_le32(mr->npages);
roce_set_field(fseg->mode_buf_pg_sz,
V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S,
mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
roce_set_bit(fseg->mode_buf_pg_sz,
V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
}
......@@ -2439,32 +2441,30 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry,
struct hns_roce_mr *mr)
{
struct sg_dma_page_iter sg_iter;
u64 page_addr;
u64 *pages;
int i;
struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device);
u64 pages[HNS_ROCE_V2_MAX_INNER_MTPT_NUM] = { 0 };
struct ib_device *ibdev = &hr_dev->ib_dev;
dma_addr_t pbl_ba;
int i, count;
mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
roce_set_field(mpt_entry->byte_48_mode_ba,
V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S,
upper_32_bits(mr->pbl_ba >> 3));
count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
ARRAY_SIZE(pages), &pbl_ba);
if (count < 1) {
ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n",
count);
return -ENOBUFS;
}
pages = (u64 *)__get_free_page(GFP_KERNEL);
if (!pages)
return -ENOMEM;
/* Aligned to the hardware address access unit */
for (i = 0; i < count; i++)
pages[i] >>= 6;
i = 0;
for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) {
page_addr = sg_page_iter_dma_address(&sg_iter);
pages[i] = page_addr >> 6;
mpt_entry->pbl_size = cpu_to_le32(mr->npages);
mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3);
roce_set_field(mpt_entry->byte_48_mode_ba,
V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S,
upper_32_bits(pbl_ba >> 3));
/* Record the first 2 entry directly to MTPT table */
if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1)
goto found;
i++;
}
found:
mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M,
V2_MPT_BYTE_56_PA0_H_S, upper_32_bits(pages[0]));
......@@ -2475,9 +2475,7 @@ static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry,
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
free_page((unsigned long)pages);
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
return 0;
}
......@@ -2499,7 +2497,7 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mr->pd);
......@@ -2585,11 +2583,19 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
{
struct hns_roce_dev *hr_dev = to_hr_dev(mr->ibmr.device);
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_v2_mpt_entry *mpt_entry;
dma_addr_t pbl_ba = 0;
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
if (hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, NULL, 0, &pbl_ba) < 0) {
ibdev_err(ibdev, "failed to find frmr mtr.\n");
return -ENOBUFS;
}
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
......@@ -2597,7 +2603,7 @@ static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mr->pd);
......@@ -2610,17 +2616,17 @@ static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
mpt_entry->pbl_size = cpu_to_le32(mr->npages);
mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3));
roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
V2_MPT_BYTE_48_PBL_BA_H_S,
upper_32_bits(mr->pbl_ba >> 3));
upper_32_bits(pbl_ba >> 3));
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
return 0;
}
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment