Commit ae6f6dd5 authored by Leon Romanovsky

Delay mlx5_ib internal resources allocations

From: Leon Romanovsky <leonro@nvidia.com>

Internal mlx5_ib resources are created during mlx5_ib module load. This
behavior is not optimal because it consumes resources that are not
needed when SFs are created. This patch series delays the creation of
mlx5_ib internal resources to the stage at which they are actually used.
Signed-off-by: Leon Romanovsky <leon@kernel.org>
parents ef551352 d98995b4
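How the lazy path works: the write-combining test used to run unconditionally from the enable_driver() hook at mlx5_ib registration time (see the removed mlx5_ib_enable_driver() below). It now lives in mlx5_core behind mlx5_wc_support_get(), guarded by the new wc_state/wc_state_lock pair, so the test QP, CQ, PD, and bfreg are only allocated the first time a caller actually asks about WC support. Below is a minimal sketch of that pattern, assembled from the pieces visible in this diff (enum mlx5_wc_state, wc_state_lock, and the mlx5_wc_support_get() declaration); mlx5_wc_probe() is a hypothetical stand-in for the real one-time test, which lives in the new wc.c whose diff is collapsed further down.

/*
 * Sketch only, not the actual wc.c implementation.
 * mlx5_wc_probe() is a hypothetical name for the one-shot hardware
 * test (post NOP WQEs through a BlueFlame page, poll for completion).
 */
bool mlx5_wc_support_get(struct mlx5_core_dev *mdev)
{
	bool supported;

	mutex_lock(&mdev->wc_state_lock);

	/* First caller pays for the probe; later callers read the cache. */
	if (mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED)
		mdev->wc_state = mlx5_wc_probe(mdev) ?
			MLX5_WC_STATE_SUPPORTED : MLX5_WC_STATE_UNSUPPORTED;

	supported = mdev->wc_state == MLX5_WC_STATE_SUPPORTED;
	mutex_unlock(&mdev->wc_state_lock);

	return supported;
}

Callers such as set_ucontext_resp() and mlx5_ib_mmap() (first hunks below) simply branch on the boolean result.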
@@ -1810,7 +1810,7 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
 	}

 	resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
-	if (dev->wc_support)
+	if (mlx5_wc_support_get(dev->mdev))
 		resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev,
 						      log_bf_reg_size);
 	resp->cache_line_size = cache_line_size();
@@ -2337,7 +2337,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
 	switch (command) {
 	case MLX5_IB_MMAP_WC_PAGE:
 	case MLX5_IB_MMAP_ALLOC_WC:
-		if (!dev->wc_support)
+		if (!mlx5_wc_support_get(dev->mdev))
 			return -EPERM;
 		fallthrough;
 	case MLX5_IB_MMAP_NC_PAGE:
@@ -3612,7 +3612,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)(
 	    alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC)
 		return -EOPNOTSUPP;

-	if (!to_mdev(c->ibucontext.device)->wc_support &&
+	if (!mlx5_wc_support_get(to_mdev(c->ibucontext.device)->mdev) &&
 	    alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
 		return -EOPNOTSUPP;
@@ -3766,18 +3766,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 	return err;
 }

-static int mlx5_ib_enable_driver(struct ib_device *dev)
-{
-	struct mlx5_ib_dev *mdev = to_mdev(dev);
-	int ret;
-
-	ret = mlx5_ib_test_wc(mdev);
-	mlx5_ib_dbg(mdev, "Write-Combining %s",
-		    mdev->wc_support ? "supported" : "not supported");
-
-	return ret;
-}
-
 static const struct ib_device_ops mlx5_ib_dev_ops = {
 	.owner = THIS_MODULE,
 	.driver_id = RDMA_DRIVER_MLX5,
@@ -3808,7 +3796,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
 	.drain_rq = mlx5_ib_drain_rq,
 	.drain_sq = mlx5_ib_drain_sq,
 	.device_group = &mlx5_attr_group,
-	.enable_driver = mlx5_ib_enable_driver,
 	.get_dev_fw_str = get_dev_fw_str,
 	.get_dma_mr = mlx5_ib_get_dma_mr,
 	.get_link_layer = mlx5_ib_port_link_layer,
...
@@ -30,10 +30,8 @@
  * SOFTWARE.
  */

-#include <linux/io.h>
 #include <rdma/ib_umem_odp.h>
 #include "mlx5_ib.h"
-#include <linux/jiffies.h>

 /*
  * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be
@@ -95,199 +93,3 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff(
 		return 0;
 	return page_size;
 }
-
-#define WR_ID_BF 0xBF
-#define WR_ID_END 0xBAD
-#define TEST_WC_NUM_WQES 255
-#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100)
-static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id,
-			 bool signaled)
-{
-	struct mlx5_ib_qp *qp = to_mqp(ibqp);
-	struct mlx5_wqe_ctrl_seg *ctrl;
-	struct mlx5_bf *bf = &qp->bf;
-	__be32 mmio_wqe[16] = {};
-	unsigned long flags;
-	unsigned int idx;
-
-	if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
-		return -EIO;
-
-	spin_lock_irqsave(&qp->sq.lock, flags);
-
-	idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
-	ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
-
-	memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg));
-	ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
-	ctrl->opmod_idx_opcode =
-		cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP);
-	ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) |
-				   (qp->trans_qp.base.mqp.qpn << 8));
-
-	qp->sq.wrid[idx] = wr_id;
-	qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP;
-	qp->sq.wqe_head[idx] = qp->sq.head + 1;
-	qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg),
-					MLX5_SEND_WQE_BB);
-	qp->sq.w_list[idx].next = qp->sq.cur_post;
-	qp->sq.head++;
-
-	memcpy(mmio_wqe, ctrl, sizeof(*ctrl));
-	((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |=
-		MLX5_WQE_CTRL_CQ_UPDATE;
-
-	/* Make sure that descriptors are written before
-	 * updating doorbell record and ringing the doorbell
-	 */
-	wmb();
-
-	qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
-
-	/* Make sure doorbell record is visible to the HCA before
-	 * we hit doorbell
-	 */
-	wmb();
-
-	__iowrite64_copy(bf->bfreg->map + bf->offset, mmio_wqe,
-			 sizeof(mmio_wqe) / 8);
-
-	bf->offset ^= bf->buf_size;
-
-	spin_unlock_irqrestore(&qp->sq.lock, flags);
-
-	return 0;
-}
-
-static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq)
-{
-	int ret;
-	struct ib_wc wc = {};
-	unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;
-
-	do {
-		ret = ib_poll_cq(cq, 1, &wc);
-		if (ret < 0 || wc.status)
-			return ret < 0 ? ret : -EINVAL;
-		if (ret)
-			break;
-	} while (!time_after(jiffies, end));
-
-	if (!ret)
-		return -ETIMEDOUT;
-
-	if (wc.wr_id != WR_ID_BF)
-		ret = 0;
-
-	return ret;
-}
-
-static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp)
-{
-	int err, i;
-
-	for (i = 0; i < TEST_WC_NUM_WQES; i++) {
-		err = post_send_nop(dev, qp, WR_ID_BF, false);
-		if (err)
-			return err;
-	}
-
-	return post_send_nop(dev, qp, WR_ID_END, true);
-}
-
-int mlx5_ib_test_wc(struct mlx5_ib_dev *dev)
-{
-	struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 };
-	int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
-	struct ib_qp_init_attr qp_init_attr = {
-		.cap = { .max_send_wr = TEST_WC_NUM_WQES },
-		.qp_type = IB_QPT_UD,
-		.sq_sig_type = IB_SIGNAL_REQ_WR,
-		.create_flags = MLX5_IB_QP_CREATE_WC_TEST,
-	};
-	struct ib_qp_attr qp_attr = { .port_num = 1 };
-	struct ib_device *ibdev = &dev->ib_dev;
-	struct ib_qp *qp;
-	struct ib_cq *cq;
-	struct ib_pd *pd;
-	int ret;
-
-	if (!MLX5_CAP_GEN(dev->mdev, bf))
-		return 0;
-
-	if (!dev->mdev->roce.roce_en &&
-	    port_type_cap == MLX5_CAP_PORT_TYPE_ETH) {
-		if (mlx5_core_is_pf(dev->mdev))
-			dev->wc_support = arch_can_pci_mmap_wc();
-		return 0;
-	}
-
-	ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false);
-	if (ret)
-		goto print_err;
-
-	if (!dev->wc_bfreg.wc)
-		goto out1;
-
-	pd = ib_alloc_pd(ibdev, 0);
-	if (IS_ERR(pd)) {
-		ret = PTR_ERR(pd);
-		goto out1;
-	}
-
-	cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
-	if (IS_ERR(cq)) {
-		ret = PTR_ERR(cq);
-		goto out2;
-	}
-
-	qp_init_attr.recv_cq = cq;
-	qp_init_attr.send_cq = cq;
-	qp = ib_create_qp(pd, &qp_init_attr);
-	if (IS_ERR(qp)) {
-		ret = PTR_ERR(qp);
-		goto out3;
-	}
-
-	qp_attr.qp_state = IB_QPS_INIT;
-	ret = ib_modify_qp(qp, &qp_attr,
-			   IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX |
-				   IB_QP_QKEY);
-	if (ret)
-		goto out4;
-
-	qp_attr.qp_state = IB_QPS_RTR;
-	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
-	if (ret)
-		goto out4;
-
-	qp_attr.qp_state = IB_QPS_RTS;
-	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
-	if (ret)
-		goto out4;
-
-	ret = test_wc_do_send(dev, qp);
-	if (ret < 0)
-		goto out4;
-
-	ret = test_wc_poll_cq_result(dev, cq);
-	if (ret > 0) {
-		dev->wc_support = true;
-		ret = 0;
-	}
-
-out4:
-	ib_destroy_qp(qp);
-out3:
-	ib_destroy_cq(cq);
-out2:
-	ib_dealloc_pd(pd);
-out1:
-	mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg);
-print_err:
-	if (ret)
-		mlx5_ib_err(
-			dev,
-			"Error %d while trying to test write-combining support\n",
-			ret);
-	return ret;
-}
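The block removed above was the probe itself: it allocated a dedicated bfreg, PD, CQ, and UD QP, posted TEST_WC_NUM_WQES unsignaled NOP WQEs whose MMIO copies (and only those) carried MLX5_WQE_CTRL_CQ_UPDATE, then one signaled WR_ID_END WQE. A completion carrying wr_id == WR_ID_BF therefore meant the write-combined BlueFlame copy reached the HCA intact, and wc_support was set. The same kind of test now runs lazily, at most once per device, from mlx5_core.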
@@ -341,7 +341,6 @@ struct mlx5_ib_flow_db {
 * rely on the range reserved for that use in the ib_qp_create_flags enum.
 */
 #define MLX5_IB_QP_CREATE_SQPN_QP1	IB_QP_CREATE_RESERVED_START
-#define MLX5_IB_QP_CREATE_WC_TEST	(IB_QP_CREATE_RESERVED_START << 1)

 struct wr_list {
 	u16 opcode;
@@ -1123,7 +1122,6 @@ struct mlx5_ib_dev {
 	u8 ib_active:1;
 	u8 is_rep:1;
 	u8 lag_active:1;
-	u8 wc_support:1;
 	u8 fill_delay;
 	struct umr_common umrc;
 	/* sync used page count stats
@@ -1149,7 +1147,6 @@ struct mlx5_ib_dev {
 	/* Array with num_ports elements */
 	struct mlx5_ib_port *port;
 	struct mlx5_sq_bfreg bfreg;
-	struct mlx5_sq_bfreg wc_bfreg;
 	struct mlx5_sq_bfreg fp_bfreg;
 	struct mlx5_ib_delay_drop delay_drop;
 	const struct mlx5_ib_profile *profile;
...
@@ -1107,8 +1107,6 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev,

 	if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
 		qp->bf.bfreg = &dev->fp_bfreg;
-	else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
-		qp->bf.bfreg = &dev->wc_bfreg;
 	else
 		qp->bf.bfreg = &dev->bfreg;
@@ -2959,14 +2957,6 @@ static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
 		return;
 	}

-	if (flag == MLX5_IB_QP_CREATE_WC_TEST) {
-		/*
-		 * Special case, if condition didn't meet, it won't be error,
-		 * just different in-kernel flow.
-		 */
-		*flags &= ~MLX5_IB_QP_CREATE_WC_TEST;
-		return;
-	}
-
 	mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag);
 }
@@ -3027,8 +3017,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 					IB_QP_CREATE_PCI_WRITE_END_PADDING,
 					MLX5_CAP_GEN(mdev, end_pad), qp);
-	process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST,
-			    qp_type != MLX5_IB_QPT_REG_UMR, qp);
 	process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1,
 			    true, qp);
@@ -4609,10 +4597,6 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
 	if (qp->type == IB_QPT_RAW_PACKET || qp->type == MLX5_IB_QPT_REG_UMR)
 		return true;

-	/* Internal QP used for wc testing, with NOPs in wq */
-	if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
-		return true;
-
 	return false;
 }
...
@@ -17,7 +17,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
 		lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
 		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \
-		fw_reset.o qos.o lib/tout.o lib/aso.o
+		fw_reset.o qos.o lib/tout.o lib/aso.o wc.o

 #
 # Netdev basic
...
@@ -1819,6 +1819,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
 	mutex_init(&dev->intf_state_mutex);
 	lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key);
 	mutex_init(&dev->mlx5e_res.uplink_netdev_lock);
+	mutex_init(&dev->wc_state_lock);

 	mutex_init(&priv->bfregs.reg_head.lock);
 	mutex_init(&priv->bfregs.wc_head.lock);
@@ -1916,6 +1917,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
 	mutex_destroy(&priv->alloc_mutex);
 	mutex_destroy(&priv->bfregs.wc_head.lock);
 	mutex_destroy(&priv->bfregs.reg_head.lock);
+	mutex_destroy(&dev->wc_state_lock);
 	mutex_destroy(&dev->mlx5e_res.uplink_netdev_lock);
 	mutex_destroy(&dev->intf_state_mutex);
 	lockdep_unregister_key(&dev->lock_key);
...
... (diff of one file collapsed)
@@ -766,6 +766,12 @@ struct mlx5_hca_cap {
 	u32 max[MLX5_UN_SZ_DW(hca_cap_union)];
 };

+enum mlx5_wc_state {
+	MLX5_WC_STATE_UNINITIALIZED,
+	MLX5_WC_STATE_UNSUPPORTED,
+	MLX5_WC_STATE_SUPPORTED,
+};
+
 struct mlx5_core_dev {
 	struct device *device;
 	enum mlx5_coredev_type coredev_type;
@@ -824,6 +830,9 @@ struct mlx5_core_dev {
 #endif
 	u64 num_ipsec_offloads;
 	struct mlx5_sd *sd;
+	enum mlx5_wc_state wc_state;
+	/* sync write combining state */
+	struct mutex wc_state_lock;
 };

 struct mlx5_db {
@@ -1375,4 +1384,6 @@ static inline bool mlx5_is_macsec_roce_supported(struct mlx5_core_dev *mdev)
 enum {
 	MLX5_OCTWORD = 16,
 };

+bool mlx5_wc_support_get(struct mlx5_core_dev *mdev);
+
 #endif /* MLX5_DRIVER_H */