Commit d9aaed83 authored by Artemy Kovalyov's avatar Artemy Kovalyov Committed by David S. Miller

{net,IB}/mlx5: Refactor page fault handling

* Update page fault event according to last specification.
* Separate code path for page fault EQ, completion EQ and async EQ.
* Move page fault handling work queue from mlx5_ib static variable
  into mlx5_core page fault EQ.
* Allocate memory to store ODP event dynamically as the
  events arrive, since in atomic context - use mempool.
* Make mlx5_ib page fault handler run in process context.
Signed-off-by: default avatarArtemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: default avatarLeon Romanovsky <leon@kernel.org>
Signed-off-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 223cdc72
...@@ -3319,6 +3319,9 @@ static struct mlx5_interface mlx5_ib_interface = { ...@@ -3319,6 +3319,9 @@ static struct mlx5_interface mlx5_ib_interface = {
.add = mlx5_ib_add, .add = mlx5_ib_add,
.remove = mlx5_ib_remove, .remove = mlx5_ib_remove,
.event = mlx5_ib_event, .event = mlx5_ib_event,
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
.pfault = mlx5_ib_pfault,
#endif
.protocol = MLX5_INTERFACE_PROTOCOL_IB, .protocol = MLX5_INTERFACE_PROTOCOL_IB,
}; };
...@@ -3329,25 +3332,14 @@ static int __init mlx5_ib_init(void) ...@@ -3329,25 +3332,14 @@ static int __init mlx5_ib_init(void)
if (deprecated_prof_sel != 2) if (deprecated_prof_sel != 2)
pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n"); pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
err = mlx5_ib_odp_init();
if (err)
return err;
err = mlx5_register_interface(&mlx5_ib_interface); err = mlx5_register_interface(&mlx5_ib_interface);
if (err)
goto clean_odp;
return err;
clean_odp:
mlx5_ib_odp_cleanup();
return err; return err;
} }
static void __exit mlx5_ib_cleanup(void) static void __exit mlx5_ib_cleanup(void)
{ {
mlx5_unregister_interface(&mlx5_ib_interface); mlx5_unregister_interface(&mlx5_ib_interface);
mlx5_ib_odp_cleanup();
} }
module_init(mlx5_ib_init); module_init(mlx5_ib_init);
......
...@@ -277,29 +277,6 @@ struct mlx5_ib_rwq_ind_table { ...@@ -277,29 +277,6 @@ struct mlx5_ib_rwq_ind_table {
u32 rqtn; u32 rqtn;
}; };
/*
* Connect-IB can trigger up to four concurrent pagefaults
* per-QP.
*/
enum mlx5_ib_pagefault_context {
MLX5_IB_PAGEFAULT_RESPONDER_READ,
MLX5_IB_PAGEFAULT_REQUESTOR_READ,
MLX5_IB_PAGEFAULT_RESPONDER_WRITE,
MLX5_IB_PAGEFAULT_REQUESTOR_WRITE,
MLX5_IB_PAGEFAULT_CONTEXTS
};
static inline enum mlx5_ib_pagefault_context
mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault)
{
return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE);
}
struct mlx5_ib_pfault {
struct work_struct work;
struct mlx5_pagefault mpfault;
};
struct mlx5_ib_ubuffer { struct mlx5_ib_ubuffer {
struct ib_umem *umem; struct ib_umem *umem;
int buf_size; int buf_size;
...@@ -385,20 +362,6 @@ struct mlx5_ib_qp { ...@@ -385,20 +362,6 @@ struct mlx5_ib_qp {
/* Store signature errors */ /* Store signature errors */
bool signature_en; bool signature_en;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/*
* A flag that is true for QP's that are in a state that doesn't
* allow page faults, and shouldn't schedule any more faults.
*/
int disable_page_faults;
/*
* The disable_page_faults_lock protects a QP's disable_page_faults
* field, allowing for a thread to atomically check whether the QP
* allows page faults, and if so schedule a page fault.
*/
spinlock_t disable_page_faults_lock;
struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
#endif
struct list_head qps_list; struct list_head qps_list;
struct list_head cq_recv_list; struct list_head cq_recv_list;
struct list_head cq_send_list; struct list_head cq_send_list;
...@@ -869,18 +832,13 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, ...@@ -869,18 +832,13 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
extern struct workqueue_struct *mlx5_ib_page_fault_wq;
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp, void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
struct mlx5_ib_pfault *pfault); struct mlx5_pagefault *pfault);
void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp);
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev); void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void); int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void); void mlx5_ib_odp_cleanup(void);
void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start, void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
unsigned long end); unsigned long end);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
...@@ -889,13 +847,10 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) ...@@ -889,13 +847,10 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
return; return;
} }
static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {}
static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; } static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {} static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; } static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {} static inline void mlx5_ib_odp_cleanup(void) {}
static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {}
static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
......
This diff is collapsed.
...@@ -1526,9 +1526,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, ...@@ -1526,9 +1526,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
&qp->raw_packet_qp.rq.base : &qp->raw_packet_qp.rq.base :
&qp->trans_qp.base; &qp->trans_qp.base;
if (init_attr->qp_type != IB_QPT_RAW_PACKET)
mlx5_ib_odp_create_qp(qp);
mutex_init(&qp->mutex); mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock); spin_lock_init(&qp->rq.lock);
...@@ -1923,7 +1920,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) ...@@ -1923,7 +1920,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
if (qp->state != IB_QPS_RESET) { if (qp->state != IB_QPS_RESET) {
if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) { if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
mlx5_ib_qp_disable_pagefaults(qp);
err = mlx5_core_qp_modify(dev->mdev, err = mlx5_core_qp_modify(dev->mdev,
MLX5_CMD_OP_2RST_QP, 0, MLX5_CMD_OP_2RST_QP, 0,
NULL, &base->mqp); NULL, &base->mqp);
...@@ -2823,16 +2819,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, ...@@ -2823,16 +2819,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (mlx5_st < 0) if (mlx5_st < 0)
goto out; goto out;
/* If moving to a reset or error state, we must disable page faults on
* this QP and flush all current page faults. Otherwise a stale page
* fault may attempt to work on this QP after it is reset and moved
* again to RTS, and may cause the driver and the device to get out of
* sync. */
if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
(new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
(qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
mlx5_ib_qp_disable_pagefaults(qp);
if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE || if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
!optab[mlx5_cur][mlx5_new]) !optab[mlx5_cur][mlx5_new])
goto out; goto out;
...@@ -2864,10 +2850,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, ...@@ -2864,10 +2850,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (err) if (err)
goto out; goto out;
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
(qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
mlx5_ib_qp_enable_pagefaults(qp);
qp->state = new_state; qp->state = new_state;
if (attr_mask & IB_QP_ACCESS_FLAGS) if (attr_mask & IB_QP_ACCESS_FLAGS)
...@@ -4533,14 +4515,6 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, ...@@ -4533,14 +4515,6 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask, return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
qp_init_attr); qp_init_attr);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/*
* Wait for any outstanding page faults, in case the user frees memory
* based upon this query's result.
*/
flush_workqueue(mlx5_ib_page_fault_wq);
#endif
mutex_lock(&qp->mutex); mutex_lock(&qp->mutex);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
......
...@@ -71,6 +71,16 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) ...@@ -71,6 +71,16 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
if (dev_ctx->context) { if (dev_ctx->context) {
spin_lock_irq(&priv->ctx_lock); spin_lock_irq(&priv->ctx_lock);
list_add_tail(&dev_ctx->list, &priv->ctx_list); list_add_tail(&dev_ctx->list, &priv->ctx_list);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (dev_ctx->intf->pfault) {
if (priv->pfault) {
mlx5_core_err(dev, "multiple page fault handlers not supported");
} else {
priv->pfault_ctx = dev_ctx->context;
priv->pfault = dev_ctx->intf->pfault;
}
}
#endif
spin_unlock_irq(&priv->ctx_lock); spin_unlock_irq(&priv->ctx_lock);
} else { } else {
kfree(dev_ctx); kfree(dev_ctx);
...@@ -97,6 +107,15 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) ...@@ -97,6 +107,15 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
if (!dev_ctx) if (!dev_ctx)
return; return;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
spin_lock_irq(&priv->ctx_lock);
if (priv->pfault == dev_ctx->intf->pfault)
priv->pfault = NULL;
spin_unlock_irq(&priv->ctx_lock);
synchronize_srcu(&priv->pfault_srcu);
#endif
spin_lock_irq(&priv->ctx_lock); spin_lock_irq(&priv->ctx_lock);
list_del(&dev_ctx->list); list_del(&dev_ctx->list);
spin_unlock_irq(&priv->ctx_lock); spin_unlock_irq(&priv->ctx_lock);
...@@ -329,6 +348,20 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, ...@@ -329,6 +348,20 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
spin_unlock_irqrestore(&priv->ctx_lock, flags); spin_unlock_irqrestore(&priv->ctx_lock, flags);
} }
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_core_page_fault(struct mlx5_core_dev *dev,
struct mlx5_pagefault *pfault)
{
struct mlx5_priv *priv = &dev->priv;
int srcu_idx;
srcu_idx = srcu_read_lock(&priv->pfault_srcu);
if (priv->pfault)
priv->pfault(dev, priv->pfault_ctx, pfault);
srcu_read_unlock(&priv->pfault_srcu, srcu_idx);
}
#endif
void mlx5_dev_list_lock(void) void mlx5_dev_list_lock(void)
{ {
mutex_lock(&mlx5_intf_mutex); mutex_lock(&mlx5_intf_mutex);
......
...@@ -753,7 +753,8 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) ...@@ -753,7 +753,8 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev)
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
err = mlx5_create_map_eq(dev, eq, err = mlx5_create_map_eq(dev, eq,
i + MLX5_EQ_VEC_COMP_BASE, nent, 0, i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
name, &dev->priv.uuari.uars[0]); name, &dev->priv.uuari.uars[0],
MLX5_EQ_TYPE_COMP);
if (err) { if (err) {
kfree(eq); kfree(eq);
goto clean; goto clean;
...@@ -1295,10 +1296,19 @@ static int init_one(struct pci_dev *pdev, ...@@ -1295,10 +1296,19 @@ static int init_one(struct pci_dev *pdev,
spin_lock_init(&priv->ctx_lock); spin_lock_init(&priv->ctx_lock);
mutex_init(&dev->pci_status_mutex); mutex_init(&dev->pci_status_mutex);
mutex_init(&dev->intf_state_mutex); mutex_init(&dev->intf_state_mutex);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
err = init_srcu_struct(&priv->pfault_srcu);
if (err) {
dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n",
err);
goto clean_dev;
}
#endif
err = mlx5_pci_init(dev, priv); err = mlx5_pci_init(dev, priv);
if (err) { if (err) {
dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err); dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
goto clean_dev; goto clean_srcu;
} }
err = mlx5_health_init(dev); err = mlx5_health_init(dev);
...@@ -1332,7 +1342,11 @@ static int init_one(struct pci_dev *pdev, ...@@ -1332,7 +1342,11 @@ static int init_one(struct pci_dev *pdev,
mlx5_health_cleanup(dev); mlx5_health_cleanup(dev);
close_pci: close_pci:
mlx5_pci_close(dev, priv); mlx5_pci_close(dev, priv);
clean_srcu:
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
cleanup_srcu_struct(&priv->pfault_srcu);
clean_dev: clean_dev:
#endif
pci_set_drvdata(pdev, NULL); pci_set_drvdata(pdev, NULL);
devlink_free(devlink); devlink_free(devlink);
...@@ -1357,6 +1371,9 @@ static void remove_one(struct pci_dev *pdev) ...@@ -1357,6 +1371,9 @@ static void remove_one(struct pci_dev *pdev)
mlx5_pagealloc_cleanup(dev); mlx5_pagealloc_cleanup(dev);
mlx5_health_cleanup(dev); mlx5_health_cleanup(dev);
mlx5_pci_close(dev, priv); mlx5_pci_close(dev, priv);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
cleanup_srcu_struct(&priv->pfault_srcu);
#endif
pci_set_drvdata(pdev, NULL); pci_set_drvdata(pdev, NULL);
devlink_free(devlink); devlink_free(devlink);
} }
......
...@@ -86,6 +86,8 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); ...@@ -86,6 +86,8 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
unsigned long param); unsigned long param);
void mlx5_core_page_fault(struct mlx5_core_dev *dev,
struct mlx5_pagefault *pfault);
void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_enter_error_state(struct mlx5_core_dev *dev);
void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev);
......
...@@ -143,95 +143,6 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) ...@@ -143,95 +143,6 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
mlx5_core_put_rsc(common); mlx5_core_put_rsc(common);
} }
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
{
struct mlx5_eqe_page_fault *pf_eqe = &eqe->data.page_fault;
int qpn = be32_to_cpu(pf_eqe->flags_qpn) & MLX5_QPN_MASK;
struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, qpn);
struct mlx5_core_qp *qp =
container_of(common, struct mlx5_core_qp, common);
struct mlx5_pagefault pfault;
if (!qp) {
mlx5_core_warn(dev, "ODP event for non-existent QP %06x\n",
qpn);
return;
}
pfault.event_subtype = eqe->sub_type;
pfault.flags = (be32_to_cpu(pf_eqe->flags_qpn) >> MLX5_QPN_BITS) &
(MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE | MLX5_PFAULT_RDMA);
pfault.bytes_committed = be32_to_cpu(
pf_eqe->bytes_committed);
mlx5_core_dbg(dev,
"PAGE_FAULT: subtype: 0x%02x, flags: 0x%02x,\n",
eqe->sub_type, pfault.flags);
switch (eqe->sub_type) {
case MLX5_PFAULT_SUBTYPE_RDMA:
/* RDMA based event */
pfault.rdma.r_key =
be32_to_cpu(pf_eqe->rdma.r_key);
pfault.rdma.packet_size =
be16_to_cpu(pf_eqe->rdma.packet_length);
pfault.rdma.rdma_op_len =
be32_to_cpu(pf_eqe->rdma.rdma_op_len);
pfault.rdma.rdma_va =
be64_to_cpu(pf_eqe->rdma.rdma_va);
mlx5_core_dbg(dev,
"PAGE_FAULT: qpn: 0x%06x, r_key: 0x%08x,\n",
qpn, pfault.rdma.r_key);
mlx5_core_dbg(dev,
"PAGE_FAULT: rdma_op_len: 0x%08x,\n",
pfault.rdma.rdma_op_len);
mlx5_core_dbg(dev,
"PAGE_FAULT: rdma_va: 0x%016llx,\n",
pfault.rdma.rdma_va);
mlx5_core_dbg(dev,
"PAGE_FAULT: bytes_committed: 0x%06x\n",
pfault.bytes_committed);
break;
case MLX5_PFAULT_SUBTYPE_WQE:
/* WQE based event */
pfault.wqe.wqe_index =
be16_to_cpu(pf_eqe->wqe.wqe_index);
pfault.wqe.packet_size =
be16_to_cpu(pf_eqe->wqe.packet_length);
mlx5_core_dbg(dev,
"PAGE_FAULT: qpn: 0x%06x, wqe_index: 0x%04x,\n",
qpn, pfault.wqe.wqe_index);
mlx5_core_dbg(dev,
"PAGE_FAULT: bytes_committed: 0x%06x\n",
pfault.bytes_committed);
break;
default:
mlx5_core_warn(dev,
"Unsupported page fault event sub-type: 0x%02hhx, QP %06x\n",
eqe->sub_type, qpn);
/* Unsupported page faults should still be resolved by the
* page fault handler
*/
}
if (qp->pfault_handler) {
qp->pfault_handler(qp, &pfault);
} else {
mlx5_core_err(dev,
"ODP event for QP %08x, without a fault handler in QP\n",
qpn);
/* Page fault will remain unresolved. QP will hang until it is
* destroyed
*/
}
mlx5_core_put_rsc(common);
}
#endif
static int create_qprqsq_common(struct mlx5_core_dev *dev, static int create_qprqsq_common(struct mlx5_core_dev *dev,
struct mlx5_core_qp *qp, struct mlx5_core_qp *qp,
int rsc_type) int rsc_type)
...@@ -506,25 +417,6 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn) ...@@ -506,25 +417,6 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
} }
EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
u8 flags, int error)
{
u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0};
MLX5_SET(page_fault_resume_in, in, opcode,
MLX5_CMD_OP_PAGE_FAULT_RESUME);
MLX5_SET(page_fault_resume_in, in, wq_number, qpn);
MLX5_SET(page_fault_resume_in, in, page_fault_type, flags);
if (error)
MLX5_SET(page_fault_resume_in, in, error, 1);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
#endif
int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
struct mlx5_core_qp *rq) struct mlx5_core_qp *rq)
{ {
......
...@@ -534,7 +534,9 @@ struct mlx5_eqe_page_fault { ...@@ -534,7 +534,9 @@ struct mlx5_eqe_page_fault {
__be16 wqe_index; __be16 wqe_index;
u16 reserved2; u16 reserved2;
__be16 packet_length; __be16 packet_length;
u8 reserved3[12]; __be32 token;
u8 reserved4[8];
__be32 pftype_wq;
} __packed wqe; } __packed wqe;
struct { struct {
__be32 r_key; __be32 r_key;
...@@ -542,9 +544,9 @@ struct mlx5_eqe_page_fault { ...@@ -542,9 +544,9 @@ struct mlx5_eqe_page_fault {
__be16 packet_length; __be16 packet_length;
__be32 rdma_op_len; __be32 rdma_op_len;
__be64 rdma_va; __be64 rdma_va;
__be32 pftype_token;
} __packed rdma; } __packed rdma;
} __packed; } __packed;
__be32 flags_qpn;
} __packed; } __packed;
struct mlx5_eqe_vport_change { struct mlx5_eqe_vport_change {
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/radix-tree.h> #include <linux/radix-tree.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/mempool.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/mlx5/device.h> #include <linux/mlx5/device.h>
...@@ -83,6 +84,7 @@ enum { ...@@ -83,6 +84,7 @@ enum {
MLX5_EQ_VEC_PAGES = 0, MLX5_EQ_VEC_PAGES = 0,
MLX5_EQ_VEC_CMD = 1, MLX5_EQ_VEC_CMD = 1,
MLX5_EQ_VEC_ASYNC = 2, MLX5_EQ_VEC_ASYNC = 2,
MLX5_EQ_VEC_PFAULT = 3,
MLX5_EQ_VEC_COMP_BASE, MLX5_EQ_VEC_COMP_BASE,
}; };
...@@ -178,6 +180,14 @@ enum mlx5_port_status { ...@@ -178,6 +180,14 @@ enum mlx5_port_status {
MLX5_PORT_DOWN = 2, MLX5_PORT_DOWN = 2,
}; };
enum mlx5_eq_type {
MLX5_EQ_TYPE_COMP,
MLX5_EQ_TYPE_ASYNC,
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
MLX5_EQ_TYPE_PF,
#endif
};
struct mlx5_uuar_info { struct mlx5_uuar_info {
struct mlx5_uar *uars; struct mlx5_uar *uars;
int num_uars; int num_uars;
...@@ -333,6 +343,14 @@ struct mlx5_eq_tasklet { ...@@ -333,6 +343,14 @@ struct mlx5_eq_tasklet {
spinlock_t lock; spinlock_t lock;
}; };
struct mlx5_eq_pagefault {
struct work_struct work;
/* Pagefaults lock */
spinlock_t lock;
struct workqueue_struct *wq;
mempool_t *pool;
};
struct mlx5_eq { struct mlx5_eq {
struct mlx5_core_dev *dev; struct mlx5_core_dev *dev;
__be32 __iomem *doorbell; __be32 __iomem *doorbell;
...@@ -346,7 +364,13 @@ struct mlx5_eq { ...@@ -346,7 +364,13 @@ struct mlx5_eq {
struct list_head list; struct list_head list;
int index; int index;
struct mlx5_rsc_debug *dbg; struct mlx5_rsc_debug *dbg;
enum mlx5_eq_type type;
union {
struct mlx5_eq_tasklet tasklet_ctx; struct mlx5_eq_tasklet tasklet_ctx;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct mlx5_eq_pagefault pf_ctx;
#endif
};
}; };
struct mlx5_core_psv { struct mlx5_core_psv {
...@@ -377,6 +401,8 @@ struct mlx5_core_mkey { ...@@ -377,6 +401,8 @@ struct mlx5_core_mkey {
u32 pd; u32 pd;
}; };
#define MLX5_24BIT_MASK ((1 << 24) - 1)
enum mlx5_res_type { enum mlx5_res_type {
MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP, MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP,
MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ, MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ,
...@@ -411,6 +437,9 @@ struct mlx5_eq_table { ...@@ -411,6 +437,9 @@ struct mlx5_eq_table {
struct mlx5_eq pages_eq; struct mlx5_eq pages_eq;
struct mlx5_eq async_eq; struct mlx5_eq async_eq;
struct mlx5_eq cmd_eq; struct mlx5_eq cmd_eq;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct mlx5_eq pfault_eq;
#endif
int num_comp_vectors; int num_comp_vectors;
/* protect EQs list /* protect EQs list
*/ */
...@@ -497,6 +526,7 @@ struct mlx5_fc_stats { ...@@ -497,6 +526,7 @@ struct mlx5_fc_stats {
struct mlx5_eswitch; struct mlx5_eswitch;
struct mlx5_lag; struct mlx5_lag;
struct mlx5_pagefault;
struct mlx5_rl_entry { struct mlx5_rl_entry {
u32 rate; u32 rate;
...@@ -601,6 +631,14 @@ struct mlx5_priv { ...@@ -601,6 +631,14 @@ struct mlx5_priv {
struct mlx5_rl_table rl_table; struct mlx5_rl_table rl_table;
struct mlx5_port_module_event_stats pme_stats; struct mlx5_port_module_event_stats pme_stats;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void (*pfault)(struct mlx5_core_dev *dev,
void *context,
struct mlx5_pagefault *pfault);
void *pfault_ctx;
struct srcu_struct pfault_srcu;
#endif
}; };
enum mlx5_device_state { enum mlx5_device_state {
...@@ -619,6 +657,50 @@ enum mlx5_pci_status { ...@@ -619,6 +657,50 @@ enum mlx5_pci_status {
MLX5_PCI_STATUS_ENABLED, MLX5_PCI_STATUS_ENABLED,
}; };
enum mlx5_pagefault_type_flags {
MLX5_PFAULT_REQUESTOR = 1 << 0,
MLX5_PFAULT_WRITE = 1 << 1,
MLX5_PFAULT_RDMA = 1 << 2,
};
/* Contains the details of a pagefault. */
struct mlx5_pagefault {
u32 bytes_committed;
u32 token;
u8 event_subtype;
u8 type;
union {
/* Initiator or send message responder pagefault details. */
struct {
/* Received packet size, only valid for responders. */
u32 packet_size;
/*
* Number of resource holding WQE, depends on type.
*/
u32 wq_num;
/*
* WQE index. Refers to either the send queue or
* receive queue, according to event_subtype.
*/
u16 wqe_index;
} wqe;
/* RDMA responder pagefault details */
struct {
u32 r_key;
/*
* Received packet size, minimal size page fault
* resolution required for forward progress.
*/
u32 packet_size;
u32 rdma_op_len;
u64 rdma_va;
} rdma;
};
struct mlx5_eq *eq;
struct work_struct work;
};
struct mlx5_td { struct mlx5_td {
struct list_head tirs_list; struct list_head tirs_list;
u32 tdn; u32 tdn;
...@@ -879,15 +961,13 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas); ...@@ -879,15 +961,13 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
#endif
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec); void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec);
void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
int nent, u64 mask, const char *name, struct mlx5_uar *uar); int nent, u64 mask, const char *name,
struct mlx5_uar *uar, enum mlx5_eq_type type);
int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
int mlx5_start_eqs(struct mlx5_core_dev *dev); int mlx5_start_eqs(struct mlx5_core_dev *dev);
int mlx5_stop_eqs(struct mlx5_core_dev *dev); int mlx5_stop_eqs(struct mlx5_core_dev *dev);
...@@ -926,6 +1006,10 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev, ...@@ -926,6 +1006,10 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
struct mlx5_odp_caps *odp_caps); struct mlx5_odp_caps *odp_caps);
int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
u8 port_num, void *out, size_t sz); u8 port_num, void *out, size_t sz);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
u32 wq_num, u8 type, int error);
#endif
int mlx5_init_rl_table(struct mlx5_core_dev *dev); int mlx5_init_rl_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
...@@ -974,6 +1058,9 @@ struct mlx5_interface { ...@@ -974,6 +1058,9 @@ struct mlx5_interface {
void (*detach)(struct mlx5_core_dev *dev, void *context); void (*detach)(struct mlx5_core_dev *dev, void *context);
void (*event)(struct mlx5_core_dev *dev, void *context, void (*event)(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param); enum mlx5_dev_event event, unsigned long param);
void (*pfault)(struct mlx5_core_dev *dev,
void *context,
struct mlx5_pagefault *pfault);
void * (*get_dev)(void *context); void * (*get_dev)(void *context);
int protocol; int protocol;
struct list_head list; struct list_head list;
......
...@@ -50,9 +50,6 @@ ...@@ -50,9 +50,6 @@
#define MLX5_BSF_APPTAG_ESCAPE 0x1 #define MLX5_BSF_APPTAG_ESCAPE 0x1
#define MLX5_BSF_APPREF_ESCAPE 0x2 #define MLX5_BSF_APPREF_ESCAPE 0x2
#define MLX5_QPN_BITS 24
#define MLX5_QPN_MASK ((1 << MLX5_QPN_BITS) - 1)
enum mlx5_qp_optpar { enum mlx5_qp_optpar {
MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0, MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
MLX5_QP_OPTPAR_RRE = 1 << 1, MLX5_QP_OPTPAR_RRE = 1 << 1,
...@@ -418,46 +415,9 @@ struct mlx5_stride_block_ctrl_seg { ...@@ -418,46 +415,9 @@ struct mlx5_stride_block_ctrl_seg {
__be16 num_entries; __be16 num_entries;
}; };
enum mlx5_pagefault_flags {
MLX5_PFAULT_REQUESTOR = 1 << 0,
MLX5_PFAULT_WRITE = 1 << 1,
MLX5_PFAULT_RDMA = 1 << 2,
};
/* Contains the details of a pagefault. */
struct mlx5_pagefault {
u32 bytes_committed;
u8 event_subtype;
enum mlx5_pagefault_flags flags;
union {
/* Initiator or send message responder pagefault details. */
struct {
/* Received packet size, only valid for responders. */
u32 packet_size;
/*
* WQE index. Refers to either the send queue or
* receive queue, according to event_subtype.
*/
u16 wqe_index;
} wqe;
/* RDMA responder pagefault details */
struct {
u32 r_key;
/*
* Received packet size, minimal size page fault
* resolution required for forward progress.
*/
u32 packet_size;
u32 rdma_op_len;
u64 rdma_va;
} rdma;
};
};
struct mlx5_core_qp { struct mlx5_core_qp {
struct mlx5_core_rsc_common common; /* must be first */ struct mlx5_core_rsc_common common; /* must be first */
void (*event) (struct mlx5_core_qp *, int); void (*event) (struct mlx5_core_qp *, int);
void (*pfault_handler)(struct mlx5_core_qp *, struct mlx5_pagefault *);
int qpn; int qpn;
struct mlx5_rsc_debug *dbg; struct mlx5_rsc_debug *dbg;
int pid; int pid;
...@@ -557,10 +517,6 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev); ...@@ -557,10 +517,6 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev);
void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev); void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
u8 context, int error);
#endif
int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
struct mlx5_core_qp *rq); struct mlx5_core_qp *rq);
void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment