Commit ccc67ef5, authored by Tomer Tayar, committed by David S. Miller

qede: Error recovery process

This patch adds the error recovery process in the qede driver.
The process includes a partial/customized driver unload and load, which
allows it to look like a short suspend period to the kernel while
preserving the net devices' state.
Signed-off-by: Tomer Tayar <tomer.tayar@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 64515dc8
......@@ -162,6 +162,7 @@ struct qede_rdma_dev {
struct list_head entry;
struct list_head rdma_event_list;
struct workqueue_struct *rdma_wq;
bool exp_recovery;
};
struct qede_ptp;
......@@ -264,6 +265,7 @@ struct qede_dev {
enum QEDE_STATE {
QEDE_STATE_CLOSED,
QEDE_STATE_OPEN,
QEDE_STATE_RECOVERY,
};
#define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo))
......@@ -462,6 +464,7 @@ struct qede_fastpath {
#define QEDE_CSUM_UNNECESSARY BIT(1)
#define QEDE_TUNN_CSUM_UNNECESSARY BIT(2)
#define QEDE_SP_RECOVERY 0
#define QEDE_SP_RX_MODE 1
#ifdef CONFIG_RFS_ACCEL
......
......@@ -50,6 +50,8 @@ static void _qede_rdma_dev_add(struct qede_dev *edev)
if (!qedr_drv)
return;
/* Leftovers from previous error recovery */
edev->rdma_info.exp_recovery = false;
edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev,
edev->ndev);
}
......@@ -87,11 +89,17 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev)
destroy_workqueue(edev->rdma_info.rdma_wq);
}
int qede_rdma_dev_add(struct qede_dev *edev)
int qede_rdma_dev_add(struct qede_dev *edev, bool recovery)
{
int rc = 0;
int rc;
if (!qede_rdma_supported(edev))
return 0;
/* Cannot start qedr while recovering since it wasn't fully stopped */
if (recovery)
return 0;
if (qede_rdma_supported(edev)) {
rc = qede_rdma_create_wq(edev);
if (rc)
return rc;
......@@ -101,7 +109,6 @@ int qede_rdma_dev_add(struct qede_dev *edev)
list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
_qede_rdma_dev_add(edev);
mutex_unlock(&qedr_dev_list_lock);
}
return rc;
}
......@@ -110,19 +117,30 @@ static void _qede_rdma_dev_remove(struct qede_dev *edev)
{
if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
qedr_drv->remove(edev->rdma_info.qedr_dev);
edev->rdma_info.qedr_dev = NULL;
}
void qede_rdma_dev_remove(struct qede_dev *edev)
void qede_rdma_dev_remove(struct qede_dev *edev, bool recovery)
{
if (!qede_rdma_supported(edev))
return;
/* Cannot remove qedr while recovering since it wasn't fully stopped */
if (!recovery) {
qede_rdma_destroy_wq(edev);
mutex_lock(&qedr_dev_list_lock);
if (!edev->rdma_info.exp_recovery)
_qede_rdma_dev_remove(edev);
edev->rdma_info.qedr_dev = NULL;
list_del(&edev->rdma_info.entry);
mutex_unlock(&qedr_dev_list_lock);
} else {
if (!edev->rdma_info.exp_recovery) {
mutex_lock(&qedr_dev_list_lock);
_qede_rdma_dev_remove(edev);
mutex_unlock(&qedr_dev_list_lock);
}
edev->rdma_info.exp_recovery = true;
}
}
static void _qede_rdma_dev_open(struct qede_dev *edev)
......@@ -204,7 +222,8 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv)
mutex_lock(&qedr_dev_list_lock);
list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
if (edev->rdma_info.qedr_dev)
/* If device has experienced recovery it was already removed */
if (edev->rdma_info.qedr_dev && !edev->rdma_info.exp_recovery)
_qede_rdma_dev_remove(edev);
}
qedr_drv = NULL;
......@@ -284,6 +303,10 @@ static void qede_rdma_add_event(struct qede_dev *edev,
{
struct qede_rdma_event_work *event_node;
/* If a recovery was experienced avoid adding the event */
if (edev->rdma_info.exp_recovery)
return;
if (!edev->rdma_info.qedr_dev)
return;
......
......@@ -74,21 +74,23 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv);
bool qede_rdma_supported(struct qede_dev *dev);
#if IS_ENABLED(CONFIG_QED_RDMA)
int qede_rdma_dev_add(struct qede_dev *dev);
int qede_rdma_dev_add(struct qede_dev *dev, bool recovery);
void qede_rdma_dev_event_open(struct qede_dev *dev);
void qede_rdma_dev_event_close(struct qede_dev *dev);
void qede_rdma_dev_remove(struct qede_dev *dev);
void qede_rdma_dev_remove(struct qede_dev *dev, bool recovery);
void qede_rdma_event_changeaddr(struct qede_dev *edr);
#else
static inline int qede_rdma_dev_add(struct qede_dev *dev)
static inline int qede_rdma_dev_add(struct qede_dev *dev,
bool recovery)
{
return 0;
}
static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {}
static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {}
static inline void qede_rdma_dev_remove(struct qede_dev *dev) {}
static inline void qede_rdma_dev_remove(struct qede_dev *dev,
bool recovery) {}
static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {}
#endif
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment