Commit ccc67ef5, authored by Tomer Tayar, committed by David S. Miller

qede: Error recovery process

This patch adds the error recovery process in the qede driver.
The process includes a partial/customized driver unload and load, which
allows it to look like a short suspend period to the kernel while
preserving the net devices' state.
Signed-off-by: Tomer Tayar <tomer.tayar@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 64515dc8
...@@ -162,6 +162,7 @@ struct qede_rdma_dev { ...@@ -162,6 +162,7 @@ struct qede_rdma_dev {
struct list_head entry; struct list_head entry;
struct list_head rdma_event_list; struct list_head rdma_event_list;
struct workqueue_struct *rdma_wq; struct workqueue_struct *rdma_wq;
bool exp_recovery;
}; };
struct qede_ptp; struct qede_ptp;
...@@ -264,6 +265,7 @@ struct qede_dev { ...@@ -264,6 +265,7 @@ struct qede_dev {
enum QEDE_STATE { enum QEDE_STATE {
QEDE_STATE_CLOSED, QEDE_STATE_CLOSED,
QEDE_STATE_OPEN, QEDE_STATE_OPEN,
QEDE_STATE_RECOVERY,
}; };
#define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo)) #define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo))
...@@ -462,6 +464,7 @@ struct qede_fastpath { ...@@ -462,6 +464,7 @@ struct qede_fastpath {
#define QEDE_CSUM_UNNECESSARY BIT(1) #define QEDE_CSUM_UNNECESSARY BIT(1)
#define QEDE_TUNN_CSUM_UNNECESSARY BIT(2) #define QEDE_TUNN_CSUM_UNNECESSARY BIT(2)
#define QEDE_SP_RECOVERY 0
#define QEDE_SP_RX_MODE 1 #define QEDE_SP_RX_MODE 1
#ifdef CONFIG_RFS_ACCEL #ifdef CONFIG_RFS_ACCEL
......
...@@ -50,6 +50,8 @@ static void _qede_rdma_dev_add(struct qede_dev *edev) ...@@ -50,6 +50,8 @@ static void _qede_rdma_dev_add(struct qede_dev *edev)
if (!qedr_drv) if (!qedr_drv)
return; return;
/* Leftovers from previous error recovery */
edev->rdma_info.exp_recovery = false;
edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev, edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev,
edev->ndev); edev->ndev);
} }
...@@ -87,21 +89,26 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev) ...@@ -87,21 +89,26 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev)
destroy_workqueue(edev->rdma_info.rdma_wq); destroy_workqueue(edev->rdma_info.rdma_wq);
} }
int qede_rdma_dev_add(struct qede_dev *edev) int qede_rdma_dev_add(struct qede_dev *edev, bool recovery)
{ {
int rc = 0; int rc;
if (qede_rdma_supported(edev)) { if (!qede_rdma_supported(edev))
rc = qede_rdma_create_wq(edev); return 0;
if (rc)
return rc;
INIT_LIST_HEAD(&edev->rdma_info.entry); /* Cannot start qedr while recovering since it wasn't fully stopped */
mutex_lock(&qedr_dev_list_lock); if (recovery)
list_add_tail(&edev->rdma_info.entry, &qedr_dev_list); return 0;
_qede_rdma_dev_add(edev);
mutex_unlock(&qedr_dev_list_lock); rc = qede_rdma_create_wq(edev);
} if (rc)
return rc;
INIT_LIST_HEAD(&edev->rdma_info.entry);
mutex_lock(&qedr_dev_list_lock);
list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
_qede_rdma_dev_add(edev);
mutex_unlock(&qedr_dev_list_lock);
return rc; return rc;
} }
...@@ -110,19 +117,30 @@ static void _qede_rdma_dev_remove(struct qede_dev *edev) ...@@ -110,19 +117,30 @@ static void _qede_rdma_dev_remove(struct qede_dev *edev)
{ {
if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev) if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
qedr_drv->remove(edev->rdma_info.qedr_dev); qedr_drv->remove(edev->rdma_info.qedr_dev);
edev->rdma_info.qedr_dev = NULL;
} }
void qede_rdma_dev_remove(struct qede_dev *edev) void qede_rdma_dev_remove(struct qede_dev *edev, bool recovery)
{ {
if (!qede_rdma_supported(edev)) if (!qede_rdma_supported(edev))
return; return;
qede_rdma_destroy_wq(edev); /* Cannot remove qedr while recovering since it wasn't fully stopped */
mutex_lock(&qedr_dev_list_lock); if (!recovery) {
_qede_rdma_dev_remove(edev); qede_rdma_destroy_wq(edev);
list_del(&edev->rdma_info.entry); mutex_lock(&qedr_dev_list_lock);
mutex_unlock(&qedr_dev_list_lock); if (!edev->rdma_info.exp_recovery)
_qede_rdma_dev_remove(edev);
edev->rdma_info.qedr_dev = NULL;
list_del(&edev->rdma_info.entry);
mutex_unlock(&qedr_dev_list_lock);
} else {
if (!edev->rdma_info.exp_recovery) {
mutex_lock(&qedr_dev_list_lock);
_qede_rdma_dev_remove(edev);
mutex_unlock(&qedr_dev_list_lock);
}
edev->rdma_info.exp_recovery = true;
}
} }
static void _qede_rdma_dev_open(struct qede_dev *edev) static void _qede_rdma_dev_open(struct qede_dev *edev)
...@@ -204,7 +222,8 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv) ...@@ -204,7 +222,8 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv)
mutex_lock(&qedr_dev_list_lock); mutex_lock(&qedr_dev_list_lock);
list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) { list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
if (edev->rdma_info.qedr_dev) /* If device has experienced recovery it was already removed */
if (edev->rdma_info.qedr_dev && !edev->rdma_info.exp_recovery)
_qede_rdma_dev_remove(edev); _qede_rdma_dev_remove(edev);
} }
qedr_drv = NULL; qedr_drv = NULL;
...@@ -284,6 +303,10 @@ static void qede_rdma_add_event(struct qede_dev *edev, ...@@ -284,6 +303,10 @@ static void qede_rdma_add_event(struct qede_dev *edev,
{ {
struct qede_rdma_event_work *event_node; struct qede_rdma_event_work *event_node;
/* If a recovery was experienced avoid adding the event */
if (edev->rdma_info.exp_recovery)
return;
if (!edev->rdma_info.qedr_dev) if (!edev->rdma_info.qedr_dev)
return; return;
......
...@@ -74,21 +74,23 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv); ...@@ -74,21 +74,23 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv);
bool qede_rdma_supported(struct qede_dev *dev); bool qede_rdma_supported(struct qede_dev *dev);
#if IS_ENABLED(CONFIG_QED_RDMA) #if IS_ENABLED(CONFIG_QED_RDMA)
int qede_rdma_dev_add(struct qede_dev *dev); int qede_rdma_dev_add(struct qede_dev *dev, bool recovery);
void qede_rdma_dev_event_open(struct qede_dev *dev); void qede_rdma_dev_event_open(struct qede_dev *dev);
void qede_rdma_dev_event_close(struct qede_dev *dev); void qede_rdma_dev_event_close(struct qede_dev *dev);
void qede_rdma_dev_remove(struct qede_dev *dev); void qede_rdma_dev_remove(struct qede_dev *dev, bool recovery);
void qede_rdma_event_changeaddr(struct qede_dev *edr); void qede_rdma_event_changeaddr(struct qede_dev *edr);
#else #else
static inline int qede_rdma_dev_add(struct qede_dev *dev) static inline int qede_rdma_dev_add(struct qede_dev *dev,
bool recovery)
{ {
return 0; return 0;
} }
static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {} static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {}
static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {} static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {}
static inline void qede_rdma_dev_remove(struct qede_dev *dev) {} static inline void qede_rdma_dev_remove(struct qede_dev *dev,
bool recovery) {}
static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {} static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {}
#endif #endif
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment