Commit a955d71d authored by Shiju Jose, committed by David S. Miller

net: hns3: avoid unnecessary resetting for the H/W errors which do not require reset

HNS does not need to be reset when errors occur in some bits.
However, HNAE3_FUNC_RESET is currently set in this case, and
as a result the default_reset is done when these errors are reported.
This patch fixes this issue. The patch also optimizes how the
reset level is set for error recovery.
Reported-by: Weihang Li <liweihang@hisilicon.com>
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 123297b7
...@@ -631,29 +631,20 @@ static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = { ...@@ -631,29 +631,20 @@ static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
{ /* sentinel */ } { /* sentinel */ }
}; };
static enum hnae3_reset_type hclge_log_error(struct device *dev, char *reg, static void hclge_log_error(struct device *dev, char *reg,
const struct hclge_hw_error *err, const struct hclge_hw_error *err,
u32 err_sts) u32 err_sts, unsigned long *reset_requests)
{ {
enum hnae3_reset_type reset_level = HNAE3_FUNC_RESET;
bool need_reset = false;
while (err->msg) { while (err->msg) {
if (err->int_msk & err_sts) { if (err->int_msk & err_sts) {
dev_warn(dev, "%s %s found [error status=0x%x]\n", dev_warn(dev, "%s %s found [error status=0x%x]\n",
reg, err->msg, err_sts); reg, err->msg, err_sts);
if (err->reset_level != HNAE3_NONE_RESET && if (err->reset_level &&
err->reset_level >= reset_level) { err->reset_level != HNAE3_NONE_RESET)
reset_level = err->reset_level; set_bit(err->reset_level, reset_requests);
need_reset = true;
}
} }
err++; err++;
} }
if (need_reset)
return reset_level;
else
return HNAE3_NONE_RESET;
} }
/* hclge_cmd_query_error: read the error information /* hclge_cmd_query_error: read the error information
...@@ -1082,7 +1073,6 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, ...@@ -1082,7 +1073,6 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
int num) int num)
{ {
struct hnae3_ae_dev *ae_dev = hdev->ae_dev; struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
enum hnae3_reset_type reset_level;
struct device *dev = &hdev->pdev->dev; struct device *dev = &hdev->pdev->dev;
__le32 *desc_data; __le32 *desc_data;
u32 status; u32 status;
...@@ -1099,49 +1089,39 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, ...@@ -1099,49 +1089,39 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
/* log HNS common errors */ /* log HNS common errors */
status = le32_to_cpu(desc[0].data[0]); status = le32_to_cpu(desc[0].data[0]);
if (status) { if (status)
reset_level = hclge_log_error(dev, "IMP_TCM_ECC_INT_STS", hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
&hclge_imp_tcm_ecc_int[0], &hclge_imp_tcm_ecc_int[0], status,
status); &ae_dev->hw_err_reset_req);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
status = le32_to_cpu(desc[0].data[1]); status = le32_to_cpu(desc[0].data[1]);
if (status) { if (status)
reset_level = hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS", hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
&hclge_cmdq_nic_mem_ecc_int[0], &hclge_cmdq_nic_mem_ecc_int[0], status,
status); &ae_dev->hw_err_reset_req);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
if ((le32_to_cpu(desc[0].data[2])) & BIT(0)) if ((le32_to_cpu(desc[0].data[2])) & BIT(0))
dev_warn(dev, "imp_rd_data_poison_err found\n"); dev_warn(dev, "imp_rd_data_poison_err found\n");
status = le32_to_cpu(desc[0].data[3]); status = le32_to_cpu(desc[0].data[3]);
if (status) { if (status)
reset_level = hclge_log_error(dev, "TQP_INT_ECC_INT_STS", hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
&hclge_tqp_int_ecc_int[0], &hclge_tqp_int_ecc_int[0], status,
status); &ae_dev->hw_err_reset_req);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
status = le32_to_cpu(desc[0].data[4]); status = le32_to_cpu(desc[0].data[4]);
if (status) { if (status)
reset_level = hclge_log_error(dev, "MSIX_ECC_INT_STS", hclge_log_error(dev, "MSIX_ECC_INT_STS",
&hclge_msix_sram_ecc_int[0], &hclge_msix_sram_ecc_int[0], status,
status); &ae_dev->hw_err_reset_req);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
/* log SSU(Storage Switch Unit) errors */ /* log SSU(Storage Switch Unit) errors */
desc_data = (__le32 *)&desc[2]; desc_data = (__le32 *)&desc[2];
status = le32_to_cpu(*(desc_data + 2)); status = le32_to_cpu(*(desc_data + 2));
if (status) { if (status)
reset_level = hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0", hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0",
&hclge_ssu_mem_ecc_err_int[0], &hclge_ssu_mem_ecc_err_int[0], status,
status); &ae_dev->hw_err_reset_req);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
status = le32_to_cpu(*(desc_data + 3)) & BIT(0); status = le32_to_cpu(*(desc_data + 3)) & BIT(0);
if (status) { if (status) {
...@@ -1151,41 +1131,32 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, ...@@ -1151,41 +1131,32 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
} }
status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK; status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK;
if (status) { if (status)
reset_level = hclge_log_error(dev, "SSU_COMMON_ERR_INT", hclge_log_error(dev, "SSU_COMMON_ERR_INT",
&hclge_ssu_com_err_int[0], &hclge_ssu_com_err_int[0], status,
status); &ae_dev->hw_err_reset_req);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
/* log IGU(Ingress Unit) errors */ /* log IGU(Ingress Unit) errors */
desc_data = (__le32 *)&desc[3]; desc_data = (__le32 *)&desc[3];
status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK; status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK;
if (status) { if (status)
reset_level = hclge_log_error(dev, "IGU_INT_STS", hclge_log_error(dev, "IGU_INT_STS",
&hclge_igu_int[0], status); &hclge_igu_int[0], status,
set_bit(reset_level, &ae_dev->hw_err_reset_req); &ae_dev->hw_err_reset_req);
}
/* log PPP(Programmable Packet Process) errors */ /* log PPP(Programmable Packet Process) errors */
desc_data = (__le32 *)&desc[4]; desc_data = (__le32 *)&desc[4];
status = le32_to_cpu(*(desc_data + 1)); status = le32_to_cpu(*(desc_data + 1));
if (status) { if (status)
reset_level = hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1", &hclge_ppp_mpf_abnormal_int_st1[0], status,
&hclge_ppp_mpf_abnormal_int_st1[0], &ae_dev->hw_err_reset_req);
status);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK; status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK;
if (status) { if (status)
reset_level = hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3", &hclge_ppp_mpf_abnormal_int_st3[0], status,
&hclge_ppp_mpf_abnormal_int_st3[0], &ae_dev->hw_err_reset_req);
status);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
/* log PPU(RCB) errors */ /* log PPU(RCB) errors */
desc_data = (__le32 *)&desc[5]; desc_data = (__le32 *)&desc[5];
...@@ -1197,57 +1168,46 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, ...@@ -1197,57 +1168,46 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
} }
status = le32_to_cpu(*(desc_data + 2)); status = le32_to_cpu(*(desc_data + 2));
if (status) { if (status)
reset_level = hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2", &hclge_ppu_mpf_abnormal_int_st2[0], status,
&hclge_ppu_mpf_abnormal_int_st2[0], &ae_dev->hw_err_reset_req);
status);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK; status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK;
if (status) { if (status)
reset_level = hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3", &hclge_ppu_mpf_abnormal_int_st3[0], status,
&hclge_ppu_mpf_abnormal_int_st3[0], &ae_dev->hw_err_reset_req);
status);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
/* log TM(Traffic Manager) errors */ /* log TM(Traffic Manager) errors */
desc_data = (__le32 *)&desc[6]; desc_data = (__le32 *)&desc[6];
status = le32_to_cpu(*desc_data); status = le32_to_cpu(*desc_data);
if (status) { if (status)
reset_level = hclge_log_error(dev, "TM_SCH_RINT", hclge_log_error(dev, "TM_SCH_RINT",
&hclge_tm_sch_rint[0], status); &hclge_tm_sch_rint[0], status,
set_bit(reset_level, &ae_dev->hw_err_reset_req); &ae_dev->hw_err_reset_req);
}
/* log QCN(Quantized Congestion Control) errors */ /* log QCN(Quantized Congestion Control) errors */
desc_data = (__le32 *)&desc[7]; desc_data = (__le32 *)&desc[7];
status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK; status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK;
if (status) { if (status)
reset_level = hclge_log_error(dev, "QCN_FIFO_RINT", hclge_log_error(dev, "QCN_FIFO_RINT",
&hclge_qcn_fifo_rint[0], status); &hclge_qcn_fifo_rint[0], status,
set_bit(reset_level, &ae_dev->hw_err_reset_req); &ae_dev->hw_err_reset_req);
}
status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK; status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK;
if (status) { if (status)
reset_level = hclge_log_error(dev, "QCN_ECC_RINT", hclge_log_error(dev, "QCN_ECC_RINT",
&hclge_qcn_ecc_rint[0], &hclge_qcn_ecc_rint[0], status,
status); &ae_dev->hw_err_reset_req);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
/* log NCSI errors */ /* log NCSI errors */
desc_data = (__le32 *)&desc[9]; desc_data = (__le32 *)&desc[9];
status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK; status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK;
if (status) { if (status)
reset_level = hclge_log_error(dev, "NCSI_ECC_INT_RPT", hclge_log_error(dev, "NCSI_ECC_INT_RPT",
&hclge_ncsi_err_int[0], status); &hclge_ncsi_err_int[0], status,
set_bit(reset_level, &ae_dev->hw_err_reset_req); &ae_dev->hw_err_reset_req);
}
/* clear all main PF RAS errors */ /* clear all main PF RAS errors */
hclge_cmd_reuse_desc(&desc[0], false); hclge_cmd_reuse_desc(&desc[0], false);
...@@ -1272,7 +1232,6 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, ...@@ -1272,7 +1232,6 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
{ {
struct hnae3_ae_dev *ae_dev = hdev->ae_dev; struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
struct device *dev = &hdev->pdev->dev; struct device *dev = &hdev->pdev->dev;
enum hnae3_reset_type reset_level;
__le32 *desc_data; __le32 *desc_data;
u32 status; u32 status;
int ret; int ret;
...@@ -1288,48 +1247,38 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, ...@@ -1288,48 +1247,38 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
/* log SSU(Storage Switch Unit) errors */ /* log SSU(Storage Switch Unit) errors */
status = le32_to_cpu(desc[0].data[0]); status = le32_to_cpu(desc[0].data[0]);
if (status) { if (status)
reset_level = hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
&hclge_ssu_port_based_err_int[0], &hclge_ssu_port_based_err_int[0], status,
status); &ae_dev->hw_err_reset_req);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
status = le32_to_cpu(desc[0].data[1]); status = le32_to_cpu(desc[0].data[1]);
if (status) { if (status)
reset_level = hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT", hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT",
&hclge_ssu_fifo_overflow_int[0], &hclge_ssu_fifo_overflow_int[0], status,
status); &ae_dev->hw_err_reset_req);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
status = le32_to_cpu(desc[0].data[2]); status = le32_to_cpu(desc[0].data[2]);
if (status) { if (status)
reset_level = hclge_log_error(dev, "SSU_ETS_TCG_INT", hclge_log_error(dev, "SSU_ETS_TCG_INT",
&hclge_ssu_ets_tcg_int[0], &hclge_ssu_ets_tcg_int[0], status,
status); &ae_dev->hw_err_reset_req);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
/* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */ /* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */
desc_data = (__le32 *)&desc[1]; desc_data = (__le32 *)&desc[1];
status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK; status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK;
if (status) { if (status)
reset_level = hclge_log_error(dev, "IGU_EGU_TNL_INT_STS", hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
&hclge_igu_egu_tnl_int[0], &hclge_igu_egu_tnl_int[0], status,
status); &ae_dev->hw_err_reset_req);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
/* log PPU(RCB) errors */ /* log PPU(RCB) errors */
desc_data = (__le32 *)&desc[3]; desc_data = (__le32 *)&desc[3];
status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK; status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK;
if (status) { if (status)
reset_level = hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0", hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0",
&hclge_ppu_pf_abnormal_int[0], &hclge_ppu_pf_abnormal_int[0], status,
status); &ae_dev->hw_err_reset_req);
set_bit(reset_level, &ae_dev->hw_err_reset_req);
}
/* clear all PF RAS errors */ /* clear all PF RAS errors */
hclge_cmd_reuse_desc(&desc[0], false); hclge_cmd_reuse_desc(&desc[0], false);
...@@ -1671,8 +1620,9 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) ...@@ -1671,8 +1620,9 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
hclge_handle_rocee_ras_error(ae_dev); hclge_handle_rocee_ras_error(ae_dev);
} }
if (status & HCLGE_RAS_REG_NFE_MASK || if ((status & HCLGE_RAS_REG_NFE_MASK ||
status & HCLGE_RAS_REG_ROCEE_ERR_MASK) { status & HCLGE_RAS_REG_ROCEE_ERR_MASK) &&
ae_dev->hw_err_reset_req) {
ae_dev->override_pci_need_reset = 0; ae_dev->override_pci_need_reset = 0;
return PCI_ERS_RESULT_NEED_RESET; return PCI_ERS_RESULT_NEED_RESET;
} }
...@@ -1762,7 +1712,6 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev, ...@@ -1762,7 +1712,6 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
struct hclge_mac_tnl_stats mac_tnl_stats; struct hclge_mac_tnl_stats mac_tnl_stats;
struct device *dev = &hdev->pdev->dev; struct device *dev = &hdev->pdev->dev;
u32 mpf_bd_num, pf_bd_num, bd_num; u32 mpf_bd_num, pf_bd_num, bd_num;
enum hnae3_reset_type reset_level;
struct hclge_desc desc_bd; struct hclge_desc desc_bd;
struct hclge_desc *desc; struct hclge_desc *desc;
__le32 *desc_data; __le32 *desc_data;
...@@ -1800,24 +1749,19 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev, ...@@ -1800,24 +1749,19 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
/* log MAC errors */ /* log MAC errors */
desc_data = (__le32 *)&desc[1]; desc_data = (__le32 *)&desc[1];
status = le32_to_cpu(*desc_data); status = le32_to_cpu(*desc_data);
if (status) { if (status)
reset_level = hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R", hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
&hclge_mac_afifo_tnl_int[0], &hclge_mac_afifo_tnl_int[0], status,
status); reset_requests);
set_bit(reset_level, reset_requests);
}
/* log PPU(RCB) MPF errors */ /* log PPU(RCB) MPF errors */
desc_data = (__le32 *)&desc[5]; desc_data = (__le32 *)&desc[5];
status = le32_to_cpu(*(desc_data + 2)) & status = le32_to_cpu(*(desc_data + 2)) &
HCLGE_PPU_MPF_INT_ST2_MSIX_MASK; HCLGE_PPU_MPF_INT_ST2_MSIX_MASK;
if (status) { if (status)
reset_level = hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2", &hclge_ppu_mpf_abnormal_int_st2[0],
&hclge_ppu_mpf_abnormal_int_st2[0], status, reset_requests);
status);
set_bit(reset_level, reset_requests);
}
/* clear all main PF MSIx errors */ /* clear all main PF MSIx errors */
hclge_cmd_reuse_desc(&desc[0], false); hclge_cmd_reuse_desc(&desc[0], false);
...@@ -1841,32 +1785,26 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev, ...@@ -1841,32 +1785,26 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
/* log SSU PF errors */ /* log SSU PF errors */
status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK; status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK;
if (status) { if (status)
reset_level = hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
&hclge_ssu_port_based_pf_int[0], &hclge_ssu_port_based_pf_int[0],
status); status, reset_requests);
set_bit(reset_level, reset_requests);
}
/* read and log PPP PF errors */ /* read and log PPP PF errors */
desc_data = (__le32 *)&desc[2]; desc_data = (__le32 *)&desc[2];
status = le32_to_cpu(*desc_data); status = le32_to_cpu(*desc_data);
if (status) { if (status)
reset_level = hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0", hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0",
&hclge_ppp_pf_abnormal_int[0], &hclge_ppp_pf_abnormal_int[0],
status); status, reset_requests);
set_bit(reset_level, reset_requests);
}
/* log PPU(RCB) PF errors */ /* log PPU(RCB) PF errors */
desc_data = (__le32 *)&desc[3]; desc_data = (__le32 *)&desc[3];
status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK; status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK;
if (status) { if (status)
reset_level = hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST", hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST",
&hclge_ppu_pf_abnormal_int[0], &hclge_ppu_pf_abnormal_int[0],
status); status, reset_requests);
set_bit(reset_level, reset_requests);
}
status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK; status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK;
if (status) if (status)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment