Commit 1c753652 authored by David S. Miller's avatar David S. Miller

Merge branch 'hns3-RAS'

Guangbin Huang says:

====================
net: hns3: add RAS compatibility adaptation solution

This patchset adds RAS compatibility adaptation solution for new devices.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents e0eb625a 1c360a4a
......@@ -91,6 +91,7 @@ enum HNAE3_DEV_CAP_BITS {
HNAE3_DEV_SUPPORT_STASH_B,
HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B,
HNAE3_DEV_SUPPORT_PAUSE_B,
HNAE3_DEV_SUPPORT_RAS_IMP_B,
HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B,
HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B,
......@@ -129,6 +130,9 @@ enum HNAE3_DEV_CAP_BITS {
#define hnae3_dev_phy_imp_supported(hdev) \
test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, (hdev)->ae_dev->caps)
#define hnae3_dev_ras_imp_supported(hdev) \
test_bit(HNAE3_DEV_SUPPORT_RAS_IMP_B, (hdev)->ae_dev->caps)
#define hnae3_dev_tqp_txrx_indep_supported(hdev) \
test_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, (hdev)->ae_dev->caps)
......
......@@ -349,6 +349,9 @@ static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
}, {
.name = "support imp-controlled PHY",
.cap_bit = HNAE3_DEV_SUPPORT_PHY_IMP_B,
}, {
.name = "support imp-controlled RAS",
.cap_bit = HNAE3_DEV_SUPPORT_RAS_IMP_B,
}, {
.name = "support rxd advanced layout",
.cap_bit = HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
......
......@@ -178,7 +178,8 @@ static bool hclge_is_special_opcode(u16 opcode)
HCLGE_QUERY_CLEAR_MPF_RAS_INT,
HCLGE_QUERY_CLEAR_PF_RAS_INT,
HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT};
HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
HCLGE_QUERY_ALL_ERR_INFO};
int i;
for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) {
......@@ -386,6 +387,8 @@ static void hclge_parse_capability(struct hclge_dev *hdev,
set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps);
if (hnae3_get_bit(caps, HCLGE_CAP_PHY_IMP_B))
set_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps);
if (hnae3_get_bit(caps, HCLGE_CAP_RAS_IMP_B))
set_bit(HNAE3_DEV_SUPPORT_RAS_IMP_B, ae_dev->caps);
if (hnae3_get_bit(caps, HCLGE_CAP_RXD_ADV_LAYOUT_B))
set_bit(HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, ae_dev->caps);
if (hnae3_get_bit(caps, HCLGE_CAP_PORT_VLAN_BYPASS_B)) {
......
......@@ -293,6 +293,8 @@ enum hclge_opcode_type {
HCLGE_QUERY_MSIX_INT_STS_BD_NUM = 0x1513,
HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT = 0x1514,
HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT = 0x1515,
HCLGE_QUERY_ALL_ERR_BD_NUM = 0x1516,
HCLGE_QUERY_ALL_ERR_INFO = 0x1517,
HCLGE_CONFIG_ROCEE_RAS_INT_EN = 0x1580,
HCLGE_QUERY_CLEAR_ROCEE_RAS_INT = 0x1581,
HCLGE_ROCEE_PF_RAS_INT_CMD = 0x1584,
......@@ -390,6 +392,7 @@ enum HCLGE_CAP_BITS {
HCLGE_CAP_HW_PAD_B,
HCLGE_CAP_STASH_B,
HCLGE_CAP_UDP_TUNNEL_CSUM_B,
HCLGE_CAP_RAS_IMP_B = 12,
HCLGE_CAP_FEC_B = 13,
HCLGE_CAP_PAUSE_B = 14,
HCLGE_CAP_RXD_ADV_LAYOUT_B = 15,
......
......@@ -15,6 +15,8 @@
#define HCLGE_RAS_PF_OTHER_INT_STS_REG 0x20B00
#define HCLGE_RAS_REG_NFE_MASK 0xFF00
#define HCLGE_RAS_REG_ROCEE_ERR_MASK 0x3000000
#define HCLGE_RAS_REG_ERR_MASK \
(HCLGE_RAS_REG_NFE_MASK | HCLGE_RAS_REG_ROCEE_ERR_MASK)
#define HCLGE_VECTOR0_REG_MSIX_MASK 0x1FF00
......@@ -107,6 +109,10 @@
#define HCLGE_ROCEE_OVF_ERR_INT_MASK 0x10000
#define HCLGE_ROCEE_OVF_ERR_TYPE_MASK 0x3F
#define HCLGE_DESC_DATA_MAX 8
#define HCLGE_REG_NUM_MAX 256
#define HCLGE_DESC_NO_DATA_LEN 8
enum hclge_err_int_type {
HCLGE_ERR_INT_MSIX = 0,
HCLGE_ERR_INT_RAS_CE = 1,
......@@ -114,6 +120,56 @@ enum hclge_err_int_type {
HCLGE_ERR_INT_RAS_FE = 3,
};
enum hclge_mod_name_list {
MODULE_NONE = 0,
MODULE_BIOS_COMMON = 1,
MODULE_GE = 2,
MODULE_IGU_EGU = 3,
MODULE_LGE = 4,
MODULE_NCSI = 5,
MODULE_PPP = 6,
MODULE_QCN = 7,
MODULE_RCB_RX = 8,
MODULE_RTC = 9,
MODULE_SSU = 10,
MODULE_TM = 11,
MODULE_RCB_TX = 12,
MODULE_TXDMA = 13,
MODULE_MASTER = 14,
/* add new MODULE NAME for NIC here in order */
MODULE_ROCEE_TOP = 40,
MODULE_ROCEE_TIMER = 41,
MODULE_ROCEE_MDB = 42,
MODULE_ROCEE_TSP = 43,
MODULE_ROCEE_TRP = 44,
MODULE_ROCEE_SCC = 45,
MODULE_ROCEE_CAEP = 46,
MODULE_ROCEE_GEN_AC = 47,
MODULE_ROCEE_QMM = 48,
MODULE_ROCEE_LSAN = 49,
/* add new MODULE NAME for RoCEE here in order */
};
enum hclge_err_type_list {
NONE_ERROR = 0,
FIFO_ERROR = 1,
MEMORY_ERROR = 2,
POISON_ERROR = 3,
MSIX_ECC_ERROR = 4,
TQP_INT_ECC_ERROR = 5,
PF_ABNORMAL_INT_ERROR = 6,
MPF_ABNORMAL_INT_ERROR = 7,
COMMON_ERROR = 8,
PORT_ERROR = 9,
ETS_ERROR = 10,
NCSI_ERROR = 11,
GLB_ERROR = 12,
/* add new ERROR TYPE for NIC here in order */
ROCEE_NORMAL_ERR = 40,
ROCEE_OVF_ERR = 41,
/* add new ERROR TYPE for ROCEE here in order */
};
struct hclge_hw_blk {
u32 msk;
const char *name;
......@@ -126,11 +182,44 @@ struct hclge_hw_error {
enum hnae3_reset_type reset_level;
};
struct hclge_hw_module_id {
enum hclge_mod_name_list module_id;
const char *msg;
};
struct hclge_hw_type_id {
enum hclge_err_type_list type_id;
const char *msg;
};
struct hclge_sum_err_info {
u8 reset_type;
u8 mod_num;
u8 rsv[2];
};
struct hclge_mod_err_info {
u8 mod_id;
u8 err_num;
u8 rsv[2];
};
struct hclge_type_reg_err_info {
u8 type_id;
u8 reg_num;
u8 rsv[2];
u32 hclge_reg[HCLGE_REG_NUM_MAX];
};
int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en);
int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state);
int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en);
void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev);
bool hclge_find_error_source(struct hclge_dev *hdev);
void hclge_handle_occurred_error(struct hclge_dev *hdev);
pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev);
int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
unsigned long *reset_requests);
int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev);
int hclge_handle_mac_tnl(struct hclge_dev *hdev);
#endif
......@@ -3307,11 +3307,13 @@ static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf,
static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
{
u32 cmdq_src_reg, msix_src_reg;
u32 cmdq_src_reg, msix_src_reg, hw_err_src_reg;
/* fetch the events from their corresponding regs */
cmdq_src_reg = hclge_read_dev(&hdev->hw, HCLGE_VECTOR0_CMDQ_SRC_REG);
msix_src_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS);
hw_err_src_reg = hclge_read_dev(&hdev->hw,
HCLGE_RAS_PF_OTHER_INT_STS_REG);
/* Assumption: If by any chance reset and mailbox events are reported
* together then we will only process reset event in this go and will
......@@ -3339,11 +3341,10 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
return HCLGE_VECTOR0_EVENT_RST;
}
/* check for vector0 msix event source */
if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK) {
*clearval = msix_src_reg;
/* check for vector0 msix event and hardware error event source */
if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK ||
hw_err_src_reg & HCLGE_RAS_REG_ERR_MASK)
return HCLGE_VECTOR0_EVENT_ERR;
}
/* check for vector0 mailbox(=CMDQ RX) event source */
if (BIT(HCLGE_VECTOR0_RX_CMDQ_INT_B) & cmdq_src_reg) {
......@@ -3354,9 +3355,8 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
/* print other vector0 event source */
dev_info(&hdev->pdev->dev,
"CMDQ INT status:0x%x, other INT status:0x%x\n",
cmdq_src_reg, msix_src_reg);
*clearval = msix_src_reg;
"INT status: CMDQ(%#x) HW errors(%#x) other(%#x)\n",
cmdq_src_reg, hw_err_src_reg, msix_src_reg);
return HCLGE_VECTOR0_EVENT_OTHER;
}
......@@ -3427,15 +3427,10 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
hclge_clear_event_cause(hdev, event_cause, clearval);
/* Enable interrupt if it is not cause by reset. And when
* clearval equal to 0, it means interrupt status may be
* cleared by hardware before driver reads status register.
* For this case, vector0 interrupt also should be enabled.
*/
if (!clearval ||
event_cause == HCLGE_VECTOR0_EVENT_MBX) {
/* Enable interrupt if it is not caused by reset event or error event */
if (event_cause == HCLGE_VECTOR0_EVENT_MBX ||
event_cause == HCLGE_VECTOR0_EVENT_OTHER)
hclge_enable_vector(&hdev->misc_vector, true);
}
return IRQ_HANDLED;
}
......@@ -4240,6 +4235,38 @@ static void hclge_reset_subtask(struct hclge_dev *hdev)
hdev->reset_type = HNAE3_NONE_RESET;
}
static void hclge_handle_err_reset_request(struct hclge_dev *hdev)
{
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
enum hnae3_reset_type reset_type;
if (ae_dev->hw_err_reset_req) {
reset_type = hclge_get_reset_level(ae_dev,
&ae_dev->hw_err_reset_req);
hclge_set_def_reset_request(ae_dev, reset_type);
}
if (hdev->default_reset_request && ae_dev->ops->reset_event)
ae_dev->ops->reset_event(hdev->pdev, NULL);
/* enable interrupt after error handling complete */
hclge_enable_vector(&hdev->misc_vector, true);
}
static void hclge_handle_err_recovery(struct hclge_dev *hdev)
{
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
ae_dev->hw_err_reset_req = 0;
if (hclge_find_error_source(hdev)) {
hclge_handle_error_info_log(ae_dev);
hclge_handle_mac_tnl(hdev);
}
hclge_handle_err_reset_request(hdev);
}
static void hclge_misc_err_recovery(struct hclge_dev *hdev)
{
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
......@@ -4247,19 +4274,16 @@ static void hclge_misc_err_recovery(struct hclge_dev *hdev)
u32 msix_sts_reg;
msix_sts_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS);
if (msix_sts_reg & HCLGE_VECTOR0_REG_MSIX_MASK) {
if (hclge_handle_hw_msix_error(hdev,
&hdev->default_reset_request))
if (hclge_handle_hw_msix_error
(hdev, &hdev->default_reset_request))
dev_info(dev, "received msix interrupt 0x%x\n",
msix_sts_reg);
if (hdev->default_reset_request)
if (ae_dev->ops->reset_event)
ae_dev->ops->reset_event(hdev->pdev, NULL);
}
hclge_enable_vector(&hdev->misc_vector, true);
hclge_handle_hw_ras_error(ae_dev);
hclge_handle_err_reset_request(hdev);
}
static void hclge_errhand_service_task(struct hclge_dev *hdev)
......@@ -4267,6 +4291,9 @@ static void hclge_errhand_service_task(struct hclge_dev *hdev)
if (!test_and_clear_bit(HCLGE_STATE_ERR_SERVICE_SCHED, &hdev->state))
return;
if (hnae3_dev_ras_imp_supported(hdev))
hclge_handle_err_recovery(hdev);
else
hclge_misc_err_recovery(hdev);
}
......@@ -11524,6 +11551,9 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
hclge_clear_resetting_state(hdev);
/* Log and clear the hw errors those already occurred */
if (hnae3_dev_ras_imp_supported(hdev))
hclge_handle_occurred_error(hdev);
else
hclge_handle_all_hns_hw_errors(ae_dev);
/* request delayed reset for the error recovery because an immediate
......@@ -11877,6 +11907,9 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
}
/* Log and clear the hw errors those already occurred */
if (hnae3_dev_ras_imp_supported(hdev))
hclge_handle_occurred_error(hdev);
else
hclge_handle_all_hns_hw_errors(ae_dev);
/* Re-enable the hw error interrupts because
......
......@@ -190,6 +190,7 @@ enum HLCGE_PORT_TYPE {
#define HCLGE_VECTOR0_IMP_RESET_INT_B 1
#define HCLGE_VECTOR0_IMP_CMDQ_ERR_B 4U
#define HCLGE_VECTOR0_IMP_RD_POISON_B 5U
#define HCLGE_VECTOR0_ALL_MSIX_ERR_B 6U
#define HCLGE_MAC_DEFAULT_FRAME \
(ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN + ETH_DATA_LEN)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment