author	David S. Miller <davem@davemloft.net>	2018-12-07 15:57:02 -0800
committer	David S. Miller <davem@davemloft.net>	2018-12-07 15:57:02 -0800
commit	12edfdfc79860f42fa493f81518e040376b6a5bc (patch)
tree	4deed43287cdaa618d43380586064b51b8e28306
parent	9f4c2cffd08c102107992cd01c12c4a53475baf6 (diff)
parent	630ba007f4750722bc56dfebfaf1c0316c2fcb69 (diff)
download	linux-12edfdfc79860f42fa493f81518e040376b6a5bc.tar.gz
Merge branch 'hns3-error-handling'
Salil Mehta says:

====================
net: hns3: Additions/optimizations related to HNS3 H/W err handling

This patch set primarily makes the following additions and optimizations
related to error handling in the HNS3 Ethernet driver:

 1. Rename the enable and process functions and make minor loop
    optimizations. [PATCH 1-6]
 2. Modify the query and clearing of RAS errors to use a new set of
    commands, because the module-specific commands for clearing RCB,
    PPP PF and SSU errors are obsolete. [PATCH 7]
 3. Delete logging of 1-bit RAS errors in the HNS3 driver, as these
    are never reported to the driver. [PATCH 8]
 4. Add handling of NIC HW errors reported through MSIx rather than
    the PCIe AER channel. [PATCH 9]
 5. Add handling of HW RAS and MSIx errors in the MAC, PPP PF,
    MSIx SRAM, RCB and SSU modules. [PATCH 10-13]
 6. Add handling of RoCEE RAS errors. [PATCH 14]
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
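
Conceptually, the reworked error handling is table driven: each hardware
block gets an array of { mask, message } entries that hclge_log_error()
walks against a status word read from the firmware (see hclge_err.c in the
diff below). A minimal, self-contained sketch of that pattern follows; it is
a simplified, hypothetical example, not the exact driver code:

#include <stdio.h>
#include <stdint.h>

#define BIT(n)	(1U << (n))

/* One status bit and its human-readable name; mirrors the shape of
 * struct hclge_hw_error used throughout hclge_err.c. */
struct hw_error {
	uint32_t int_msk;
	const char *msg;
};

/* Example table; entries taken from hclge_igu_int[] in the diff below. */
static const struct hw_error igu_int[] = {
	{ .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err" },
	{ .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err" },
	{ /* sentinel */ }
};

/* Walk the table and report every error bit set in the status word,
 * analogous to the reworked hclge_log_error(dev, reg, err, err_sts). */
static void log_error(const char *reg, const struct hw_error *err,
		      uint32_t err_sts)
{
	for (; err->msg; err++)
		if (err->int_msk & err_sts)
			printf("%s %s found [error status=0x%x]\n",
			       reg, err->msg, err_sts);
}

int main(void)
{
	log_error("IGU_INT_STS", igu_int, BIT(2));	/* reports the buf1 mbit error */
	return 0;
}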
-rw-r--r--	drivers/net/ethernet/hisilicon/hns3/hnae3.h	3
-rw-r--r--	drivers/net/ethernet/hisilicon/hns3/hns3_enet.c	4
-rw-r--r--	drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h	27
-rw-r--r--	drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c	1553
-rw-r--r--	drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h	79
-rw-r--r--	drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c	55
-rw-r--r--	drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h	1
7 files changed, 1066 insertions, 656 deletions
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index a1707b77c47f..294e725c7c44 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -136,6 +136,7 @@ enum hnae3_reset_type {
 	HNAE3_CORE_RESET,
 	HNAE3_GLOBAL_RESET,
 	HNAE3_IMP_RESET,
+	HNAE3_UNKNOWN_RESET,
 	HNAE3_NONE_RESET,
 };
 
@@ -454,7 +455,7 @@ struct hnae3_ae_ops {
 	int (*restore_fd_rules)(struct hnae3_handle *handle);
 	void (*enable_fd)(struct hnae3_handle *handle, bool enable);
 	int (*dbg_run_cmd)(struct hnae3_handle *handle, char *cmd_buf);
-	pci_ers_result_t (*process_hw_error)(struct hnae3_ae_dev *ae_dev);
+	pci_ers_result_t (*handle_hw_ras_error)(struct hnae3_ae_dev *ae_dev);
 	bool (*get_hw_reset_stat)(struct hnae3_handle *handle);
 	bool (*ae_dev_resetting)(struct hnae3_handle *handle);
 	unsigned long (*ae_dev_reset_cnt)(struct hnae3_handle *handle);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index d1b2de2ecc89..69142a381064 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1828,8 +1828,8 @@ static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev,
 		return PCI_ERS_RESULT_NONE;
 	}
 
-	if (ae_dev->ops->process_hw_error)
-		ret = ae_dev->ops->process_hw_error(ae_dev);
+	if (ae_dev->ops->handle_hw_ras_error)
+		ret = ae_dev->ops->handle_hw_ras_error(ae_dev);
 	else
 		return PCI_ERS_RESULT_NONE;
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index e1805b972628..b1ee6feb0caf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -215,26 +215,29 @@ enum hclge_opcode_type {
 	HCLGE_OPC_SFP_GET_SPEED		= 0x7104,
 
 	/* Error INT commands */
+	HCLGE_MAC_COMMON_INT_EN		= 0x030E,
 	HCLGE_TM_SCH_ECC_INT_EN		= 0x0829,
-	HCLGE_TM_SCH_ECC_ERR_RINT_CMD	= 0x082d,
-	HCLGE_TM_SCH_ECC_ERR_RINT_CE	= 0x082f,
-	HCLGE_TM_SCH_ECC_ERR_RINT_NFE	= 0x0830,
-	HCLGE_TM_SCH_ECC_ERR_RINT_FE	= 0x0831,
-	HCLGE_TM_SCH_MBIT_ECC_INFO_CMD	= 0x0833,
+	HCLGE_SSU_ECC_INT_CMD		= 0x0989,
+	HCLGE_SSU_COMMON_INT_CMD	= 0x098C,
+	HCLGE_PPU_MPF_ECC_INT_CMD	= 0x0B40,
+	HCLGE_PPU_MPF_OTHER_INT_CMD	= 0x0B41,
+	HCLGE_PPU_PF_OTHER_INT_CMD	= 0x0B42,
 	HCLGE_COMMON_ECC_INT_CFG	= 0x1505,
-	HCLGE_IGU_EGU_TNL_INT_QUERY	= 0x1802,
+	HCLGE_QUERY_RAS_INT_STS_BD_NUM	= 0x1510,
+	HCLGE_QUERY_CLEAR_MPF_RAS_INT	= 0x1511,
+	HCLGE_QUERY_CLEAR_PF_RAS_INT	= 0x1512,
+	HCLGE_QUERY_MSIX_INT_STS_BD_NUM	= 0x1513,
+	HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT	= 0x1514,
+	HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT	= 0x1515,
+	HCLGE_CONFIG_ROCEE_RAS_INT_EN	= 0x1580,
+	HCLGE_QUERY_CLEAR_ROCEE_RAS_INT = 0x1581,
+	HCLGE_ROCEE_PF_RAS_INT_CMD	= 0x1584,
 	HCLGE_IGU_EGU_TNL_INT_EN	= 0x1803,
-	HCLGE_IGU_EGU_TNL_INT_CLR	= 0x1804,
-	HCLGE_IGU_COMMON_INT_QUERY	= 0x1805,
 	HCLGE_IGU_COMMON_INT_EN		= 0x1806,
-	HCLGE_IGU_COMMON_INT_CLR	= 0x1807,
 	HCLGE_TM_QCN_MEM_INT_CFG	= 0x1A14,
-	HCLGE_TM_QCN_MEM_INT_INFO_CMD	= 0x1A17,
 	HCLGE_PPP_CMD0_INT_CMD		= 0x2100,
 	HCLGE_PPP_CMD1_INT_CMD		= 0x2101,
-	HCLGE_NCSI_INT_QUERY		= 0x2400,
 	HCLGE_NCSI_INT_EN		= 0x2401,
-	HCLGE_NCSI_INT_CLR		= 0x2402,
 };
 
 #define HCLGE_TQP_REG_OFFSET		0x80000
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index 6da9e22d82d0..77deea0beeba 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -4,78 +4,39 @@
 #include "hclge_err.h"
 
 static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
-	{ .int_msk = BIT(0), .msg = "imp_itcm0_ecc_1bit_err" },
 	{ .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err" },
-	{ .int_msk = BIT(2), .msg = "imp_itcm1_ecc_1bit_err" },
 	{ .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err" },
-	{ .int_msk = BIT(4), .msg = "imp_itcm2_ecc_1bit_err" },
 	{ .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err" },
-	{ .int_msk = BIT(6), .msg = "imp_itcm3_ecc_1bit_err" },
 	{ .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err" },
-	{ .int_msk = BIT(8), .msg = "imp_dtcm0_mem0_ecc_1bit_err" },
 	{ .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err" },
-	{ .int_msk = BIT(10), .msg = "imp_dtcm0_mem1_ecc_1bit_err" },
 	{ .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err" },
-	{ .int_msk = BIT(12), .msg = "imp_dtcm1_mem0_ecc_1bit_err" },
 	{ .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err" },
-	{ .int_msk = BIT(14), .msg = "imp_dtcm1_mem1_ecc_1bit_err" },
 	{ .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err" },
-	{ /* sentinel */ }
-};
-
-static const struct hclge_hw_error hclge_imp_itcm4_ecc_int[] = {
-	{ .int_msk = BIT(0), .msg = "imp_itcm4_ecc_1bit_err" },
-	{ .int_msk = BIT(1), .msg = "imp_itcm4_ecc_mbit_err" },
+	{ .int_msk = BIT(17), .msg = "imp_itcm4_ecc_mbit_err" },
 	{ /* sentinel */ }
 };
 
 static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
-	{ .int_msk = BIT(0), .msg = "cmdq_nic_rx_depth_ecc_1bit_err" },
 	{ .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err" },
-	{ .int_msk = BIT(2), .msg = "cmdq_nic_tx_depth_ecc_1bit_err" },
 	{ .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err" },
-	{ .int_msk = BIT(4), .msg = "cmdq_nic_rx_tail_ecc_1bit_err" },
 	{ .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err" },
-	{ .int_msk = BIT(6), .msg = "cmdq_nic_tx_tail_ecc_1bit_err" },
 	{ .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err" },
-	{ .int_msk = BIT(8), .msg = "cmdq_nic_rx_head_ecc_1bit_err" },
 	{ .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err" },
-	{ .int_msk = BIT(10), .msg = "cmdq_nic_tx_head_ecc_1bit_err" },
 	{ .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err" },
-	{ .int_msk = BIT(12), .msg = "cmdq_nic_rx_addr_ecc_1bit_err" },
 	{ .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err" },
-	{ .int_msk = BIT(14), .msg = "cmdq_nic_tx_addr_ecc_1bit_err" },
 	{ .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err" },
-	{ /* sentinel */ }
-};
-
-static const struct hclge_hw_error hclge_cmdq_rocee_mem_ecc_int[] = {
-	{ .int_msk = BIT(0), .msg = "cmdq_rocee_rx_depth_ecc_1bit_err" },
-	{ .int_msk = BIT(1), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" },
-	{ .int_msk = BIT(2), .msg = "cmdq_rocee_tx_depth_ecc_1bit_err" },
-	{ .int_msk = BIT(3), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" },
-	{ .int_msk = BIT(4), .msg = "cmdq_rocee_rx_tail_ecc_1bit_err" },
-	{ .int_msk = BIT(5), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" },
-	{ .int_msk = BIT(6), .msg = "cmdq_rocee_tx_tail_ecc_1bit_err" },
-	{ .int_msk = BIT(7), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" },
-	{ .int_msk = BIT(8), .msg = "cmdq_rocee_rx_head_ecc_1bit_err" },
-	{ .int_msk = BIT(9), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" },
-	{ .int_msk = BIT(10), .msg = "cmdq_rocee_tx_head_ecc_1bit_err" },
-	{ .int_msk = BIT(11), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" },
-	{ .int_msk = BIT(12), .msg = "cmdq_rocee_rx_addr_ecc_1bit_err" },
-	{ .int_msk = BIT(13), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" },
-	{ .int_msk = BIT(14), .msg = "cmdq_rocee_tx_addr_ecc_1bit_err" },
-	{ .int_msk = BIT(15), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" },
+	{ .int_msk = BIT(17), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" },
+	{ .int_msk = BIT(19), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" },
+	{ .int_msk = BIT(21), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" },
+	{ .int_msk = BIT(23), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" },
+	{ .int_msk = BIT(25), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" },
+	{ .int_msk = BIT(27), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" },
+	{ .int_msk = BIT(29), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" },
+	{ .int_msk = BIT(31), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" },
 	{ /* sentinel */ }
 };
 
 static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
-	{ .int_msk = BIT(0), .msg = "tqp_int_cfg_even_ecc_1bit_err" },
-	{ .int_msk = BIT(1), .msg = "tqp_int_cfg_odd_ecc_1bit_err" },
-	{ .int_msk = BIT(2), .msg = "tqp_int_ctrl_even_ecc_1bit_err" },
-	{ .int_msk = BIT(3), .msg = "tqp_int_ctrl_odd_ecc_1bit_err" },
-	{ .int_msk = BIT(4), .msg = "tx_que_scan_int_ecc_1bit_err" },
-	{ .int_msk = BIT(5), .msg = "rx_que_scan_int_ecc_1bit_err" },
 	{ .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err" },
 	{ .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err" },
 	{ .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err" },
@@ -85,15 +46,19 @@ static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
 	{ /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_igu_com_err_int[] = {
+static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = {
+	{ .int_msk = BIT(1), .msg = "msix_nic_ecc_mbit_err" },
+	{ .int_msk = BIT(3), .msg = "msix_rocee_ecc_mbit_err" },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_igu_int[] = {
 	{ .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err" },
-	{ .int_msk = BIT(1), .msg = "igu_rx_buf0_ecc_1bit_err" },
 	{ .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err" },
-	{ .int_msk = BIT(3), .msg = "igu_rx_buf1_ecc_1bit_err" },
 	{ /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_igu_egu_tnl_err_int[] = {
+static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = {
 	{ .int_msk = BIT(0), .msg = "rx_buf_overflow" },
 	{ .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow" },
 	{ .int_msk = BIT(2), .msg = "rx_stp_fifo_undeflow" },
@@ -104,51 +69,11 @@ static const struct hclge_hw_error hclge_igu_egu_tnl_err_int[] = {
 };
 
 static const struct hclge_hw_error hclge_ncsi_err_int[] = {
-	{ .int_msk = BIT(0), .msg = "ncsi_tx_ecc_1bit_err" },
 	{ .int_msk = BIT(1), .msg = "ncsi_tx_ecc_mbit_err" },
 	{ /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_ppp_mpf_int0[] = {
-	{ .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_1bit_err" },
-	{ .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_1bit_err" },
-	{ .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_1bit_err" },
-	{ .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_1bit_err" },
-	{ .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_1bit_err" },
-	{ .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_1bit_err" },
-	{ .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_1bit_err" },
-	{ .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_1bit_err" },
-	{ .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_1bit_err" },
-	{ .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_1bit_err" },
-	{ .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_1bit_err" },
-	{ .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_1bit_err" },
-	{ .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_1bit_err" },
-	{ .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_1bit_err" },
-	{ .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_1bit_err" },
-	{ .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_1bit_err" },
-	{ .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_1bit_err" },
-	{ .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_1bit_err" },
-	{ .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_1bit_err" },
-	{ .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_1bit_err" },
-	{ .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(27),
-		.msg = "flow_director_ad_mem0_ecc_1bit_err" },
-	{ .int_msk = BIT(28),
-		.msg = "flow_director_ad_mem1_ecc_1bit_err" },
-	{ .int_msk = BIT(29),
-		.msg = "rx_vlan_tag_memory_ecc_1bit_err" },
-	{ .int_msk = BIT(30),
-		.msg = "Tx_UP_mapping_config_mem_ecc_1bit_err" },
-	{ /* sentinel */ }
-};
-
-static const struct hclge_hw_error hclge_ppp_mpf_int1[] = {
+static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = {
 	{ .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err" },
 	{ .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err" },
 	{ .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err" },
@@ -187,23 +112,13 @@ static const struct hclge_hw_error hclge_ppp_mpf_int1[] = {
 	{ /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_ppp_pf_int[] = {
-	{ .int_msk = BIT(0), .msg = "Tx_vlan_tag_err" },
+static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = {
+	{ .int_msk = BIT(0), .msg = "tx_vlan_tag_err" },
 	{ .int_msk = BIT(1), .msg = "rss_list_tc_unassigned_queue_err" },
 	{ /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_ppp_mpf_int2[] = {
-	{ .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_1bit_err" },
-	{ .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_1bit_err" },
-	{ .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_1bit_err" },
-	{ .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_1bit_err" },
-	{ /* sentinel */ }
-};
-
-static const struct hclge_hw_error hclge_ppp_mpf_int3[] = {
+static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = {
 	{ .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err" },
 	{ .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err" },
 	{ .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err" },
@@ -213,145 +128,248 @@ static const struct hclge_hw_error hclge_ppp_mpf_int3[] = {
 	{ /* sentinel */ }
 };
 
-struct hclge_tm_sch_ecc_info {
-	const char *name;
-};
-
-static const struct hclge_tm_sch_ecc_info hclge_tm_sch_ecc_err[7][15] = {
-	{
-		{ .name = "QSET_QUEUE_CTRL:PRI_LEN TAB" },
-		{ .name = "QSET_QUEUE_CTRL:SPA_LEN TAB" },
-		{ .name = "QSET_QUEUE_CTRL:SPB_LEN TAB" },
-		{ .name = "QSET_QUEUE_CTRL:WRRA_LEN TAB" },
-		{ .name = "QSET_QUEUE_CTRL:WRRB_LEN TAB" },
-		{ .name = "QSET_QUEUE_CTRL:SPA_HPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:SPB_HPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:WRRA_HPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:WRRB_HPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:QS_LINKLIST TAB" },
-		{ .name = "QSET_QUEUE_CTRL:SPA_TPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:SPB_TPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:WRRA_TPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:WRRB_TPTR TAB" },
-		{ .name = "QSET_QUEUE_CTRL:QS_DEFICITCNT TAB" },
-	},
-	{
-		{ .name = "ROCE_QUEUE_CTRL:QS_LEN TAB" },
-		{ .name = "ROCE_QUEUE_CTRL:QS_TPTR TAB" },
-		{ .name = "ROCE_QUEUE_CTRL:QS_HPTR TAB" },
-		{ .name = "ROCE_QUEUE_CTRL:QLINKLIST TAB" },
-		{ .name = "ROCE_QUEUE_CTRL:QCLEN TAB" },
-	},
-	{
-		{ .name = "NIC_QUEUE_CTRL:QS_LEN TAB" },
-		{ .name = "NIC_QUEUE_CTRL:QS_TPTR TAB" },
-		{ .name = "NIC_QUEUE_CTRL:QS_HPTR TAB" },
-		{ .name = "NIC_QUEUE_CTRL:QLINKLIST TAB" },
-		{ .name = "NIC_QUEUE_CTRL:QCLEN TAB" },
-	},
-	{
-		{ .name = "RAM_CFG_CTRL:CSHAP TAB" },
-		{ .name = "RAM_CFG_CTRL:PSHAP TAB" },
-	},
-	{
-		{ .name = "SHAPER_CTRL:PSHAP TAB" },
-	},
-	{
-		{ .name = "MSCH_CTRL" },
-	},
-	{
-		{ .name = "TOP_CTRL" },
-	},
-};
-
-static const struct hclge_hw_error hclge_tm_sch_err_int[] = {
-	{ .int_msk = BIT(0), .msg = "tm_sch_ecc_1bit_err" },
+static const struct hclge_hw_error hclge_tm_sch_rint[] = {
 	{ .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err" },
-	{ .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_full_err" },
-	{ .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_empty_err" },
-	{ .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_full_err" },
-	{ .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_empty_err" },
-	{ .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_full_err" },
-	{ .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_empty_err" },
-	{ .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_full_err" },
-	{ .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_empty_err" },
-	{ .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_full_err" },
-	{ .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_empty_err" },
+	{ .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_err" },
+	{ .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_err" },
+	{ .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_err" },
+	{ .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_err" },
+	{ .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_err" },
+	{ .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_err" },
+	{ .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_err" },
+	{ .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_err" },
+	{ .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_err" },
+	{ .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_err" },
 	{ .int_msk = BIT(12),
-	  .msg = "tm_sch_port_shap_offset_fifo_wr_full_err" },
+	  .msg = "tm_sch_port_shap_offset_fifo_wr_err" },
 	{ .int_msk = BIT(13),
-	  .msg = "tm_sch_port_shap_offset_fifo_rd_empty_err" },
+	  .msg = "tm_sch_port_shap_offset_fifo_rd_err" },
 	{ .int_msk = BIT(14),
-	  .msg = "tm_sch_pg_pshap_offset_fifo_wr_full_err" },
+	  .msg = "tm_sch_pg_pshap_offset_fifo_wr_err" },
 	{ .int_msk = BIT(15),
-	  .msg = "tm_sch_pg_pshap_offset_fifo_rd_empty_err" },
+	  .msg = "tm_sch_pg_pshap_offset_fifo_rd_err" },
 	{ .int_msk = BIT(16),
-	  .msg = "tm_sch_pg_cshap_offset_fifo_wr_full_err" },
+	  .msg = "tm_sch_pg_cshap_offset_fifo_wr_err" },
 	{ .int_msk = BIT(17),
-	  .msg = "tm_sch_pg_cshap_offset_fifo_rd_empty_err" },
+	  .msg = "tm_sch_pg_cshap_offset_fifo_rd_err" },
 	{ .int_msk = BIT(18),
-	  .msg = "tm_sch_pri_pshap_offset_fifo_wr_full_err" },
+	  .msg = "tm_sch_pri_pshap_offset_fifo_wr_err" },
 	{ .int_msk = BIT(19),
-	  .msg = "tm_sch_pri_pshap_offset_fifo_rd_empty_err" },
+	  .msg = "tm_sch_pri_pshap_offset_fifo_rd_err" },
 	{ .int_msk = BIT(20),
-	  .msg = "tm_sch_pri_cshap_offset_fifo_wr_full_err" },
+	  .msg = "tm_sch_pri_cshap_offset_fifo_wr_err" },
 	{ .int_msk = BIT(21),
-	  .msg = "tm_sch_pri_cshap_offset_fifo_rd_empty_err" },
-	{ .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_full_err" },
-	{ .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_empty_err" },
-	{ .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_full_err" },
-	{ .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_empty_err" },
-	{ .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_full_err" },
-	{ .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_empty_err" },
-	{ .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_full_err" },
-	{ .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_empty_err" },
-	{ .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_full_err" },
-	{ .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_empty_err" },
+	  .msg = "tm_sch_pri_cshap_offset_fifo_rd_err" },
+	{ .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_err" },
+	{ .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_err" },
+	{ .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_err" },
+	{ .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_err" },
+	{ .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_err" },
+	{ .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_err" },
+	{ .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_err" },
+	{ .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_err" },
+	{ .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_err" },
+	{ .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_err" },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_qcn_fifo_rint[] = {
+	{ .int_msk = BIT(0), .msg = "qcn_shap_gp0_sch_fifo_rd_err" },
+	{ .int_msk = BIT(1), .msg = "qcn_shap_gp0_sch_fifo_wr_err" },
+	{ .int_msk = BIT(2), .msg = "qcn_shap_gp1_sch_fifo_rd_err" },
+	{ .int_msk = BIT(3), .msg = "qcn_shap_gp1_sch_fifo_wr_err" },
+	{ .int_msk = BIT(4), .msg = "qcn_shap_gp2_sch_fifo_rd_err" },
+	{ .int_msk = BIT(5), .msg = "qcn_shap_gp2_sch_fifo_wr_err" },
+	{ .int_msk = BIT(6), .msg = "qcn_shap_gp3_sch_fifo_rd_err" },
+	{ .int_msk = BIT(7), .msg = "qcn_shap_gp3_sch_fifo_wr_err" },
+	{ .int_msk = BIT(8), .msg = "qcn_shap_gp0_offset_fifo_rd_err" },
+	{ .int_msk = BIT(9), .msg = "qcn_shap_gp0_offser_fifo_wr_err" },
+	{ .int_msk = BIT(10), .msg = "qcn_shap_gp1_offset_fifo_rd_err" },
+	{ .int_msk = BIT(11), .msg = "qcn_shap_gp1_offset_fifo_wr_err" },
+	{ .int_msk = BIT(12), .msg = "qcn_shap_gp2_offset_fifo_rd_err" },
+	{ .int_msk = BIT(13), .msg = "qcn_shap_gp2_offset_fifo_wr_err" },
+	{ .int_msk = BIT(14), .msg = "qcn_shap_gp3_offset_fifo_rd_err" },
+	{ .int_msk = BIT(15), .msg = "qcn_shap_gp3_offset_fifo_wr_err" },
+	{ .int_msk = BIT(16), .msg = "qcn_byte_info_fifo_rd_err" },
+	{ .int_msk = BIT(17), .msg = "qcn_byte_info_fifo_wr_err" },
 	{ /* sentinel */ }
 };
 
-static const struct hclge_hw_error hclge_qcn_ecc_err_int[] = {
-	{ .int_msk = BIT(0), .msg = "qcn_byte_mem_ecc_1bit_err" },
+static const struct hclge_hw_error hclge_qcn_ecc_rint[] = {
 	{ .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err" },
-	{ .int_msk = BIT(2), .msg = "qcn_time_mem_ecc_1bit_err" },
 	{ .int_msk = BIT(3), .msg = "qcn_time_mem_ecc_mbit_err" },
-	{ .int_msk = BIT(4), .msg = "qcn_fb_mem_ecc_1bit_err" },
 	{ .int_msk = BIT(5), .msg = "qcn_fb_mem_ecc_mbit_err" },
-	{ .int_msk = BIT(6), .msg = "qcn_link_mem_ecc_1bit_err" },
 	{ .int_msk = BIT(7), .msg = "qcn_link_mem_ecc_mbit_err" },
-	{ .int_msk = BIT(8), .msg = "qcn_rate_mem_ecc_1bit_err" },
 	{ .int_msk = BIT(9), .msg = "qcn_rate_mem_ecc_mbit_err" },
-	{ .int_msk = BIT(10), .msg = "qcn_tmplt_mem_ecc_1bit_err" },
 	{ .int_msk = BIT(11), .msg = "qcn_tmplt_mem_ecc_mbit_err" },
-	{ .int_msk = BIT(12), .msg = "qcn_shap_cfg_mem_ecc_1bit_err" },
 	{ .int_msk = BIT(13), .msg = "qcn_shap_cfg_mem_ecc_mbit_err" },
-	{ .int_msk = BIT(14), .msg = "qcn_gp0_barrel_mem_ecc_1bit_err" },
 	{ .int_msk = BIT(15), .msg = "qcn_gp0_barrel_mem_ecc_mbit_err" },
-	{ .int_msk = BIT(16), .msg = "qcn_gp1_barrel_mem_ecc_1bit_err" },
 	{ .int_msk = BIT(17), .msg = "qcn_gp1_barrel_mem_ecc_mbit_err" },
-	{ .int_msk = BIT(18), .msg = "qcn_gp2_barrel_mem_ecc_1bit_err" },
 	{ .int_msk = BIT(19), .msg = "qcn_gp2_barrel_mem_ecc_mbit_err" },
-	{ .int_msk = BIT(20), .msg = "qcn_gp3_barral_mem_ecc_1bit_err" },
 	{ .int_msk = BIT(21), .msg = "qcn_gp3_barral_mem_ecc_mbit_err" },
 	{ /* sentinel */ }
 };
 
-static void hclge_log_error(struct device *dev,
-			    const struct hclge_hw_error *err_list,
+static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = {
+	{ .int_msk = BIT(0), .msg = "egu_cge_afifo_ecc_1bit_err" },
+	{ .int_msk = BIT(1), .msg = "egu_cge_afifo_ecc_mbit_err" },
+	{ .int_msk = BIT(2), .msg = "egu_lge_afifo_ecc_1bit_err" },
+	{ .int_msk = BIT(3), .msg = "egu_lge_afifo_ecc_mbit_err" },
+	{ .int_msk = BIT(4), .msg = "cge_igu_afifo_ecc_1bit_err" },
+	{ .int_msk = BIT(5), .msg = "cge_igu_afifo_ecc_mbit_err" },
+	{ .int_msk = BIT(6), .msg = "lge_igu_afifo_ecc_1bit_err" },
+	{ .int_msk = BIT(7), .msg = "lge_igu_afifo_ecc_mbit_err" },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = {
+	{ .int_msk = BIT(13), .msg = "rpu_rx_pkt_bit32_ecc_mbit_err" },
+	{ .int_msk = BIT(14), .msg = "rpu_rx_pkt_bit33_ecc_mbit_err" },
+	{ .int_msk = BIT(15), .msg = "rpu_rx_pkt_bit34_ecc_mbit_err" },
+	{ .int_msk = BIT(16), .msg = "rpu_rx_pkt_bit35_ecc_mbit_err" },
+	{ .int_msk = BIT(17), .msg = "rcb_tx_ring_ecc_mbit_err" },
+	{ .int_msk = BIT(18), .msg = "rcb_rx_ring_ecc_mbit_err" },
+	{ .int_msk = BIT(19), .msg = "rcb_tx_fbd_ecc_mbit_err" },
+	{ .int_msk = BIT(20), .msg = "rcb_rx_ebd_ecc_mbit_err" },
+	{ .int_msk = BIT(21), .msg = "rcb_tso_info_ecc_mbit_err" },
+	{ .int_msk = BIT(22), .msg = "rcb_tx_int_info_ecc_mbit_err" },
+	{ .int_msk = BIT(23), .msg = "rcb_rx_int_info_ecc_mbit_err" },
+	{ .int_msk = BIT(24), .msg = "tpu_tx_pkt_0_ecc_mbit_err" },
+	{ .int_msk = BIT(25), .msg = "tpu_tx_pkt_1_ecc_mbit_err" },
+	{ .int_msk = BIT(26), .msg = "rd_bus_err" },
+	{ .int_msk = BIT(27), .msg = "wr_bus_err" },
+	{ .int_msk = BIT(28), .msg = "reg_search_miss" },
+	{ .int_msk = BIT(29), .msg = "rx_q_search_miss" },
+	{ .int_msk = BIT(30), .msg = "ooo_ecc_err_detect" },
+	{ .int_msk = BIT(31), .msg = "ooo_ecc_err_multpl" },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = {
+	{ .int_msk = BIT(4), .msg = "gro_bd_ecc_mbit_err" },
+	{ .int_msk = BIT(5), .msg = "gro_context_ecc_mbit_err" },
+	{ .int_msk = BIT(6), .msg = "rx_stash_cfg_ecc_mbit_err" },
+	{ .int_msk = BIT(7), .msg = "axi_rd_fbd_ecc_mbit_err" },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = {
+	{ .int_msk = BIT(0), .msg = "over_8bd_no_fe" },
+	{ .int_msk = BIT(1), .msg = "tso_mss_cmp_min_err" },
+	{ .int_msk = BIT(2), .msg = "tso_mss_cmp_max_err" },
+	{ .int_msk = BIT(3), .msg = "tx_rd_fbd_poison" },
+	{ .int_msk = BIT(4), .msg = "rx_rd_ebd_poison" },
+	{ .int_msk = BIT(5), .msg = "buf_wait_timeout" },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ssu_com_err_int[] = {
+	{ .int_msk = BIT(0), .msg = "buf_sum_err" },
+	{ .int_msk = BIT(1), .msg = "ppp_mb_num_err" },
+	{ .int_msk = BIT(2), .msg = "ppp_mbid_err" },
+	{ .int_msk = BIT(3), .msg = "ppp_rlt_mac_err" },
+	{ .int_msk = BIT(4), .msg = "ppp_rlt_host_err" },
+	{ .int_msk = BIT(5), .msg = "cks_edit_position_err" },
+	{ .int_msk = BIT(6), .msg = "cks_edit_condition_err" },
+	{ .int_msk = BIT(7), .msg = "vlan_edit_condition_err" },
+	{ .int_msk = BIT(8), .msg = "vlan_num_ot_err" },
+	{ .int_msk = BIT(9), .msg = "vlan_num_in_err" },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = {
+	{ .int_msk = BIT(0), .msg = "roc_pkt_without_key_port" },
+	{ .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port" },
+	{ .int_msk = BIT(2), .msg = "igu_pkt_without_key_port" },
+	{ .int_msk = BIT(3), .msg = "roc_eof_mis_match_port" },
+	{ .int_msk = BIT(4), .msg = "tpu_eof_mis_match_port" },
+	{ .int_msk = BIT(5), .msg = "igu_eof_mis_match_port" },
+	{ .int_msk = BIT(6), .msg = "roc_sof_mis_match_port" },
+	{ .int_msk = BIT(7), .msg = "tpu_sof_mis_match_port" },
+	{ .int_msk = BIT(8), .msg = "igu_sof_mis_match_port" },
+	{ .int_msk = BIT(11), .msg = "ets_rd_int_rx_port" },
+	{ .int_msk = BIT(12), .msg = "ets_wr_int_rx_port" },
+	{ .int_msk = BIT(13), .msg = "ets_rd_int_tx_port" },
+	{ .int_msk = BIT(14), .msg = "ets_wr_int_tx_port" },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ssu_fifo_overflow_int[] = {
+	{ .int_msk = BIT(0), .msg = "ig_mac_inf_int" },
+	{ .int_msk = BIT(1), .msg = "ig_host_inf_int" },
+	{ .int_msk = BIT(2), .msg = "ig_roc_buf_int" },
+	{ .int_msk = BIT(3), .msg = "ig_host_data_fifo_int" },
+	{ .int_msk = BIT(4), .msg = "ig_host_key_fifo_int" },
+	{ .int_msk = BIT(5), .msg = "tx_qcn_fifo_int" },
+	{ .int_msk = BIT(6), .msg = "rx_qcn_fifo_int" },
+	{ .int_msk = BIT(7), .msg = "tx_pf_rd_fifo_int" },
+	{ .int_msk = BIT(8), .msg = "rx_pf_rd_fifo_int" },
+	{ .int_msk = BIT(9), .msg = "qm_eof_fifo_int" },
+	{ .int_msk = BIT(10), .msg = "mb_rlt_fifo_int" },
+	{ .int_msk = BIT(11), .msg = "dup_uncopy_fifo_int" },
+	{ .int_msk = BIT(12), .msg = "dup_cnt_rd_fifo_int" },
+	{ .int_msk = BIT(13), .msg = "dup_cnt_drop_fifo_int" },
+	{ .int_msk = BIT(14), .msg = "dup_cnt_wrb_fifo_int" },
+	{ .int_msk = BIT(15), .msg = "host_cmd_fifo_int" },
+	{ .int_msk = BIT(16), .msg = "mac_cmd_fifo_int" },
+	{ .int_msk = BIT(17), .msg = "host_cmd_bitmap_empty_int" },
+	{ .int_msk = BIT(18), .msg = "mac_cmd_bitmap_empty_int" },
+	{ .int_msk = BIT(19), .msg = "dup_bitmap_empty_int" },
+	{ .int_msk = BIT(20), .msg = "out_queue_bitmap_empty_int" },
+	{ .int_msk = BIT(21), .msg = "bank2_bitmap_empty_int" },
+	{ .int_msk = BIT(22), .msg = "bank1_bitmap_empty_int" },
+	{ .int_msk = BIT(23), .msg = "bank0_bitmap_empty_int" },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = {
+	{ .int_msk = BIT(0), .msg = "ets_rd_int_rx_tcg" },
+	{ .int_msk = BIT(1), .msg = "ets_wr_int_rx_tcg" },
+	{ .int_msk = BIT(2), .msg = "ets_rd_int_tx_tcg" },
+	{ .int_msk = BIT(3), .msg = "ets_wr_int_tx_tcg" },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = {
+	{ .int_msk = BIT(0), .msg = "roc_pkt_without_key_port" },
+	{ .int_msk = BIT(9), .msg = "low_water_line_err_port" },
+	{ .int_msk = BIT(10), .msg = "hi_water_line_err_port" },
+	{ /* sentinel */ }
+};
+
+static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
+	{ .int_msk = 0, .msg = "rocee qmm ovf: sgid invalid err" },
+	{ .int_msk = 0x4, .msg = "rocee qmm ovf: sgid ovf err" },
+	{ .int_msk = 0x8, .msg = "rocee qmm ovf: smac invalid err" },
+	{ .int_msk = 0xC, .msg = "rocee qmm ovf: smac ovf err" },
+	{ .int_msk = 0x10, .msg = "rocee qmm ovf: cqc invalid err" },
+	{ .int_msk = 0x11, .msg = "rocee qmm ovf: cqc ovf err" },
+	{ .int_msk = 0x12, .msg = "rocee qmm ovf: cqc hopnum err" },
+	{ .int_msk = 0x13, .msg = "rocee qmm ovf: cqc ba0 err" },
+	{ .int_msk = 0x14, .msg = "rocee qmm ovf: srqc invalid err" },
+	{ .int_msk = 0x15, .msg = "rocee qmm ovf: srqc ovf err" },
+	{ .int_msk = 0x16, .msg = "rocee qmm ovf: srqc hopnum err" },
+	{ .int_msk = 0x17, .msg = "rocee qmm ovf: srqc ba0 err" },
+	{ .int_msk = 0x18, .msg = "rocee qmm ovf: mpt invalid err" },
+	{ .int_msk = 0x19, .msg = "rocee qmm ovf: mpt ovf err" },
+	{ .int_msk = 0x1A, .msg = "rocee qmm ovf: mpt hopnum err" },
+	{ .int_msk = 0x1B, .msg = "rocee qmm ovf: mpt ba0 err" },
+	{ .int_msk = 0x1C, .msg = "rocee qmm ovf: qpc invalid err" },
+	{ .int_msk = 0x1D, .msg = "rocee qmm ovf: qpc ovf err" },
+	{ .int_msk = 0x1E, .msg = "rocee qmm ovf: qpc hopnum err" },
+	{ .int_msk = 0x1F, .msg = "rocee qmm ovf: qpc ba0 err" },
+	{ /* sentinel */ }
+};
+
+static void hclge_log_error(struct device *dev, char *reg,
+			    const struct hclge_hw_error *err,
 			    u32 err_sts)
 {
-	const struct hclge_hw_error *err;
-	int i = 0;
-
-	while (err_list[i].msg) {
-		err = &err_list[i];
-		if (!(err->int_msk & err_sts)) {
-			i++;
-			continue;
-		}
-		dev_warn(dev, "%s [error status=0x%x] found\n",
-			 err->msg, err_sts);
-		i++;
+	while (err->msg) {
+		if (err->int_msk & err_sts)
+			dev_warn(dev, "%s %s found [error status=0x%x]\n",
+				 reg, err->msg, err_sts);
+		err++;
 	}
 }
 
@@ -391,96 +409,44 @@ static int hclge_cmd_query_error(struct hclge_dev *hdev,
 	return ret;
 }
 
-/* hclge_cmd_clear_error: clear the error status
- * @hdev: pointer to struct hclge_dev
- * @desc: descriptor for describing the command
- * @desc_src: prefilled descriptor from the previous command for reusing
- * @cmd:  command opcode
- * @flag: flag for extended command structure
- *
- * This function clear the error status in the hw register/s using command
- */
-static int hclge_cmd_clear_error(struct hclge_dev *hdev,
-				 struct hclge_desc *desc,
-				 struct hclge_desc *desc_src,
-				 u32 cmd, u16 flag)
-{
-	struct device *dev = &hdev->pdev->dev;
-	int num = 1;
-	int ret, i;
-
-	if (cmd) {
-		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
-		if (flag) {
-			desc[0].flag |= cpu_to_le16(flag);
-			hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
-			num = 2;
-		}
-		if (desc_src) {
-			for (i = 0; i < 6; i++) {
-				desc[0].data[i] = desc_src[0].data[i];
-				if (flag)
-					desc[1].data[i] = desc_src[1].data[i];
-			}
-		}
-	} else {
-		hclge_cmd_reuse_desc(&desc[0], false);
-		if (flag) {
-			desc[0].flag |= cpu_to_le16(flag);
-			hclge_cmd_reuse_desc(&desc[1], false);
-			num = 2;
-		}
-	}
-	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
-	if (ret)
-		dev_err(dev, "clear error cmd failed (%d)\n", ret);
-
-	return ret;
-}
-
-static int hclge_enable_common_error(struct hclge_dev *hdev, bool en)
+static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en)
 {
 	struct device *dev = &hdev->pdev->dev;
 	struct hclge_desc desc[2];
 	int ret;
 
+	/* configure common error interrupts */
 	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false);
 	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
 	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false);
 
 	if (en) {
-		/* enable COMMON error interrupts */
 		desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN);
 		desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN |
 					HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN);
 		desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN);
-		desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN);
+		desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN |
+					      HCLGE_MSIX_SRAM_ECC_ERR_INT_EN);
 		desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN);
-	} else {
-		/* disable COMMON error interrupts */
-		desc[0].data[0] = 0;
-		desc[0].data[2] = 0;
-		desc[0].data[3] = 0;
-		desc[0].data[4] = 0;
-		desc[0].data[5] = 0;
 	}
+
 	desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK);
 	desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK |
 				HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK);
 	desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK);
-	desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK);
+	desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK |
+				      HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK);
 	desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK);
 
 	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
 	if (ret)
 		dev_err(dev,
-			"failed(%d) to enable/disable COMMON err interrupts\n",
-			ret);
+			"fail(%d) to configure common err interrupts\n", ret);
 
 	return ret;
 }
 
-static int hclge_enable_ncsi_error(struct hclge_dev *hdev, bool en)
+static int hclge_config_ncsi_hw_err_int(struct hclge_dev *hdev, bool en)
 {
 	struct device *dev = &hdev->pdev->dev;
 	struct hclge_desc desc;
@@ -489,74 +455,65 @@ static int hclge_enable_ncsi_error(struct hclge_dev *hdev, bool en)
 	if (hdev->pdev->revision < 0x21)
 		return 0;
 
-	/* enable/disable NCSI  error interrupts */
+	/* configure NCSI error interrupts */
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_NCSI_INT_EN, false);
 	if (en)
 		desc.data[0] = cpu_to_le32(HCLGE_NCSI_ERR_INT_EN);
-	else
-		desc.data[0] = 0;
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret)
 		dev_err(dev,
-			"failed(%d) to enable/disable NCSI error interrupts\n",
-			ret);
+			"fail(%d) to configure  NCSI error interrupts\n", ret);
 
 	return ret;
 }
 
-static int hclge_enable_igu_egu_error(struct hclge_dev *hdev, bool en)
+static int hclge_config_igu_egu_hw_err_int(struct hclge_dev *hdev, bool en)
 {
 	struct device *dev = &hdev->pdev->dev;
 	struct hclge_desc desc;
 	int ret;
 
-	/* enable/disable error interrupts */
+	/* configure IGU,EGU error interrupts */
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false);
 	if (en)
 		desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN);
-	else
-		desc.data[0] = 0;
+
 	desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK);
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
 		dev_err(dev,
-			"failed(%d) to enable/disable IGU common interrupts\n",
-			ret);
+			"fail(%d) to configure IGU common interrupts\n", ret);
 		return ret;
 	}
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_EGU_TNL_INT_EN, false);
 	if (en)
 		desc.data[0] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN);
-	else
-		desc.data[0] = 0;
+
 	desc.data[1] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN_MASK);
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
 		dev_err(dev,
-			"failed(%d) to enable/disable IGU-EGU TNL interrupts\n",
-			ret);
+			"fail(%d) to configure IGU-EGU TNL interrupts\n", ret);
 		return ret;
 	}
 
-	ret = hclge_enable_ncsi_error(hdev, en);
-	if (ret)
-		dev_err(dev, "fail(%d) to en/disable err int\n", ret);
+	ret = hclge_config_ncsi_hw_err_int(hdev, en);
 
 	return ret;
 }
 
-static int hclge_enable_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd,
+static int hclge_config_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd,
 					    bool en)
 {
 	struct device *dev = &hdev->pdev->dev;
 	struct hclge_desc desc[2];
 	int ret;
 
-	/* enable/disable PPP error interrupts */
+	/* configure PPP error interrupts */
 	hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
 	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
 	hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
@@ -567,24 +524,24 @@ static int hclge_enable_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd,
 				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN);
 			desc[0].data[1] =
 				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN);
-		} else {
-			desc[0].data[0] = 0;
-			desc[0].data[1] = 0;
+			desc[0].data[4] = cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN);
 		}
+
 		desc[1].data[0] =
 			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK);
 		desc[1].data[1] =
 			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK);
+		if (hdev->pdev->revision >= 0x21)
+			desc[1].data[2] =
+				cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN_MASK);
 	} else if (cmd == HCLGE_PPP_CMD1_INT_CMD) {
 		if (en) {
 			desc[0].data[0] =
 				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN);
 			desc[0].data[1] =
 				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN);
-		} else {
-			desc[0].data[0] = 0;
-			desc[0].data[1] = 0;
 		}
+
 		desc[1].data[0] =
 				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK);
 		desc[1].data[1] =
@@ -593,491 +550,863 @@ static int hclge_enable_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd,
 
 	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
 	if (ret)
-		dev_err(dev,
-			"failed(%d) to enable/disable PPP error interrupts\n",
-			ret);
+		dev_err(dev, "fail(%d) to configure PPP error intr\n", ret);
 
 	return ret;
 }
 
-static int hclge_enable_ppp_error(struct hclge_dev *hdev, bool en)
+static int hclge_config_ppp_hw_err_int(struct hclge_dev *hdev, bool en)
 {
-	struct device *dev = &hdev->pdev->dev;
 	int ret;
 
-	ret = hclge_enable_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD,
+	ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD,
 					       en);
-	if (ret) {
-		dev_err(dev,
-			"failed(%d) to enable/disable PPP error intr 0,1\n",
-			ret);
+	if (ret)
 		return ret;
-	}
 
-	ret = hclge_enable_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD,
+	ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD,
 					       en);
-	if (ret)
-		dev_err(dev,
-			"failed(%d) to enable/disable PPP error intr 2,3\n",
-			ret);
 
 	return ret;
 }
 
-int hclge_enable_tm_hw_error(struct hclge_dev *hdev, bool en)
+static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en)
 {
 	struct device *dev = &hdev->pdev->dev;
 	struct hclge_desc desc;
 	int ret;
 
-	/* enable TM SCH hw errors */
+	/* configure TM SCH hw errors */
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_SCH_ECC_INT_EN, false);
 	if (en)
 		desc.data[0] = cpu_to_le32(HCLGE_TM_SCH_ECC_ERR_INT_EN);
-	else
-		desc.data[0] = 0;
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
-		dev_err(dev, "failed(%d) to configure TM SCH errors\n", ret);
+		dev_err(dev, "fail(%d) to configure TM SCH errors\n", ret);
 		return ret;
 	}
 
-	/* enable TM QCN hw errors */
+	/* configure TM QCN hw errors */
 	ret = hclge_cmd_query_error(hdev, &desc, HCLGE_TM_QCN_MEM_INT_CFG,
 				    0, 0, 0);
 	if (ret) {
-		dev_err(dev, "failed(%d) to read TM QCN CFG status\n", ret);
+		dev_err(dev, "fail(%d) to read TM QCN CFG status\n", ret);
 		return ret;
 	}
 
 	hclge_cmd_reuse_desc(&desc, false);
 	if (en)
 		desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN);
-	else
-		desc.data[1] = 0;
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret)
 		dev_err(dev,
-			"failed(%d) to configure TM QCN mem errors\n", ret);
+			"fail(%d) to configure TM QCN mem errors\n", ret);
 
 	return ret;
 }
 
-static void hclge_process_common_error(struct hclge_dev *hdev,
-				       enum hclge_err_int_type type)
+static int hclge_config_mac_err_int(struct hclge_dev *hdev, bool en)
 {
 	struct device *dev = &hdev->pdev->dev;
-	struct hclge_desc desc[2];
-	u32 err_sts;
+	struct hclge_desc desc;
 	int ret;
 
-	/* read err sts */
-	ret = hclge_cmd_query_error(hdev, &desc[0],
-				    HCLGE_COMMON_ECC_INT_CFG,
-				    HCLGE_CMD_FLAG_NEXT, 0, 0);
-	if (ret) {
+	/* configure MAC common error interrupts */
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_COMMON_INT_EN, false);
+	if (en)
+		desc.data[0] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN);
+
+	desc.data[1] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN_MASK);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
 		dev_err(dev,
-			"failed(=%d) to query COMMON error interrupt status\n",
-			ret);
-		return;
-	}
+			"fail(%d) to configure MAC COMMON error intr\n", ret);
 
-	/* log err */
-	err_sts = (le32_to_cpu(desc[0].data[0])) & HCLGE_IMP_TCM_ECC_INT_MASK;
-	hclge_log_error(dev, &hclge_imp_tcm_ecc_int[0], err_sts);
+	return ret;
+}
 
-	err_sts = (le32_to_cpu(desc[0].data[1])) & HCLGE_CMDQ_ECC_INT_MASK;
-	hclge_log_error(dev, &hclge_cmdq_nic_mem_ecc_int[0], err_sts);
+static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd,
+					     bool en)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc[2];
+	int num = 1;
+	int ret;
 
-	err_sts = (le32_to_cpu(desc[0].data[1]) >> HCLGE_CMDQ_ROC_ECC_INT_SHIFT)
-		   & HCLGE_CMDQ_ECC_INT_MASK;
-	hclge_log_error(dev, &hclge_cmdq_rocee_mem_ecc_int[0], err_sts);
+	/* configure PPU error interrupts */
+	if (cmd == HCLGE_PPU_MPF_ECC_INT_CMD) {
+		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
+		desc[0].flag |= HCLGE_CMD_FLAG_NEXT;
+		hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
+		if (en) {
+			desc[0].data[0] = HCLGE_PPU_MPF_ABNORMAL_INT0_EN;
+			desc[0].data[1] = HCLGE_PPU_MPF_ABNORMAL_INT1_EN;
+			desc[1].data[3] = HCLGE_PPU_MPF_ABNORMAL_INT3_EN;
+			desc[1].data[4] = HCLGE_PPU_MPF_ABNORMAL_INT2_EN;
+		}
 
-	if ((le32_to_cpu(desc[0].data[3])) & BIT(0))
-		dev_warn(dev, "imp_rd_data_poison_err found\n");
+		desc[1].data[0] = HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK;
+		desc[1].data[1] = HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK;
+		desc[1].data[2] = HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK;
+		desc[1].data[3] |= HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK;
+		num = 2;
+	} else if (cmd == HCLGE_PPU_MPF_OTHER_INT_CMD) {
+		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
+		if (en)
+			desc[0].data[0] = HCLGE_PPU_MPF_ABNORMAL_INT2_EN2;
 
-	err_sts = (le32_to_cpu(desc[0].data[3]) >> HCLGE_TQP_ECC_INT_SHIFT) &
-		   HCLGE_TQP_ECC_INT_MASK;
-	hclge_log_error(dev, &hclge_tqp_int_ecc_int[0], err_sts);
+		desc[0].data[2] = HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK;
+	} else if (cmd == HCLGE_PPU_PF_OTHER_INT_CMD) {
+		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
+		if (en)
+			desc[0].data[0] = HCLGE_PPU_PF_ABNORMAL_INT_EN;
 
-	err_sts = (le32_to_cpu(desc[0].data[5])) &
-		   HCLGE_IMP_ITCM4_ECC_INT_MASK;
-	hclge_log_error(dev, &hclge_imp_itcm4_ecc_int[0], err_sts);
+		desc[0].data[2] = HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK;
+	} else {
+		dev_err(dev, "Invalid cmd to configure PPU error interrupts\n");
+		return -EINVAL;
+	}
 
-	/* clear error interrupts */
-	desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_CLR_MASK);
-	desc[1].data[1] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_CLR_MASK |
-				HCLGE_CMDQ_ROCEE_ECC_CLR_MASK);
-	desc[1].data[3] = cpu_to_le32(HCLGE_TQP_IMP_ERR_CLR_MASK);
-	desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_CLR_MASK);
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
 
-	ret = hclge_cmd_clear_error(hdev, &desc[0], NULL, 0,
-				    HCLGE_CMD_FLAG_NEXT);
-	if (ret)
-		dev_err(dev,
-			"failed(%d) to clear COMMON error interrupt status\n",
-			ret);
+	return ret;
 }
 
-static void hclge_process_ncsi_error(struct hclge_dev *hdev,
-				     enum hclge_err_int_type type)
+static int hclge_config_ppu_hw_err_int(struct hclge_dev *hdev, bool en)
 {
 	struct device *dev = &hdev->pdev->dev;
-	struct hclge_desc desc_rd;
-	struct hclge_desc desc_wr;
-	u32 err_sts;
 	int ret;
 
-	if (hdev->pdev->revision < 0x21)
-		return;
-
-	/* read NCSI error status */
-	ret = hclge_cmd_query_error(hdev, &desc_rd, HCLGE_NCSI_INT_QUERY,
-				    0, 1, HCLGE_NCSI_ERR_INT_TYPE);
+	ret = hclge_config_ppu_error_interrupts(hdev, HCLGE_PPU_MPF_ECC_INT_CMD,
+						en);
 	if (ret) {
-		dev_err(dev,
-			"failed(=%d) to query NCSI error interrupt status\n",
+		dev_err(dev, "fail(%d) to configure PPU MPF ECC error intr\n",
 			ret);
-		return;
+		return ret;
 	}
 
-	/* log err */
-	err_sts = le32_to_cpu(desc_rd.data[0]);
-	hclge_log_error(dev, &hclge_ncsi_err_int[0], err_sts);
+	ret = hclge_config_ppu_error_interrupts(hdev,
+						HCLGE_PPU_MPF_OTHER_INT_CMD,
+						en);
+	if (ret) {
+		dev_err(dev, "fail(%d) to configure PPU MPF other intr\n", ret);
+		return ret;
+	}
 
-	/* clear err int */
-	ret = hclge_cmd_clear_error(hdev, &desc_wr, &desc_rd,
-				    HCLGE_NCSI_INT_CLR, 0);
+	ret = hclge_config_ppu_error_interrupts(hdev,
+						HCLGE_PPU_PF_OTHER_INT_CMD, en);
 	if (ret)
-		dev_err(dev, "failed(=%d) to clear NCSI interrupt status\n",
+		dev_err(dev, "fail(%d) to configure PPU PF error interrupts\n",
 			ret);
+	return ret;
 }
 
-static void hclge_process_igu_egu_error(struct hclge_dev *hdev,
-					enum hclge_err_int_type int_type)
+static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en)
 {
 	struct device *dev = &hdev->pdev->dev;
-	struct hclge_desc desc_rd;
-	struct hclge_desc desc_wr;
-	u32 err_sts;
+	struct hclge_desc desc[2];
 	int ret;
 
-	/* read IGU common err sts */
-	ret = hclge_cmd_query_error(hdev, &desc_rd,
-				    HCLGE_IGU_COMMON_INT_QUERY,
-				    0, 1, int_type);
-	if (ret) {
-		dev_err(dev, "failed(=%d) to query IGU common int status\n",
-			ret);
-		return;
+	/* configure SSU ecc error interrupts */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_ECC_INT_CMD, false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_ECC_INT_CMD, false);
+	if (en) {
+		desc[0].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN);
+		desc[0].data[1] =
+			cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN);
+		desc[0].data[4] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN);
 	}
 
-	/* log err */
-	err_sts = le32_to_cpu(desc_rd.data[0]) &
-				   HCLGE_IGU_COM_INT_MASK;
-	hclge_log_error(dev, &hclge_igu_com_err_int[0], err_sts);
+	desc[1].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK);
+	desc[1].data[1] = cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK);
+	desc[1].data[2] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK);
 
-	/* clear err int */
-	ret = hclge_cmd_clear_error(hdev, &desc_wr, &desc_rd,
-				    HCLGE_IGU_COMMON_INT_CLR, 0);
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
 	if (ret) {
-		dev_err(dev, "failed(=%d) to clear IGU common int status\n",
-			ret);
-		return;
+		dev_err(dev,
+			"fail(%d) to configure SSU ECC error interrupt\n", ret);
+		return ret;
 	}
 
-	/* read IGU-EGU TNL err sts */
-	ret = hclge_cmd_query_error(hdev, &desc_rd,
-				    HCLGE_IGU_EGU_TNL_INT_QUERY,
-				    0, 1, int_type);
-	if (ret) {
-		dev_err(dev, "failed(=%d) to query IGU-EGU TNL int status\n",
-			ret);
-		return;
+	/* configure SSU common error interrupts */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_COMMON_INT_CMD, false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_COMMON_INT_CMD, false);
+
+	if (en) {
+		if (hdev->pdev->revision >= 0x21)
+			desc[0].data[0] =
+				cpu_to_le32(HCLGE_SSU_COMMON_INT_EN);
+		else
+			desc[0].data[0] =
+				cpu_to_le32(HCLGE_SSU_COMMON_INT_EN & ~BIT(5));
+		desc[0].data[1] = cpu_to_le32(HCLGE_SSU_PORT_BASED_ERR_INT_EN);
+		desc[0].data[2] =
+			cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN);
 	}
 
-	/* log err */
-	err_sts = le32_to_cpu(desc_rd.data[0]) &
-				   HCLGE_IGU_EGU_TNL_INT_MASK;
-	hclge_log_error(dev, &hclge_igu_egu_tnl_err_int[0], err_sts);
+	desc[1].data[0] = cpu_to_le32(HCLGE_SSU_COMMON_INT_EN_MASK |
+				HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK);
+	desc[1].data[1] = cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK);
 
-	/* clear err int */
-	ret = hclge_cmd_clear_error(hdev, &desc_wr, &desc_rd,
-				    HCLGE_IGU_EGU_TNL_INT_CLR, 0);
-	if (ret) {
-		dev_err(dev, "failed(=%d) to clear IGU-EGU TNL int status\n",
-			ret);
-		return;
-	}
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
+	if (ret)
+		dev_err(dev,
+			"fail(%d) to configure SSU COMMON error intr\n", ret);
 
-	hclge_process_ncsi_error(hdev, HCLGE_ERR_INT_RAS_NFE);
+	return ret;
 }
 
-static int hclge_log_and_clear_ppp_error(struct hclge_dev *hdev, u32 cmd,
-					 enum hclge_err_int_type int_type)
+#define HCLGE_SET_DEFAULT_RESET_REQUEST(reset_type) \
+	do { \
+		if (ae_dev->ops->set_default_reset_request) \
+			ae_dev->ops->set_default_reset_request(ae_dev, \
+							       reset_type); \
+	} while (0)
+
+/* hclge_handle_mpf_ras_error: handle all main PF RAS errors
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @num:  number of extended command structures
+ *
+ * This function handles all the main PF RAS errors in the
+ * hw register/s using command.
+ */
+static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
+				      struct hclge_desc *desc,
+				      int num)
 {
+	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
 	struct device *dev = &hdev->pdev->dev;
-	const struct hclge_hw_error *hw_err_lst1, *hw_err_lst2, *hw_err_lst3;
-	struct hclge_desc desc[2];
-	u32 err_sts;
+	__le32 *desc_data;
+	u32 status;
 	int ret;
 
-	/* read PPP INT sts */
-	ret = hclge_cmd_query_error(hdev, &desc[0], cmd,
-				    HCLGE_CMD_FLAG_NEXT, 5, int_type);
+	/* query all main PF RAS errors */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT,
+				   true);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
 	if (ret) {
-		dev_err(dev, "failed(=%d) to query PPP interrupt status\n",
-			ret);
-		return -EIO;
+		dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret);
+		return ret;
 	}
 
-	/* log error */
-	if (cmd == HCLGE_PPP_CMD0_INT_CMD) {
-		hw_err_lst1 = &hclge_ppp_mpf_int0[0];
-		hw_err_lst2 = &hclge_ppp_mpf_int1[0];
-		hw_err_lst3 = &hclge_ppp_pf_int[0];
-	} else if (cmd == HCLGE_PPP_CMD1_INT_CMD) {
-		hw_err_lst1 = &hclge_ppp_mpf_int2[0];
-		hw_err_lst2 = &hclge_ppp_mpf_int3[0];
-	} else {
-		dev_err(dev, "invalid command(=%d)\n", cmd);
-		return -EINVAL;
+	/* log HNS common errors */
+	status = le32_to_cpu(desc[0].data[0]);
+	if (status) {
+		hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
+				&hclge_imp_tcm_ecc_int[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
 	}
 
-	err_sts = le32_to_cpu(desc[0].data[2]);
-	if (err_sts)
-		hclge_log_error(dev, hw_err_lst1, err_sts);
+	status = le32_to_cpu(desc[0].data[1]);
+	if (status) {
+		hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
+				&hclge_cmdq_nic_mem_ecc_int[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+	}
 
-	err_sts = le32_to_cpu(desc[0].data[3]);
-	if (err_sts)
-		hclge_log_error(dev, hw_err_lst2, err_sts);
+	if ((le32_to_cpu(desc[0].data[2])) & BIT(0)) {
+		dev_warn(dev, "imp_rd_data_poison_err found\n");
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+	}
 
-	if (cmd == HCLGE_PPP_CMD0_INT_CMD) {
-		err_sts = (le32_to_cpu(desc[0].data[4]) >> 8) & 0x3;
-		if (err_sts)
-			hclge_log_error(dev, hw_err_lst3, err_sts);
+	status = le32_to_cpu(desc[0].data[3]);
+	if (status) {
+		hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
+				&hclge_tqp_int_ecc_int[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
 	}
 
-	/* clear PPP INT */
-	ret = hclge_cmd_clear_error(hdev, &desc[0], NULL, 0,
-				    HCLGE_CMD_FLAG_NEXT);
-	if (ret) {
-		dev_err(dev, "failed(=%d) to clear PPP interrupt status\n",
-			ret);
-		return -EIO;
+	status = le32_to_cpu(desc[0].data[4]);
+	if (status) {
+		hclge_log_error(dev, "MSIX_ECC_INT_STS",
+				&hclge_msix_sram_ecc_int[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
 	}
 
-	return 0;
+	/* log SSU(Storage Switch Unit) errors */
+	desc_data = (__le32 *)&desc[2];
+	status = le32_to_cpu(*(desc_data + 2));
+	if (status) {
+		dev_warn(dev, "SSU_ECC_MULTI_BIT_INT_0 ssu_ecc_mbit_int[31:0]\n");
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	status = le32_to_cpu(*(desc_data + 3)) & BIT(0);
+	if (status) {
+		dev_warn(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_ecc_mbit_int[32]\n");
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK;
+	if (status) {
+		hclge_log_error(dev, "SSU_COMMON_ERR_INT",
+				&hclge_ssu_com_err_int[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+	}
+
+	/* log IGU(Ingress Unit) errors */
+	desc_data = (__le32 *)&desc[3];
+	status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK;
+	if (status)
+		hclge_log_error(dev, "IGU_INT_STS",
+				&hclge_igu_int[0], status);
+
+	/* log PPP(Programmable Packet Process) errors */
+	desc_data = (__le32 *)&desc[4];
+	status = le32_to_cpu(*(desc_data + 1));
+	if (status)
+		hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
+				&hclge_ppp_mpf_abnormal_int_st1[0], status);
+
+	status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK;
+	if (status)
+		hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
+				&hclge_ppp_mpf_abnormal_int_st3[0], status);
+
+	/* log PPU(RCB) errors */
+	desc_data = (__le32 *)&desc[5];
+	status = le32_to_cpu(*(desc_data + 1));
+	if (status) {
+		dev_warn(dev, "PPU_MPF_ABNORMAL_INT_ST1 %s found\n",
+			 "rpu_rx_pkt_ecc_mbit_err");
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	status = le32_to_cpu(*(desc_data + 2));
+	if (status) {
+		hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
+				&hclge_ppu_mpf_abnormal_int_st2[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK;
+	if (status) {
+		hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
+				&hclge_ppu_mpf_abnormal_int_st3[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	/* log TM(Traffic Manager) errors */
+	desc_data = (__le32 *)&desc[6];
+	status = le32_to_cpu(*desc_data);
+	if (status) {
+		hclge_log_error(dev, "TM_SCH_RINT",
+				&hclge_tm_sch_rint[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	/* log QCN(Quantized Congestion Control) errors */
+	desc_data = (__le32 *)&desc[7];
+	status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK;
+	if (status) {
+		hclge_log_error(dev, "QCN_FIFO_RINT",
+				&hclge_qcn_fifo_rint[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK;
+	if (status) {
+		hclge_log_error(dev, "QCN_ECC_RINT",
+				&hclge_qcn_ecc_rint[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	/* log NCSI errors */
+	desc_data = (__le32 *)&desc[9];
+	status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK;
+	if (status) {
+		hclge_log_error(dev, "NCSI_ECC_INT_RPT",
+				&hclge_ncsi_err_int[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET);
+	}
+
+	/* clear all main PF RAS errors */
+	hclge_cmd_reuse_desc(&desc[0], false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+	if (ret)
+		dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret);
+
+	return ret;
 }
 
-static void hclge_process_ppp_error(struct hclge_dev *hdev,
-				    enum hclge_err_int_type int_type)
+/* hclge_handle_pf_ras_error: handle all PF RAS errors
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @num:  number of extended command structures
+ *
+ * This function handles all the PF RAS errors in the
+ * hw registers using commands.
+ */
+static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
+				     struct hclge_desc *desc,
+				     int num)
 {
+	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
 	struct device *dev = &hdev->pdev->dev;
+	__le32 *desc_data;
+	u32 status;
 	int ret;
 
-	/* read PPP INT0,1 sts */
-	ret = hclge_log_and_clear_ppp_error(hdev, HCLGE_PPP_CMD0_INT_CMD,
-					    int_type);
-	if (ret < 0) {
-		dev_err(dev, "failed(=%d) to clear PPP interrupt 0,1 status\n",
-			ret);
-		return;
+	/* query all PF RAS errors */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT,
+				   true);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+	if (ret) {
+		dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret);
+		return ret;
 	}
 
-	/* read err PPP INT2,3 sts */
-	ret = hclge_log_and_clear_ppp_error(hdev, HCLGE_PPP_CMD1_INT_CMD,
-					    int_type);
-	if (ret < 0)
-		dev_err(dev, "failed(=%d) to clear PPP interrupt 2,3 status\n",
-			ret);
+	/* log SSU(Storage Switch Unit) errors */
+	status = le32_to_cpu(desc[0].data[0]);
+	if (status) {
+		hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
+				&hclge_ssu_port_based_err_int[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+	}
+
+	status = le32_to_cpu(desc[0].data[1]);
+	if (status) {
+		hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT",
+				&hclge_ssu_fifo_overflow_int[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+	}
+
+	status = le32_to_cpu(desc[0].data[2]);
+	if (status) {
+		hclge_log_error(dev, "SSU_ETS_TCG_INT",
+				&hclge_ssu_ets_tcg_int[0], status);
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+	}
+
+	/* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */
+	desc_data = (__le32 *)&desc[1];
+	status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK;
+	if (status)
+		hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
+				&hclge_igu_egu_tnl_int[0], status);
+
+	/* clear all PF RAS errors */
+	hclge_cmd_reuse_desc(&desc[0], false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
+	if (ret)
+		dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret);
+
+	return ret;
 }
 
-static void hclge_process_tm_sch_error(struct hclge_dev *hdev)
+static int hclge_handle_all_ras_errors(struct hclge_dev *hdev)
 {
 	struct device *dev = &hdev->pdev->dev;
-	const struct hclge_tm_sch_ecc_info *tm_sch_ecc_info;
-	struct hclge_desc desc;
-	u32 ecc_info;
-	u8 module_no;
-	u8 ram_no;
+	u32 mpf_bd_num, pf_bd_num, bd_num;
+	struct hclge_desc desc_bd;
+	struct hclge_desc *desc;
 	int ret;
 
-	/* read TM scheduler errors */
-	ret = hclge_cmd_query_error(hdev, &desc,
-				    HCLGE_TM_SCH_MBIT_ECC_INFO_CMD, 0, 0, 0);
+	/* query the number of registers in the RAS int status */
+	hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_RAS_INT_STS_BD_NUM,
+				   true);
+	ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
 	if (ret) {
-		dev_err(dev, "failed(%d) to read SCH mbit ECC err info\n", ret);
-		return;
+		dev_err(dev, "fail(%d) to query ras int status bd num\n", ret);
+		return ret;
 	}
-	ecc_info = le32_to_cpu(desc.data[0]);
+	mpf_bd_num = le32_to_cpu(desc_bd.data[0]);
+	pf_bd_num = le32_to_cpu(desc_bd.data[1]);
+	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
+
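+	/* a single descriptor buffer, sized for the larger of the two BD
+	 * counts, is reused for both the MPF and the PF RAS error queries
+	 */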
+	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
 
-	ret = hclge_cmd_query_error(hdev, &desc,
-				    HCLGE_TM_SCH_ECC_ERR_RINT_CMD, 0, 0, 0);
+	/* handle all main PF RAS errors */
+	ret = hclge_handle_mpf_ras_error(hdev, desc, mpf_bd_num);
 	if (ret) {
-		dev_err(dev, "failed(%d) to read SCH ECC err status\n", ret);
-		return;
+		kfree(desc);
+		return ret;
 	}
+	memset(desc, 0, bd_num * sizeof(struct hclge_desc));
 
-	/* log TM scheduler errors */
-	if (le32_to_cpu(desc.data[0])) {
-		hclge_log_error(dev, &hclge_tm_sch_err_int[0],
-				le32_to_cpu(desc.data[0]));
-		if (le32_to_cpu(desc.data[0]) & 0x2) {
-			module_no = (ecc_info >> 20) & 0xF;
-			ram_no = (ecc_info >> 16) & 0xF;
-			tm_sch_ecc_info =
-				&hclge_tm_sch_ecc_err[module_no][ram_no];
-			dev_warn(dev, "ecc err module:ram=%s\n",
-				 tm_sch_ecc_info->name);
-			dev_warn(dev, "ecc memory address = 0x%x\n",
-				 ecc_info & 0xFFFF);
+	/* handle all PF RAS errors */
+	ret = hclge_handle_pf_ras_error(hdev, desc, pf_bd_num);
+	kfree(desc);
+
+	return ret;
+}
+
+static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev)
+{
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc[2];
+	int ret;
+
+	/* read overflow error status */
+	ret = hclge_cmd_query_error(hdev, &desc[0],
+				    HCLGE_ROCEE_PF_RAS_INT_CMD,
+				    0, 0, 0);
+	if (ret) {
+		dev_err(dev, "failed(%d) to query ROCEE OVF error sts\n", ret);
+		return ret;
+	}
+
+	/* log overflow error */
+	if (le32_to_cpu(desc[0].data[0]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
+		const struct hclge_hw_error *err;
+		u32 err_sts;
+
+		err = &hclge_rocee_qmm_ovf_err_int[0];
+		err_sts = HCLGE_ROCEE_OVF_ERR_TYPE_MASK &
+			  le32_to_cpu(desc[0].data[0]);
+		while (err->msg) {
+			if (err->int_msk == err_sts) {
+				dev_warn(dev, "%s [error status=0x%x] found\n",
+					 err->msg,
+					 le32_to_cpu(desc[0].data[0]));
+				break;
+			}
+			err++;
 		}
 	}
 
-	/* clear TM scheduler errors */
-	ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0);
-	if (ret) {
-		dev_err(dev, "failed(%d) to clear TM SCH error status\n", ret);
-		return;
+	if (le32_to_cpu(desc[0].data[1]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
+		dev_warn(dev, "ROCEE TSP OVF [error status=0x%x] found\n",
+			 le32_to_cpu(desc[0].data[1]));
 	}
 
-	ret = hclge_cmd_query_error(hdev, &desc,
-				    HCLGE_TM_SCH_ECC_ERR_RINT_CE, 0, 0, 0);
-	if (ret) {
-		dev_err(dev, "failed(%d) to read SCH CE status\n", ret);
-		return;
+	if (le32_to_cpu(desc[0].data[2]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
+		dev_warn(dev, "ROCEE SCC OVF [error status=0x%x] found\n",
+			 le32_to_cpu(desc[0].data[2]));
 	}
 
-	ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0);
+	return 0;
+}
+
+static int hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev)
+{
+	enum hnae3_reset_type reset_type = HNAE3_FUNC_RESET;
+	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
+	struct device *dev = &hdev->pdev->dev;
+	struct hclge_desc desc[2];
+	unsigned int status;
+	int ret;
+
+	/* read RAS error interrupt status */
+	ret = hclge_cmd_query_error(hdev, &desc[0],
+				    HCLGE_QUERY_CLEAR_ROCEE_RAS_INT,
+				    0, 0, 0);
 	if (ret) {
-		dev_err(dev, "failed(%d) to clear TM SCH CE status\n", ret);
-		return;
+		dev_err(dev, "failed(%d) to query ROCEE RAS INT SRC\n", ret);
+		/* reset everything for now */
+		HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+		return ret;
 	}
 
-	ret = hclge_cmd_query_error(hdev, &desc,
-				    HCLGE_TM_SCH_ECC_ERR_RINT_NFE, 0, 0, 0);
-	if (ret) {
-		dev_err(dev, "failed(%d) to read SCH NFE status\n", ret);
-		return;
+	status = le32_to_cpu(desc[0].data[0]);
+
+	if (status & HCLGE_ROCEE_RERR_INT_MASK)
+		dev_warn(dev, "ROCEE RAS AXI rresp error\n");
+
+	if (status & HCLGE_ROCEE_BERR_INT_MASK)
+		dev_warn(dev, "ROCEE RAS AXI bresp error\n");
+
+	if (status & HCLGE_ROCEE_ECC_INT_MASK) {
+		dev_warn(dev, "ROCEE RAS 2bit ECC error\n");
+		reset_type = HNAE3_GLOBAL_RESET;
 	}
 
-	ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0);
-	if (ret) {
-		dev_err(dev, "failed(%d) to clear TM SCH NFE status\n", ret);
-		return;
+	if (status & HCLGE_ROCEE_OVF_INT_MASK) {
+		ret = hclge_log_rocee_ovf_error(hdev);
+		if (ret) {
+			dev_err(dev, "failed(%d) to process ovf error\n", ret);
+			/* reset everything for now */
+			HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET);
+			return ret;
+		}
 	}
 
-	ret = hclge_cmd_query_error(hdev, &desc,
-				    HCLGE_TM_SCH_ECC_ERR_RINT_FE, 0, 0, 0);
+	/* clear error status */
+	hclge_cmd_reuse_desc(&desc[0], false);
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
 	if (ret) {
-		dev_err(dev, "failed(%d) to read SCH FE status\n", ret);
-		return;
+		dev_err(dev, "failed(%d) to clear ROCEE RAS error\n", ret);
+		/* reset everything for now */
+		reset_type = HNAE3_GLOBAL_RESET;
 	}
 
-	ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0);
-	if (ret)
-		dev_err(dev, "failed(%d) to clear TM SCH FE status\n", ret);
+	HCLGE_SET_DEFAULT_RESET_REQUEST(reset_type);
+
+	return ret;
 }
 
-static void hclge_process_tm_qcn_error(struct hclge_dev *hdev)
+static int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en)
 {
 	struct device *dev = &hdev->pdev->dev;
 	struct hclge_desc desc;
 	int ret;
 
-	/* read QCN errors */
-	ret = hclge_cmd_query_error(hdev, &desc,
-				    HCLGE_TM_QCN_MEM_INT_INFO_CMD, 0, 0, 0);
-	if (ret) {
-		dev_err(dev, "failed(%d) to read QCN ECC err status\n", ret);
-		return;
-	}
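+	/* ROCEE RAS interrupt configuration is only done on revision 0x21
+	 * (or later) devices that support RoCE
+	 */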
+	if (hdev->pdev->revision < 0x21 || !hnae3_dev_roce_supported(hdev))
+		return 0;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_CONFIG_ROCEE_RAS_INT_EN, false);
+	if (en) {
+		/* enable ROCEE hw error interrupts */
+		desc.data[0] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN);
+		desc.data[1] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN);
 
-	/* log QCN errors */
-	if (le32_to_cpu(desc.data[0]))
-		hclge_log_error(dev, &hclge_qcn_ecc_err_int[0],
-				le32_to_cpu(desc.data[0]));
+		hclge_log_and_clear_rocee_ras_error(hdev);
+	}
+	desc.data[2] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN_MASK);
+	desc.data[3] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN_MASK);
 
-	/* clear QCN errors */
-	ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret)
-		dev_err(dev, "failed(%d) to clear QCN error status\n", ret);
+		dev_err(dev, "failed(%d) to config ROCEE RAS interrupt\n", ret);
+
+	return ret;
 }
 
-static void hclge_process_tm_error(struct hclge_dev *hdev,
-				   enum hclge_err_int_type type)
+static int hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev)
 {
-	hclge_process_tm_sch_error(hdev);
-	hclge_process_tm_qcn_error(hdev);
+	struct hclge_dev *hdev = ae_dev->priv;
+
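+	/* nothing to handle if a reset is already in progress or the hw
+	 * does not report RoCEE RAS errors (revision below 0x21)
+	 */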
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) ||
+	    hdev->pdev->revision < 0x21)
+		return HNAE3_NONE_RESET;
+
+	return hclge_log_and_clear_rocee_ras_error(hdev);
 }
 
 static const struct hclge_hw_blk hw_blk[] = {
-	{ .msk = BIT(0), .name = "IGU_EGU",
-	  .enable_error = hclge_enable_igu_egu_error,
-	  .process_error = hclge_process_igu_egu_error, },
-	{ .msk = BIT(5), .name = "COMMON",
-	  .enable_error = hclge_enable_common_error,
-	  .process_error = hclge_process_common_error, },
-	{ .msk = BIT(4), .name = "TM",
-	  .enable_error = hclge_enable_tm_hw_error,
-	  .process_error = hclge_process_tm_error, },
-	{ .msk = BIT(1), .name = "PPP",
-	  .enable_error = hclge_enable_ppp_error,
-	  .process_error = hclge_process_ppp_error, },
+	{
+	  .msk = BIT(0), .name = "IGU_EGU",
+	  .config_err_int = hclge_config_igu_egu_hw_err_int,
+	},
+	{
+	  .msk = BIT(1), .name = "PPP",
+	  .config_err_int = hclge_config_ppp_hw_err_int,
+	},
+	{
+	  .msk = BIT(2), .name = "SSU",
+	  .config_err_int = hclge_config_ssu_hw_err_int,
+	},
+	{
+	  .msk = BIT(3), .name = "PPU",
+	  .config_err_int = hclge_config_ppu_hw_err_int,
+	},
+	{
+	  .msk = BIT(4), .name = "TM",
+	  .config_err_int = hclge_config_tm_hw_err_int,
+	},
+	{
+	  .msk = BIT(5), .name = "COMMON",
+	  .config_err_int = hclge_config_common_hw_err_int,
+	},
+	{
+	  .msk = BIT(8), .name = "MAC",
+	  .config_err_int = hclge_config_mac_err_int,
+	},
 	{ /* sentinel */ }
 };
 
 int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state)
 {
+	const struct hclge_hw_blk *module = hw_blk;
 	struct device *dev = &hdev->pdev->dev;
 	int ret = 0;
-	int i = 0;
 
-	while (hw_blk[i].name) {
-		if (!hw_blk[i].enable_error) {
-			i++;
-			continue;
-		}
-		ret = hw_blk[i].enable_error(hdev, state);
-		if (ret) {
-			dev_err(dev, "fail(%d) to en/disable err int\n", ret);
-			return ret;
+	while (module->name) {
+		if (module->config_err_int) {
+			ret = module->config_err_int(hdev, state);
+			if (ret)
+				return ret;
 		}
-		i++;
+		module++;
 	}
 
+	ret = hclge_config_rocee_ras_interrupt(hdev, state);
+	if (ret)
+		dev_err(dev, "fail(%d) to configure ROCEE err int\n", ret);
+
 	return ret;
 }
 
-pci_ers_result_t hclge_process_ras_hw_error(struct hnae3_ae_dev *ae_dev)
+pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
 {
 	struct hclge_dev *hdev = ae_dev->priv;
 	struct device *dev = &hdev->pdev->dev;
-	u32 sts, val;
-	int i = 0;
-
-	sts = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
-
-	/* Processing Non-fatal errors */
-	if (sts & HCLGE_RAS_REG_NFE_MASK) {
-		val = (sts >> HCLGE_RAS_REG_NFE_SHIFT) & 0xFF;
-		i = 0;
-		while (hw_blk[i].name) {
-			if (!(hw_blk[i].msk & val)) {
-				i++;
-				continue;
-			}
-			dev_warn(dev, "%s ras non-fatal error identified\n",
-				 hw_blk[i].name);
-			if (hw_blk[i].process_error)
-				hw_blk[i].process_error(hdev,
-							 HCLGE_ERR_INT_RAS_NFE);
-			i++;
-		}
+	u32 status;
+
+	status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
+
+	/* Handling Non-fatal HNS RAS errors */
+	if (status & HCLGE_RAS_REG_NFE_MASK) {
+		dev_warn(dev,
+			 "HNS Non-Fatal RAS error(status=0x%x) identified\n",
+			 status);
+		hclge_handle_all_ras_errors(hdev);
+	} else {
+		if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) ||
+		    hdev->pdev->revision < 0x21)
+			return PCI_ERS_RESULT_RECOVERED;
 	}
 
-	return PCI_ERS_RESULT_NEED_RESET;
+	if (status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
+		dev_warn(dev, "ROCEE uncorrected RAS error identified\n");
+		hclge_handle_rocee_ras_error(ae_dev);
+	}
+
+	if (status & HCLGE_RAS_REG_NFE_MASK ||
+	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK)
+		return PCI_ERS_RESULT_NEED_RESET;
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
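+/* hclge_handle_hw_msix_error: handle all main PF and PF MSIx errors
+ * @hdev: pointer to struct hclge_dev
+ * @reset_requests: reset level(s) requested as a result of the errors
+ *
+ * This function queries, logs and clears the MSIx reported hw errors and
+ * records the reset level needed to recover from them.
+ */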
+int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
+			       unsigned long *reset_requests)
+{
+	struct device *dev = &hdev->pdev->dev;
+	u32 mpf_bd_num, pf_bd_num, bd_num;
+	struct hclge_desc desc_bd;
+	struct hclge_desc *desc;
+	__le32 *desc_data;
+	int ret = 0;
+	u32 status;
+
+	/* request at least a function level reset by default */
+	set_bit(HNAE3_FUNC_RESET, reset_requests);
+
+	/* query the number of bds for the MSIx int status */
+	hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_MSIX_INT_STS_BD_NUM,
+				   true);
+	ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
+	if (ret) {
+		dev_err(dev, "fail(%d) to query msix int status bd num\n",
+			ret);
+		/* reset everything for now */
+		set_bit(HNAE3_GLOBAL_RESET, reset_requests);
+		return ret;
+	}
+
+	mpf_bd_num = le32_to_cpu(desc_bd.data[0]);
+	pf_bd_num = le32_to_cpu(desc_bd.data[1]);
+	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
+
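+	/* as in the RAS path, one buffer sized for the larger BD count is
+	 * shared by the MPF and the PF MSIx error queries
+	 */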
+	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
+	if (!desc) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* query all main PF MSIx errors */
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
+				   true);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num);
+	if (ret) {
+		dev_err(dev, "query all mpf msix int cmd failed (%d)\n",
+			ret);
+		/* reset everything for now */
+		set_bit(HNAE3_GLOBAL_RESET, reset_requests);
+		goto msi_error;
+	}
+
+	/* log MAC errors */
+	desc_data = (__le32 *)&desc[1];
+	status = le32_to_cpu(*desc_data);
+	if (status) {
+		hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
+				&hclge_mac_afifo_tnl_int[0], status);
+		set_bit(HNAE3_GLOBAL_RESET, reset_requests);
+	}
+
+	/* log PPU(RCB) errors */
+	desc_data = (__le32 *)&desc[5];
+	status = le32_to_cpu(*(desc_data + 2)) &
+			HCLGE_PPU_MPF_INT_ST2_MSIX_MASK;
+	if (status) {
+		dev_warn(dev,
+			 "PPU_MPF_ABNORMAL_INT_ST2[28:29], err_status(0x%x)\n",
+			 status);
+		set_bit(HNAE3_CORE_RESET, reset_requests);
+	}
+
+	/* clear all main PF MSIx errors */
+	hclge_cmd_reuse_desc(&desc[0], false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num);
+	if (ret) {
+		dev_err(dev, "clear all mpf msix int cmd failed (%d)\n",
+			ret);
+		/* reset everything for now */
+		set_bit(HNAE3_GLOBAL_RESET, reset_requests);
+		goto msi_error;
+	}
+
+	/* query all PF MSIx errors */
+	memset(desc, 0, bd_num * sizeof(struct hclge_desc));
+	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
+				   true);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num);
+	if (ret) {
+		dev_err(dev, "query all pf msix int cmd failed (%d)\n",
+			ret);
+		/* reset everything for now */
+		set_bit(HNAE3_GLOBAL_RESET, reset_requests);
+		goto msi_error;
+	}
+
+	/* log SSU PF errors */
+	status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK;
+	if (status) {
+		hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
+				&hclge_ssu_port_based_pf_int[0], status);
+		set_bit(HNAE3_GLOBAL_RESET, reset_requests);
+	}
+
+	/* read and log PPP PF errors */
+	desc_data = (__le32 *)&desc[2];
+	status = le32_to_cpu(*desc_data);
+	if (status)
+		hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0",
+				&hclge_ppp_pf_abnormal_int[0], status);
+
+	/* PPU(RCB) PF errors */
+	desc_data = (__le32 *)&desc[3];
+	status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK;
+	if (status)
+		hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST",
+				&hclge_ppu_pf_abnormal_int[0], status);
+
+	/* clear all PF MSIx errors */
+	hclge_cmd_reuse_desc(&desc[0], false);
+	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num);
+	if (ret) {
+		dev_err(dev, "clear all pf msix int cmd failed (%d)\n",
+			ret);
+		/* reset everything for now */
+		set_bit(HNAE3_GLOBAL_RESET, reset_requests);
+	}
+
+msi_error:
+	kfree(desc);
+out:
+	return ret;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
index e0e3b5861495..51a7d4eb066a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
@@ -7,9 +7,11 @@
 #include "hclge_main.h"
 
 #define HCLGE_RAS_PF_OTHER_INT_STS_REG   0x20B00
-#define HCLGE_RAS_REG_FE_MASK    0xFF
 #define HCLGE_RAS_REG_NFE_MASK   0xFF00
-#define HCLGE_RAS_REG_NFE_SHIFT	8
+#define HCLGE_RAS_REG_ROCEE_ERR_MASK   0x3000000
+
+#define HCLGE_VECTOR0_PF_OTHER_INT_STS_REG   0x20800
+#define HCLGE_VECTOR0_REG_MSIX_MASK   0x1FF00
 
 #define HCLGE_IMP_TCM_ECC_ERR_INT_EN	0xFFFF0000
 #define HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK	0xFFFF0000
@@ -23,6 +25,8 @@
 #define HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK	0x0100
 #define HCLGE_TQP_ECC_ERR_INT_EN	0x0FFF
 #define HCLGE_TQP_ECC_ERR_INT_EN_MASK	0x0FFF
+#define HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK	0x0F000000
+#define HCLGE_MSIX_SRAM_ECC_ERR_INT_EN	0x0F000000
 #define HCLGE_IGU_ERR_INT_EN	0x0000066F
 #define HCLGE_IGU_ERR_INT_EN_MASK	0x000F
 #define HCLGE_IGU_TNL_ERR_INT_EN    0x0002AABF
@@ -41,21 +45,55 @@
 #define HCLGE_TM_QCN_MEM_ERR_INT_EN	0xFFFFFF
 #define HCLGE_NCSI_ERR_INT_EN	0x3
 #define HCLGE_NCSI_ERR_INT_TYPE	0x9
+#define HCLGE_MAC_COMMON_ERR_INT_EN		GENMASK(7, 0)
+#define HCLGE_MAC_COMMON_ERR_INT_EN_MASK	GENMASK(7, 0)
+#define HCLGE_PPU_MPF_ABNORMAL_INT0_EN		GENMASK(31, 0)
+#define HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK	GENMASK(31, 0)
+#define HCLGE_PPU_MPF_ABNORMAL_INT1_EN		GENMASK(31, 0)
+#define HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK	GENMASK(31, 0)
+#define HCLGE_PPU_MPF_ABNORMAL_INT2_EN		0x3FFF3FFF
+#define HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK	0x3FFF3FFF
+#define HCLGE_PPU_MPF_ABNORMAL_INT2_EN2		0xB
+#define HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK	0xB
+#define HCLGE_PPU_MPF_ABNORMAL_INT3_EN		GENMASK(7, 0)
+#define HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK	GENMASK(23, 16)
+#define HCLGE_PPU_PF_ABNORMAL_INT_EN		GENMASK(5, 0)
+#define HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK	GENMASK(5, 0)
+#define HCLGE_SSU_1BIT_ECC_ERR_INT_EN		GENMASK(31, 0)
+#define HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK	GENMASK(31, 0)
+#define HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN	GENMASK(31, 0)
+#define HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK	GENMASK(31, 0)
+#define HCLGE_SSU_BIT32_ECC_ERR_INT_EN		0x0101
+#define HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK	0x0101
+#define HCLGE_SSU_COMMON_INT_EN			GENMASK(9, 0)
+#define HCLGE_SSU_COMMON_INT_EN_MASK		GENMASK(9, 0)
+#define HCLGE_SSU_PORT_BASED_ERR_INT_EN		0x0BFF
+#define HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK	0x0BFF0000
+#define HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN	GENMASK(23, 0)
+#define HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK	GENMASK(23, 0)
+
+#define HCLGE_SSU_COMMON_ERR_INT_MASK	GENMASK(9, 0)
+#define HCLGE_SSU_PORT_INT_MSIX_MASK	0x7BFF
+#define HCLGE_IGU_INT_MASK		GENMASK(3, 0)
+#define HCLGE_IGU_EGU_TNL_INT_MASK	GENMASK(5, 0)
+#define HCLGE_PPP_MPF_INT_ST3_MASK	GENMASK(5, 0)
+#define HCLGE_PPU_MPF_INT_ST3_MASK	GENMASK(7, 0)
+#define HCLGE_PPU_MPF_INT_ST2_MSIX_MASK	GENMASK(29, 28)
+#define HCLGE_PPU_PF_INT_MSIX_MASK	0x27
+#define HCLGE_QCN_FIFO_INT_MASK		GENMASK(17, 0)
+#define HCLGE_QCN_ECC_INT_MASK		GENMASK(21, 0)
+#define HCLGE_NCSI_ECC_INT_MASK		GENMASK(1, 0)
 
-#define HCLGE_IMP_TCM_ECC_INT_MASK	0xFFFF
-#define HCLGE_IMP_ITCM4_ECC_INT_MASK	0x3
-#define HCLGE_CMDQ_ECC_INT_MASK		0xFFFF
-#define HCLGE_CMDQ_ROC_ECC_INT_SHIFT	16
-#define HCLGE_TQP_ECC_INT_MASK		0xFFF
-#define HCLGE_TQP_ECC_INT_SHIFT		16
-#define HCLGE_IMP_TCM_ECC_CLR_MASK	0xFFFF
-#define HCLGE_IMP_ITCM4_ECC_CLR_MASK	0x3
-#define HCLGE_CMDQ_NIC_ECC_CLR_MASK	0xFFFF
-#define HCLGE_CMDQ_ROCEE_ECC_CLR_MASK	0xFFFF0000
-#define HCLGE_TQP_IMP_ERR_CLR_MASK	0x0FFF0001
-#define HCLGE_IGU_COM_INT_MASK		0xF
-#define HCLGE_IGU_EGU_TNL_INT_MASK	0x3F
-#define HCLGE_PPP_PF_INT_MASK		0x100
+#define HCLGE_ROCEE_RAS_NFE_INT_EN		0xF
+#define HCLGE_ROCEE_RAS_CE_INT_EN		0x1
+#define HCLGE_ROCEE_RAS_NFE_INT_EN_MASK		0xF
+#define HCLGE_ROCEE_RAS_CE_INT_EN_MASK		0x1
+#define HCLGE_ROCEE_RERR_INT_MASK		BIT(0)
+#define HCLGE_ROCEE_BERR_INT_MASK		BIT(1)
+#define HCLGE_ROCEE_ECC_INT_MASK		BIT(2)
+#define HCLGE_ROCEE_OVF_INT_MASK		BIT(3)
+#define HCLGE_ROCEE_OVF_ERR_INT_MASK		0x10000
+#define HCLGE_ROCEE_OVF_ERR_TYPE_MASK		0x3F
 
 enum hclge_err_int_type {
 	HCLGE_ERR_INT_MSIX = 0,
@@ -67,9 +105,7 @@ enum hclge_err_int_type {
 struct hclge_hw_blk {
 	u32 msk;
 	const char *name;
-	int (*enable_error)(struct hclge_dev *hdev, bool en);
-	void (*process_error)(struct hclge_dev *hdev,
-			      enum hclge_err_int_type type);
+	int (*config_err_int)(struct hclge_dev *hdev, bool en);
 };
 
 struct hclge_hw_error {
@@ -78,6 +114,7 @@ struct hclge_hw_error {
 };
 
 int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state);
-int hclge_enable_tm_hw_error(struct hclge_dev *hdev, bool en);
-pci_ers_result_t hclge_process_ras_hw_error(struct hnae3_ae_dev *ae_dev);
+pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev);
+int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
+			       unsigned long *reset_requests);
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 8432c841aa4f..22380202ba83 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2200,12 +2200,13 @@ static void hclge_service_complete(struct hclge_dev *hdev)
 
 static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 {
-	u32 rst_src_reg;
-	u32 cmdq_src_reg;
+	u32 rst_src_reg, cmdq_src_reg, msix_src_reg;
 
 	/* fetch the events from their corresponding regs */
 	rst_src_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS);
 	cmdq_src_reg = hclge_read_dev(&hdev->hw, HCLGE_VECTOR0_CMDQ_SRC_REG);
+	msix_src_reg = hclge_read_dev(&hdev->hw,
+				      HCLGE_VECTOR0_PF_OTHER_INT_STS_REG);
 
 	/* Assumption: If by any chance reset and mailbox events are reported
 	 * together then we will only process reset event in this go and will
@@ -2239,6 +2240,10 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 		return HCLGE_VECTOR0_EVENT_RST;
 	}
 
+	/* check for vector0 msix event source */
+	if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK)
+		return HCLGE_VECTOR0_EVENT_ERR;
+
 	/* check for vector0 mailbox(=CMDQ RX) event source */
 	if (BIT(HCLGE_VECTOR0_RX_CMDQ_INT_B) & cmdq_src_reg) {
 		cmdq_src_reg &= ~BIT(HCLGE_VECTOR0_RX_CMDQ_INT_B);
@@ -2289,6 +2294,19 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
 
 	/* vector 0 interrupt is shared with reset and mailbox source events.*/
 	switch (event_cause) {
+	case HCLGE_VECTOR0_EVENT_ERR:
+		/* we do not know what type of reset is required now. This could
+		 * only be decided after we fetch the type of errors which
+		 * caused this event. Therefore, we will do the following for now:
+		 * 1. Assert HNAE3_UNKNOWN_RESET type of reset. This means we
+		 *    have deferred the choice of reset type to be used.
+		 * 2. Schedule the reset service task.
+		 * 3. When the service task receives HNAE3_UNKNOWN_RESET type
+		 *    it will fetch the correct type of reset by first decoding
+		 *    the types of errors.
+		 */
+		set_bit(HNAE3_UNKNOWN_RESET, &hdev->reset_request);
+		/* fall through */
 	case HCLGE_VECTOR0_EVENT_RST:
 		hclge_reset_task_schedule(hdev);
 		break;
@@ -2593,6 +2611,23 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hclge_dev *hdev,
 {
 	enum hnae3_reset_type rst_level = HNAE3_NONE_RESET;
 
+	/* first, resolve any unknown reset type to the known type(s) */
+	if (test_bit(HNAE3_UNKNOWN_RESET, addr)) {
+		/* we will intentionally ignore any errors from this function
+		 * as we will end up in *some* reset request in any case
+		 */
+		hclge_handle_hw_msix_error(hdev, addr);
+		clear_bit(HNAE3_UNKNOWN_RESET, addr);
+		/* We deferred the clearing of the error event which caused the
+		 * interrupt since it was not possible to do that in
+		 * interrupt context (and this is the reason we introduced the
+		 * new UNKNOWN reset type). Now that the errors have been
+		 * handled and cleared in hardware, we can safely enable
+		 * interrupts. This is an exception to the norm.
+		 */
+		hclge_enable_vector(&hdev->misc_vector, true);
+	}
+
 	/* return the highest priority reset level amongst all */
 	if (test_bit(HNAE3_IMP_RESET, addr)) {
 		rst_level = HNAE3_IMP_RESET;
@@ -7269,7 +7304,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 	ret = hclge_hw_error_set_state(hdev, true);
 	if (ret) {
 		dev_err(&pdev->dev,
-			"hw error interrupts enable failed, ret =%d\n", ret);
+			"fail(%d) to enable hw error interrupts\n", ret);
 		goto err_mdiobus_unreg;
 	}
 
@@ -7405,11 +7440,15 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
-	/* Re-enable the TM hw error interrupts because
-	 * they get disabled on core/global reset.
+	/* Re-enable the hw error interrupts because
+	 * the interrupts get disabled on core/global reset.
 	 */
-	if (hclge_enable_tm_hw_error(hdev, true))
-		dev_err(&pdev->dev, "failed to enable TM hw error interrupts\n");
+	ret = hclge_hw_error_set_state(hdev, true);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"fail(%d) to re-enable HNS hw error interrupts\n", ret);
+		return ret;
+	}
 
 	hclge_reset_vport_state(hdev);
 
@@ -7931,7 +7970,7 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.restore_fd_rules = hclge_restore_fd_entries,
 	.enable_fd = hclge_enable_fd,
 	.dbg_run_cmd = hclge_dbg_run_cmd,
-	.process_hw_error = hclge_process_ras_hw_error,
+	.handle_hw_ras_error = hclge_handle_hw_ras_error,
 	.get_hw_reset_stat = hclge_get_hw_reset_stat,
 	.ae_dev_resetting = hclge_ae_dev_resetting,
 	.ae_dev_reset_cnt = hclge_ae_dev_reset_cnt,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index ee6024270d5a..106fce172563 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -205,6 +205,7 @@ enum HCLGE_DEV_STATE {
 enum hclge_evt_cause {
 	HCLGE_VECTOR0_EVENT_RST,
 	HCLGE_VECTOR0_EVENT_MBX,
+	HCLGE_VECTOR0_EVENT_ERR,
 	HCLGE_VECTOR0_EVENT_OTHER,
 };