summary refs log tree commit diff
path: root/drivers/edac
diff options
context:
space:
mode:
authorYazen Ghannam <Yazen.Ghannam@amd.com>2016-11-28 12:59:53 -0600
committerBorislav Petkov <bp@suse.de>2016-11-29 18:05:48 +0100
commit713ad54675fdfd7358dbcae21ab4788a014c6e23 (patch)
treedb72a151b31b099a0ef044c2ca2b939c27463ab1 /drivers/edac
parentd27f3a348e3677b7d5ee6954ebafce679b011164 (diff)
downloadlinux-713ad54675fdfd7358dbcae21ab4788a014c6e23.tar.gz
EDAC, amd64: Define and register UMC error decode function
How we need to decode UMC errors is different from how we decode bus
errors, so let's define a new function for this. We also need a way to
determine the UMC channel since we're not guaranteed that there is a
fixed relation between channel and MCA bank.

Signed-off-by: Yazen Ghannam <Yazen.Ghannam@amd.com>
Cc: Aravind Gopalakrishnan <aravindksg.lkml@gmail.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Link: http://lkml.kernel.org/r/1480359593-80369-1-git-send-email-Yazen.Ghannam@amd.com
[ Fold in decode_synd_reg(), simplify. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/amd64_edac.c89
-rw-r--r--drivers/edac/amd64_edac.h2
2 files changed, 88 insertions, 3 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index d7bd96c83b51..48a38ab363dd 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2392,7 +2392,13 @@ static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err,
 		string = "Failed to map error addr to a csrow";
 		break;
 	case ERR_CHANNEL:
-		string = "unknown syndrome - possible error reporting race";
+		string = "Unknown syndrome - possible error reporting race";
+		break;
+	case ERR_SYND:
+		string = "MCA_SYND not valid - unknown syndrome and csrow";
+		break;
+	case ERR_NORM_ADDR:
+		string = "Cannot decode normalized address";
 		break;
 	default:
 		string = "WTF error";
@@ -2442,6 +2448,76 @@ static inline void decode_bus_error(int node_id, struct mce *m)
 }
 
 /*
+ * To find the UMC channel represented by this bank we need to match on its
+ * instance_id. The instance_id of a bank is held in the lower 32 bits of its
+ * IPID.
+ */
+static int find_umc_channel(struct amd64_pvt *pvt, struct mce *m)
+{
+	u32 umc_instance_id[] = {0x50f00, 0x150f00};
+	u32 instance_id = m->ipid & GENMASK(31, 0);
+	int i, channel = -1;
+
+	for (i = 0; i < ARRAY_SIZE(umc_instance_id); i++)
+		if (umc_instance_id[i] == instance_id)
+			channel = i;
+
+	return channel;
+}
+
+static void decode_umc_error(int node_id, struct mce *m)
+{
+	u8 ecc_type = (m->status >> 45) & 0x3;
+	struct mem_ctl_info *mci;
+	struct amd64_pvt *pvt;
+	struct err_info err;
+	u64 sys_addr;
+
+	mci = edac_mc_find(node_id);
+	if (!mci)
+		return;
+
+	pvt = mci->pvt_info;
+
+	memset(&err, 0, sizeof(err));
+
+	if (m->status & MCI_STATUS_DEFERRED)
+		ecc_type = 3;
+
+	err.channel = find_umc_channel(pvt, m);
+	if (err.channel < 0) {
+		err.err_code = ERR_CHANNEL;
+		goto log_error;
+	}
+
+	if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) {
+		err.err_code = ERR_NORM_ADDR;
+		goto log_error;
+	}
+
+	error_address_to_page_and_offset(sys_addr, &err);
+
+	if (!(m->status & MCI_STATUS_SYNDV)) {
+		err.err_code = ERR_SYND;
+		goto log_error;
+	}
+
+	if (ecc_type == 2) {
+		u8 length = (m->synd >> 18) & 0x3f;
+
+		if (length)
+			err.syndrome = (m->synd >> 32) & GENMASK(length - 1, 0);
+		else
+			err.err_code = ERR_CHANNEL;
+	}
+
+	err.csrow = m->synd & 0x7;
+
+log_error:
+	__log_ecc_error(mci, &err, ecc_type);
+}
+
+/*
  * Use pvt->F3 which contains the F3 CPU PCI device to get the related
  * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
  * Reserve F0 and F6 on systems with a UMC.
@@ -3232,7 +3308,10 @@ static int init_one_instance(unsigned int nid)
 	if (report_gart_errors)
 		amd_report_gart_errors(true);
 
-	amd_register_ecc_decoder(decode_bus_error);
+	if (pvt->umc)
+		amd_register_ecc_decoder(decode_umc_error);
+	else
+		amd_register_ecc_decoder(decode_bus_error);
 
 	return 0;
 
@@ -3323,7 +3402,11 @@ static void remove_one_instance(unsigned int nid)
 
 	/* unregister from EDAC MCE */
 	amd_report_gart_errors(false);
-	amd_unregister_ecc_decoder(decode_bus_error);
+
+	if (pvt->umc)
+		amd_unregister_ecc_decoder(decode_umc_error);
+	else
+		amd_unregister_ecc_decoder(decode_bus_error);
 
 	kfree(ecc_stngs[nid]);
 	ecc_stngs[nid] = NULL;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index cb91d0b06d23..c3b004a53eea 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -382,6 +382,8 @@ enum err_codes {
 	ERR_NODE	= -1,
 	ERR_CSROW	= -2,
 	ERR_CHANNEL	= -3,
+	ERR_SYND	= -4,
+	ERR_NORM_ADDR	= -5,
 };
 
 struct err_info {