1 /*
2 * Bridge between MCE and APEI
3 *
 * On some machines, corrected memory errors are reported via APEI
5 * generic hardware error source (GHES) instead of corrected Machine
6 * Check. These corrected memory errors can be reported to user space
7 * through /dev/mcelog via faking a corrected Machine Check, so that
8 * the error memory page can be offlined by /sbin/mcelog if the error
9 * count for one page is beyond the threshold.
10 *
11 * For fatal MCE, save MCE record into persistent storage via ERST, so
12 * that the MCE record can be logged after reboot via ERST.
13 *
14 * Copyright 2010 Intel Corp.
15 * Author: Huang Ying <ying.huang@intel.com>
16 * Ported by: Liu, Jinsong <jinsong.liu@intel.com>
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License version
20 * 2 as published by the Free Software Foundation.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; If not, see <http://www.gnu.org/licenses/>.
29 */
30
31 #include <xen/kernel.h>
32 #include <xen/cper.h>
33 #include <xen/errno.h>
34 #include <acpi/acpi.h>
35 #include <acpi/apei.h>
36
37 #include "mce.h"
38
/* GUID stored in (and checked against) the record header's creator_id. */
#define CPER_CREATOR_MCE                                            \
    UUID_LE(0x75a574e3, 0x5052, 0x4b29,                             \
            0x8a, 0x8e, 0xbe, 0x2c, 0x64, 0x90, 0xb8, 0x9d)

/* GUID stored in the section descriptor's section_type for an MCE section. */
#define CPER_SECTION_TYPE_MCE                                       \
    UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7,                             \
            0xbc, 0x73, 0x40, 0x96, 0x04, 0x4a, 0x38, 0xfc)
45
/*
 * One complete MCE error record as handed to / read back from ERST:
 * a CPER record header, a single section descriptor, and the MCE itself
 * as the section body.
 *
 * The CPER specification (UEFI specification 2.3, appendix N) requires
 * records to be byte-packed, hence __packed. Field order is part of the
 * record layout and must not be changed.
 */
struct __packed cper_mce_record {
    /* CPER record header; its address is what erst_write() is given. */
    struct cper_record_header hdr;
    /* Descriptor for the one and only section of the record. */
    struct cper_section_descriptor sec_hdr;
    /* Section body: a verbatim copy of the struct mce. */
    struct mce mce;
};
55
apei_write_mce(struct mce * m)56 int apei_write_mce(struct mce *m)
57 {
58 struct cper_mce_record rcd;
59
60 if (!m)
61 return -EINVAL;
62
63 memset(&rcd, 0, sizeof(rcd));
64 memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
65 rcd.hdr.revision = CPER_RECORD_REV;
66 rcd.hdr.signature_end = CPER_SIG_END;
67 rcd.hdr.section_count = 1;
68 rcd.hdr.error_severity = CPER_SER_FATAL;
69 /* timestamp, platform_id, partition_id are all invalid */
70 rcd.hdr.validation_bits = 0;
71 rcd.hdr.record_length = sizeof(rcd);
72 rcd.hdr.creator_id = CPER_CREATOR_MCE;
73 rcd.hdr.notification_type = CPER_NOTIFY_MCE;
74 rcd.hdr.record_id = cper_next_record_id();
75 rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
76
77 rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd;
78 rcd.sec_hdr.section_length = sizeof(rcd.mce);
79 rcd.sec_hdr.revision = CPER_SEC_REV;
80 /* fru_id and fru_text is invalid */
81 rcd.sec_hdr.validation_bits = 0;
82 rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
83 rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
84 rcd.sec_hdr.section_severity = CPER_SER_FATAL;
85
86 memcpy(&rcd.mce, m, sizeof(*m));
87
88 return erst_write(&rcd.hdr);
89 }
90
91 #ifndef NDEBUG /* currently dead code */
92
/*
 * Read the next record from ERST and extract the MCE it carries.
 *
 * @m:         receives a copy of the record's MCE payload on success.
 * @record_id: receives the record header's record_id on success.
 *
 * Returns sizeof(*m) on success; -EINVAL if either pointer is NULL;
 * -EIO if the record read does not have exactly the size of
 * struct cper_mce_record or its creator_id is not CPER_CREATOR_MCE;
 * otherwise the zero or negative value returned by erst_read_next(),
 * passed through unchanged.
 */
ssize_t apei_read_mce(struct mce *m, u64 *record_id)
{
    struct cper_mce_record rcd;
    ssize_t len;

    if (!m || !record_id)
        return -EINVAL;

    len = erst_read_next(&rcd.hdr, sizeof(rcd));
    if (len <= 0)
        return len;

    /*
     * Records of other kinds cannot be skipped over in ERST unless they
     * are cleared, so anything that is not one of our records is an error.
     */
    if (len != sizeof(rcd) ||
        uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
        /* Newline-terminated so the warning does not run into the next message. */
        printk(KERN_WARNING
               "MCE-APEI: Can not skip the unknown record in ERST\n");
        return -EIO;
    }

    memcpy(m, &rcd.mce, sizeof(*m));
    *record_id = rcd.hdr.record_id;

    return sizeof(*m);
}
117
/*
 * Tell whether ERST holds any record: true when erst_get_record_count()
 * reports a count greater than zero, false otherwise.
 */
bool apei_check_mce(void)
{
    if (erst_get_record_count() > 0)
        return true;

    return false;
}
123
/*
 * Clear the ERST record identified by @record_id.
 *
 * Returns the result of erst_clear() unchanged.
 */
int apei_clear_mce(u64 record_id)
{
    int rc = erst_clear(record_id);

    return rc;
}
128
129 #endif /* currently dead code */
130