1 /*
2 * common MCA implementation for AMD CPUs.
3 * Copyright (c) 2012-2014 Advanced Micro Devices, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 /* K8 common MCA documentation published at
20 *
21 * AMD64 Architecture Programmer's Manual Volume 2:
22 * System Programming
23 * Publication # 24593 Revision: 3.24
24 * Issue Date: October 2013
25 *
26 * URL:
27 * http://support.amd.com/TechDocs/24593.pdf
28 */
29
30 /* The related documentation for K8 Revisions A - E is:
31 *
32 * BIOS and Kernel Developer's Guide for
33 * AMD Athlon 64 and AMD Opteron Processors
34 * Publication # 26094 Revision: 3.30
35 * Issue Date: February 2006
36 *
37 * URL:
38 * http://support.amd.com/TechDocs/26094.PDF
39 */
40
41 /* The related documentation for K8 Revisions F - G is:
42 *
43 * BIOS and Kernel Developer's Guide for
44 * AMD NPT Family 0Fh Processors
45 * Publication # 32559 Revision: 3.08
46 * Issue Date: July 2007
47 *
48 * URL:
49 * http://support.amd.com/TechDocs/32559.pdf
50 */
51
52 /* Family10 MCA documentation published at
53 *
54 * BIOS and Kernel Developer's Guide
55 * For AMD Family 10h Processors
56 * Publication # 31116 Revision: 3.62
57 Issue Date: January 11, 2013
58 *
59 * URL:
60 * http://support.amd.com/TechDocs/31116.pdf
61 */
62
63 #include <xen/init.h>
64 #include <xen/types.h>
65
66 #include <asm/msr.h>
67 #include <asm/processor.h>
68
69 #include "mce.h"
70 #include "x86_mca.h"
71 #include "mce_amd.h"
72 #include "mcaction.h"
73 #include "mce_quirks.h"
74 #include "vmce.h"
75
76 #define ANY -1
77
78 static const struct mce_quirkdata mce_amd_quirks[] = {
79 { 0xf /* cpu family */, ANY /* all models */, ANY /* all steppings */,
80 MCEQUIRK_K8_GART },
81 { 0x10 /* cpu family */, ANY /* all models */, ANY /* all steppings */,
82 MCEQUIRK_F10_GART },
83 };
84
/*
 * MCA error code transaction-type classes.  Each value is a single bit
 * tested against the error code extracted from a bank's status MSR.
 */
enum mc_ec_type {
    MC_EC_TLB_TYPE = 0x0010, /* bit 4: TLB error */
    MC_EC_MEM_TYPE = 0x0100, /* bit 8: memory/cache error */
    MC_EC_BUS_TYPE = 0x0800, /* bit 11: bus/interconnect error */
};
91
92 enum mc_ec_type
mc_ec2type(uint16_t errorcode)93 mc_ec2type(uint16_t errorcode)
94 {
95 if ( errorcode & MC_EC_BUS_TYPE )
96 return MC_EC_BUS_TYPE;
97 if ( errorcode & MC_EC_MEM_TYPE )
98 return MC_EC_MEM_TYPE;
99 if ( errorcode & MC_EC_TLB_TYPE )
100 return MC_EC_TLB_TYPE;
101 /* Unreached */
102 BUG();
103 return 0;
104 }
105
mc_amd_recoverable_scan(uint64_t status)106 bool mc_amd_recoverable_scan(uint64_t status)
107 {
108 bool ret = false;
109 enum mc_ec_type ectype;
110 uint16_t errorcode;
111
112 if ( !(status & MCi_STATUS_UC) )
113 return true;
114
115 errorcode = status & (MCi_STATUS_MCA | MCi_STATUS_MSEC);
116 ectype = mc_ec2type(errorcode);
117
118 switch ( ectype )
119 {
120 case MC_EC_BUS_TYPE: /* value in addr MSR is physical */
121 /* should run cpu offline action */
122 break;
123
124 case MC_EC_MEM_TYPE: /* value in addr MSR is physical */
125 ret = true; /* run memory page offline action */
126 break;
127
128 case MC_EC_TLB_TYPE: /* value in addr MSR is virtual */
129 /* should run tlb flush action and retry */
130 break;
131 }
132
133 return ret;
134 }
135
mc_amd_addrcheck(uint64_t status,uint64_t misc,int addrtype)136 bool mc_amd_addrcheck(uint64_t status, uint64_t misc, int addrtype)
137 {
138 enum mc_ec_type ectype;
139 uint16_t errorcode;
140
141 errorcode = status & (MCi_STATUS_MCA | MCi_STATUS_MSEC);
142 ectype = mc_ec2type(errorcode);
143
144 switch ( ectype )
145 {
146 case MC_EC_BUS_TYPE: /* value in addr MSR is physical */
147 case MC_EC_MEM_TYPE: /* value in addr MSR is physical */
148 return (addrtype == MC_ADDR_PHYSICAL);
149
150 case MC_EC_TLB_TYPE: /* value in addr MSR is virtual */
151 return (addrtype == MC_ADDR_VIRTUAL);
152 }
153
154 /* unreached */
155 BUG();
156 return false;
157 }
158
159 /* MC quirks */
160 enum mcequirk_amd_flags
mcequirk_lookup_amd_quirkdata(struct cpuinfo_x86 * c)161 mcequirk_lookup_amd_quirkdata(struct cpuinfo_x86 *c)
162 {
163 int i;
164
165 BUG_ON(c->x86_vendor != X86_VENDOR_AMD);
166
167 for ( i = 0; i < ARRAY_SIZE(mce_amd_quirks); i++ )
168 {
169 if ( c->x86 != mce_amd_quirks[i].cpu_family )
170 continue;
171 if ( (mce_amd_quirks[i].cpu_model != ANY) &&
172 (mce_amd_quirks[i].cpu_model != c->x86_model) )
173 continue;
174 if ( (mce_amd_quirks[i].cpu_stepping != ANY) &&
175 (mce_amd_quirks[i].cpu_stepping != c->x86_mask) )
176 continue;
177 return mce_amd_quirks[i].quirk;
178 }
179 return 0;
180 }
181
mcequirk_amd_apply(enum mcequirk_amd_flags flags)182 int mcequirk_amd_apply(enum mcequirk_amd_flags flags)
183 {
184 uint64_t val;
185
186 switch ( flags )
187 {
188 case MCEQUIRK_K8_GART:
189 /*
190 * Enable error reporting for all errors except for GART
191 * TBL walk error reporting, which trips off incorrectly
192 * with AGP GART & 3ware & Cerberus.
193 */
194 wrmsrl(MSR_IA32_MCx_CTL(4), ~(1ULL << 10));
195 wrmsrl(MSR_IA32_MCx_STATUS(4), 0ULL);
196 break;
197
198 case MCEQUIRK_F10_GART:
199 if ( rdmsr_safe(MSR_AMD64_MCx_MASK(4), val) == 0 )
200 wrmsr_safe(MSR_AMD64_MCx_MASK(4), val | (1 << 10));
201 break;
202 }
203
204 return 0;
205 }
206
207 static struct mcinfo_extended *
amd_f10_handler(struct mc_info * mi,uint16_t bank,uint64_t status)208 amd_f10_handler(struct mc_info *mi, uint16_t bank, uint64_t status)
209 {
210 struct mcinfo_extended *mc_ext;
211
212 /* Family 0x10 introduced additional MSR that belong to the
213 * northbridge bank (4). */
214 if ( mi == NULL || bank != 4 )
215 return NULL;
216
217 if ( !(status & MCi_STATUS_VAL) )
218 return NULL;
219
220 if ( !(status & MCi_STATUS_MISCV) )
221 return NULL;
222
223 mc_ext = x86_mcinfo_reserve(mi, sizeof(*mc_ext), MC_TYPE_EXTENDED);
224 if ( !mc_ext )
225 {
226 mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
227 return NULL;
228 }
229
230 mc_ext->mc_msrs = 3;
231
232 mc_ext->mc_msr[0].reg = MSR_F10_MC4_MISC1;
233 mc_ext->mc_msr[1].reg = MSR_F10_MC4_MISC2;
234 mc_ext->mc_msr[2].reg = MSR_F10_MC4_MISC3;
235
236 mc_ext->mc_msr[0].value = mca_rdmsr(MSR_F10_MC4_MISC1);
237 mc_ext->mc_msr[1].value = mca_rdmsr(MSR_F10_MC4_MISC2);
238 mc_ext->mc_msr[2].value = mca_rdmsr(MSR_F10_MC4_MISC3);
239
240 return mc_ext;
241 }
242
amd_need_clearbank_scan(enum mca_source who,uint64_t status)243 static bool amd_need_clearbank_scan(enum mca_source who, uint64_t status)
244 {
245 if ( who != MCA_MCE_SCAN )
246 return true;
247
248 /*
249 * For fatal error, it shouldn't be cleared so that sticky bank
250 * have a chance to be handled after reboot by polling.
251 */
252 if ( (status & MCi_STATUS_UC) && (status & MCi_STATUS_PCC) )
253 return false;
254
255 return true;
256 }
257
258 /* AMD specific MCA MSR */
vmce_amd_wrmsr(struct vcpu * v,uint32_t msr,uint64_t val)259 int vmce_amd_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
260 {
261 /* Do nothing as we don't emulate this MC bank currently */
262 mce_printk(MCE_VERBOSE, "MCE: wr msr %#"PRIx64"\n", val);
263 return 1;
264 }
265
/*
 * Guest read of an AMD-specific MCA MSR.  The bank is not emulated
 * yet, so reads always yield zero.  Returns 1 to signal the access
 * was handled.
 */
int vmce_amd_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
{
    *val = 0;

    return 1;
}
272
273 enum mcheck_type
amd_mcheck_init(struct cpuinfo_x86 * ci)274 amd_mcheck_init(struct cpuinfo_x86 *ci)
275 {
276 uint32_t i;
277 enum mcequirk_amd_flags quirkflag = mcequirk_lookup_amd_quirkdata(ci);
278
279 /* Assume that machine check support is available.
280 * The minimum provided support is at least the K8. */
281 mce_handler_init();
282 x86_mce_vector_register(mcheck_cmn_handler);
283 mce_need_clearbank_register(amd_need_clearbank_scan);
284
285 for ( i = 0; i < nr_mce_banks; i++ )
286 {
287 if ( quirkflag == MCEQUIRK_K8_GART && i == 4 )
288 mcequirk_amd_apply(quirkflag);
289 else
290 {
291 /* Enable error reporting of all errors */
292 wrmsrl(MSR_IA32_MCx_CTL(i), 0xffffffffffffffffULL);
293 wrmsrl(MSR_IA32_MCx_STATUS(i), 0x0ULL);
294 }
295 }
296
297 if ( ci->x86 == 0xf )
298 return mcheck_amd_k8;
299
300 if ( quirkflag == MCEQUIRK_F10_GART )
301 mcequirk_amd_apply(quirkflag);
302
303 x86_mce_callback_register(amd_f10_handler);
304 mce_recoverable_register(mc_amd_recoverable_scan);
305 mce_register_addrcheck(mc_amd_addrcheck);
306
307 return mcheck_amd_famXX;
308 }
309