/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Common MCA implementation for AMD CPUs.
 * Copyright (c) 2012-2014 Advanced Micro Devices, Inc.
 */

/* K8 common MCA documentation published at
 *
 * AMD64 Architecture Programmer's Manual Volume 2:
 * System Programming
 * Publication # 24593 Revision: 3.24
 * Issue Date: October 2013
 *
 * URL:
 * http://support.amd.com/TechDocs/24593.pdf
 */

/* The related documentation for K8 Revisions A - E is:
 *
 * BIOS and Kernel Developer's Guide for
 * AMD Athlon 64 and AMD Opteron Processors
 * Publication # 26094 Revision: 3.30
 * Issue Date: February 2006
 *
 * URL:
 * http://support.amd.com/TechDocs/26094.PDF
 */

/* The related documentation for K8 Revisions F - G is:
 *
 * BIOS and Kernel Developer's Guide for
 * AMD NPT Family 0Fh Processors
 * Publication # 32559 Revision: 3.08
 * Issue Date: July 2007
 *
 * URL:
 * http://support.amd.com/TechDocs/32559.pdf
 */

/* Family 10h MCA documentation published at
 *
 * BIOS and Kernel Developer's Guide
 * For AMD Family 10h Processors
 * Publication # 31116 Revision: 3.62
 * Issue Date: January 11, 2013
 *
 * URL:
 * http://support.amd.com/TechDocs/31116.pdf
 */

#include <xen/init.h>
#include <xen/types.h>

#include <asm/msr.h>
#include <asm/processor.h>

#include "mce.h"
#include "x86_mca.h"
#include "mce_amd.h"
#include "mcaction.h"
#include "vmce.h"

/* Matches any model/stepping in the quirk table below. */
#define ANY (~0U)

enum mcequirk_amd_flags {
    MCEQUIRK_NONE,
    MCEQUIRK_K8_GART,
    MCEQUIRK_F10_GART,
};

static const struct mce_quirkdata {
    unsigned int cpu_family;
    unsigned int cpu_model;
    unsigned int cpu_stepping;
    enum mcequirk_amd_flags quirk;
} mce_amd_quirks[] = {
    { 0xf /* cpu family */, ANY /* all models */, ANY /* all steppings */,
      MCEQUIRK_K8_GART },
    { 0x10 /* cpu family */, ANY /* all models */, ANY /* all steppings */,
      MCEQUIRK_F10_GART },
};

/* Error Code Types */
enum mc_ec_type {
    MC_EC_TLB_TYPE = 0x0010,
    MC_EC_MEM_TYPE = 0x0100,
    MC_EC_BUS_TYPE = 0x0800,
};
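
/*
 * Map the error code logged in MCi_STATUS to the architected error-code
 * format it uses (TLB, memory, or bus/interconnect), keyed off the bit
 * identifying each format.
 */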
static enum mc_ec_type
mc_ec2type(uint16_t errorcode)
{
    if ( errorcode & MC_EC_BUS_TYPE )
        return MC_EC_BUS_TYPE;
    if ( errorcode & MC_EC_MEM_TYPE )
        return MC_EC_MEM_TYPE;
    if ( errorcode & MC_EC_TLB_TYPE )
        return MC_EC_TLB_TYPE;
    /* Unreached */
    BUG();
    return 0;
}
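
/*
 * Quick recoverability check: corrected errors are always fine, and of
 * the uncorrected classes only memory errors are treated as recoverable
 * here (by offlining the affected page).  Bus and TLB errors currently
 * have no recovery action wired up.
 */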
bool cf_check mc_amd_recoverable_scan(uint64_t status)
{
    bool ret = false;
    enum mc_ec_type ectype;
    uint16_t errorcode;

    if ( !(status & MCi_STATUS_UC) )
        return true;

    errorcode = status & (MCi_STATUS_MCA | MCi_STATUS_MSEC);
    ectype = mc_ec2type(errorcode);

    switch ( ectype )
    {
    case MC_EC_BUS_TYPE: /* value in addr MSR is physical */
        /* should run cpu offline action */
        break;

    case MC_EC_MEM_TYPE: /* value in addr MSR is physical */
        ret = true; /* run memory page offline action */
        break;

    case MC_EC_TLB_TYPE: /* value in addr MSR is virtual */
        /* should run tlb flush action and retry */
        break;
    }

    return ret;
}
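
/*
 * Check that the address type the caller is about to use matches what
 * the error-code format implies: bus and memory errors log a physical
 * address in MCi_ADDR, while TLB errors log a virtual one.
 */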
bool cf_check mc_amd_addrcheck(uint64_t status, uint64_t misc, int addrtype)
{
    enum mc_ec_type ectype;
    uint16_t errorcode;

    errorcode = status & (MCi_STATUS_MCA | MCi_STATUS_MSEC);
    ectype = mc_ec2type(errorcode);

    switch ( ectype )
    {
    case MC_EC_BUS_TYPE: /* value in addr MSR is physical */
    case MC_EC_MEM_TYPE: /* value in addr MSR is physical */
        return (addrtype == MC_ADDR_PHYSICAL);

    case MC_EC_TLB_TYPE: /* value in addr MSR is virtual */
        return (addrtype == MC_ADDR_VIRTUAL);
    }

    /* unreached */
    BUG();
    return false;
}

/* MC quirks */
static enum mcequirk_amd_flags
mcequirk_lookup_amd_quirkdata(const struct cpuinfo_x86 *c)
{
    unsigned int i;

    BUG_ON(c->x86_vendor != X86_VENDOR_AMD);

    for ( i = 0; i < ARRAY_SIZE(mce_amd_quirks); i++ )
    {
        if ( c->x86 != mce_amd_quirks[i].cpu_family )
            continue;
        if ( (mce_amd_quirks[i].cpu_model != ANY) &&
             (mce_amd_quirks[i].cpu_model != c->x86_model) )
            continue;
        if ( (mce_amd_quirks[i].cpu_stepping != ANY) &&
             (mce_amd_quirks[i].cpu_stepping != c->x86_mask) )
            continue;
        return mce_amd_quirks[i].quirk;
    }

    return MCEQUIRK_NONE;
}

static void mcequirk_amd_apply(enum mcequirk_amd_flags flags)
{
    uint64_t val;

    switch ( flags )
    {
    case MCEQUIRK_K8_GART:
        /*
         * Enable error reporting for all errors except for GART
         * TBL walk error reporting, which trips off incorrectly
         * with AGP GART & 3ware & Cerberus.
         */
        wrmsrl(MSR_IA32_MCx_CTL(4), ~(1ULL << 10));
        wrmsrl(MSR_IA32_MCx_STATUS(4), 0ULL);
        break;
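
    /*
     * Family 10h has a dedicated mask register for the NB bank (4)
     * instead: set bit 10 there to mask GART TBL walk errors while
     * leaving reporting of all other NB errors enabled.
     */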
    case MCEQUIRK_F10_GART:
        if ( rdmsr_safe(MSR_AMD64_MCx_MASK(4), val) == 0 )
            wrmsr_safe(MSR_AMD64_MCx_MASK(4), val | (1 << 10));
        break;

    default:
        ASSERT(flags == MCEQUIRK_NONE);
    }
}

static struct mcinfo_extended *cf_check
amd_f10_handler(struct mc_info *mi, uint16_t bank, uint64_t status)
{
    struct mcinfo_extended *mc_ext;

    /*
     * Family 10h introduced additional MSRs that belong to the
     * northbridge bank (4).
     */
    if ( mi == NULL || bank != 4 )
        return NULL;

    if ( !(status & MCi_STATUS_VAL) )
        return NULL;

    if ( !(status & MCi_STATUS_MISCV) )
        return NULL;

    mc_ext = x86_mcinfo_reserve(mi, sizeof(*mc_ext), MC_TYPE_EXTENDED);
    if ( !mc_ext )
    {
        mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
        return NULL;
    }

    mc_ext->mc_msrs = 3;

    mc_ext->mc_msr[0].reg = MSR_F10_MC4_MISC1;
    mc_ext->mc_msr[1].reg = MSR_F10_MC4_MISC2;
    mc_ext->mc_msr[2].reg = MSR_F10_MC4_MISC3;

    mc_ext->mc_msr[0].value = mca_rdmsr(MSR_F10_MC4_MISC1);
    mc_ext->mc_msr[1].value = mca_rdmsr(MSR_F10_MC4_MISC2);
    mc_ext->mc_msr[2].value = mca_rdmsr(MSR_F10_MC4_MISC3);

    return mc_ext;
}
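
/*
 * Tell the common handler whether a bank may be cleared after reading:
 * everything may be, except fatal errors seen during an MCE scan, which
 * are deliberately left sticky.
 */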
static bool cf_check amd_need_clearbank_scan(
    enum mca_source who, uint64_t status)
{
    if ( who != MCA_MCE_SCAN )
        return true;

    /*
     * A fatal error shouldn't be cleared, so that the sticky bank has
     * a chance to be handled after reboot by polling.
     */
    if ( (status & MCi_STATUS_UC) && (status & MCi_STATUS_PCC) )
        return false;

    return true;
}

/* AMD specific MCA MSRs */
int vmce_amd_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
{
    /* Do nothing as we don't emulate this MC bank currently */
    mce_printk(MCE_VERBOSE, "MCE: wr msr %#"PRIx64"\n", val);
    return 1;
}

int vmce_amd_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
{
    /* Assign '0' as we don't emulate this MC bank currently */
    *val = 0;
    return 1;
}
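
/*
 * K8 (family 0xf) lacks the extended NB MSRs and the recovery hooks;
 * family 10h and later get the fuller set of callbacks.
 */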
static const struct mce_callbacks __initconst_cf_clobber k8_callbacks = {
    .handler = mcheck_cmn_handler,
    .need_clearbank_scan = amd_need_clearbank_scan,
};

static const struct mce_callbacks __initconst_cf_clobber k10_callbacks = {
    .handler = mcheck_cmn_handler,
    .check_addr = mc_amd_addrcheck,
    .recoverable_scan = mc_amd_recoverable_scan,
    .need_clearbank_scan = amd_need_clearbank_scan,
    .info_collect = amd_f10_handler,
};

enum mcheck_type
amd_mcheck_init(const struct cpuinfo_x86 *c, bool bsp)
{
    uint32_t i;
    enum mcequirk_amd_flags quirkflag = MCEQUIRK_NONE;

    if ( c->x86_vendor != X86_VENDOR_HYGON )
        quirkflag = mcequirk_lookup_amd_quirkdata(c);

    /*
     * Assume that machine check support is available.
     * The minimum provided support is at least the K8.
     */
    if ( bsp )
        mce_handler_init(c->x86 == 0xf ? &k8_callbacks : &k10_callbacks);
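
    /*
     * Initialise each bank: the K8 GART quirk replaces the plain setup
     * for the NB bank (4); all other banks get full error reporting
     * enabled and their status cleared.
     */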
    for ( i = 0; i < this_cpu(nr_mce_banks); i++ )
    {
        if ( quirkflag == MCEQUIRK_K8_GART && i == 4 )
            mcequirk_amd_apply(quirkflag);
        else
        {
            /* Enable error reporting of all errors */
            wrmsrl(MSR_IA32_MCx_CTL(i), 0xffffffffffffffffULL);
            wrmsrl(MSR_IA32_MCx_STATUS(i), 0x0ULL);
        }
    }

    if ( c->x86 == 0xf )
        return mcheck_amd_k8;

    if ( quirkflag == MCEQUIRK_F10_GART )
        mcequirk_amd_apply(quirkflag);

    if ( cpu_has(c, X86_FEATURE_AMD_PPIN) &&
         (c == &boot_cpu_data || ppin_msr) )
    {
        uint64_t val;

        rdmsrl(MSR_AMD_PPIN_CTL, val);

        /* If PPIN is disabled, but not locked, try to enable. */
        if ( !(val & (PPIN_ENABLE | PPIN_LOCKOUT)) )
        {
            wrmsr_safe(MSR_AMD_PPIN_CTL, val | PPIN_ENABLE);
            rdmsrl(MSR_AMD_PPIN_CTL, val);
        }

        if ( !(val & PPIN_ENABLE) )
            ppin_msr = 0;
        else if ( c == &boot_cpu_data )
            ppin_msr = MSR_AMD_PPIN;
    }

    return c->x86_vendor == X86_VENDOR_HYGON ?
        mcheck_hygon : mcheck_amd_famXX;
}