1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * common MCA implementation for AMD CPUs.
4  * Copyright (c) 2012-2014 Advanced Micro Devices, Inc.
5  */
6 
7 /* K8 common MCA documentation published at
8  *
9  * AMD64 Architecture Programmer's Manual Volume 2:
10  * System Programming
11  * Publication # 24593 Revision: 3.24
12  * Issue Date: October 2013
13  *
14  * URL:
15  * http://support.amd.com/TechDocs/24593.pdf
16  */
17 
18 /* The related documentation for K8 Revisions A - E is:
19  *
20  * BIOS and Kernel Developer's Guide for
21  * AMD Athlon 64 and AMD Opteron Processors
22  * Publication # 26094 Revision: 3.30
23  * Issue Date: February 2006
24  *
25  * URL:
26  * http://support.amd.com/TechDocs/26094.PDF
27  */
28 
29 /* The related documentation for K8 Revisions F - G is:
30  *
31  * BIOS and Kernel Developer's Guide for
32  * AMD NPT Family 0Fh Processors
33  * Publication # 32559 Revision: 3.08
34  * Issue Date: July 2007
35  *
36  * URL:
37  * http://support.amd.com/TechDocs/32559.pdf
38  */
39 
40 /* Family10 MCA documentation published at
41  *
42  * BIOS and Kernel Developer's Guide
43  * For AMD Family 10h Processors
44  * Publication # 31116 Revision: 3.62
 * Issue Date: January 11, 2013
46  *
47  * URL:
48  * http://support.amd.com/TechDocs/31116.pdf
49  */
50 
51 #include <xen/init.h>
52 #include <xen/types.h>
53 
54 #include <asm/msr.h>
55 #include <asm/processor.h>
56 
57 #include "mce.h"
58 #include "x86_mca.h"
59 #include "mce_amd.h"
60 #include "mcaction.h"
61 #include "vmce.h"
62 
/* Wildcard for the model/stepping fields of the quirk table below. */
#define ANY (~0U)

/* Per-family MCA quirks, applied from amd_mcheck_init(). */
enum mcequirk_amd_flags {
    MCEQUIRK_NONE,
    MCEQUIRK_K8_GART,  /* K8: suppress GART TBL walk error reporting. */
    MCEQUIRK_F10_GART, /* Fam10h: mask GART errors via the NB mask MSR. */
};
70 
/*
 * (family, model, stepping) -> quirk lookup table.  ANY in the model
 * or stepping field matches every value of that field.
 */
static const struct mce_quirkdata {
    unsigned int cpu_family;
    unsigned int cpu_model;
    unsigned int cpu_stepping;
    enum mcequirk_amd_flags quirk;
} mce_amd_quirks[] = {
    { 0xf /* cpu family */, ANY /* all models */, ANY /* all steppings */,
      MCEQUIRK_K8_GART },
    { 0x10 /* cpu family */, ANY /* all models */, ANY /* all steppings */,
      MCEQUIRK_F10_GART },
};
82 
/*
 * Error Code Types.  Each value is the single MCi_STATUS error-code bit
 * that distinguishes the type, so they can be tested with '&' directly
 * (see mc_ec2type()).
 */
enum mc_ec_type {
    MC_EC_TLB_TYPE = 0x0010,
    MC_EC_MEM_TYPE = 0x0100,
    MC_EC_BUS_TYPE = 0x0800,
};
89 
90 static enum mc_ec_type
mc_ec2type(uint16_t errorcode)91 mc_ec2type(uint16_t errorcode)
92 {
93     if ( errorcode & MC_EC_BUS_TYPE )
94         return MC_EC_BUS_TYPE;
95     if ( errorcode & MC_EC_MEM_TYPE )
96         return MC_EC_MEM_TYPE;
97     if ( errorcode & MC_EC_TLB_TYPE )
98         return MC_EC_TLB_TYPE;
99     /* Unreached */
100     BUG();
101     return 0;
102 }
103 
mc_amd_recoverable_scan(uint64_t status)104 bool cf_check mc_amd_recoverable_scan(uint64_t status)
105 {
106     bool ret = false;
107     enum mc_ec_type ectype;
108     uint16_t errorcode;
109 
110     if ( !(status & MCi_STATUS_UC) )
111         return true;
112 
113     errorcode = status & (MCi_STATUS_MCA | MCi_STATUS_MSEC);
114     ectype = mc_ec2type(errorcode);
115 
116     switch ( ectype )
117     {
118     case MC_EC_BUS_TYPE: /* value in addr MSR is physical */
119         /* should run cpu offline action */
120         break;
121 
122     case MC_EC_MEM_TYPE: /* value in addr MSR is physical */
123         ret = true; /* run memory page offline action */
124         break;
125 
126     case MC_EC_TLB_TYPE: /* value in addr MSR is virtual */
127         /* should run tlb flush action and retry */
128         break;
129     }
130 
131     return ret;
132 }
133 
mc_amd_addrcheck(uint64_t status,uint64_t misc,int addrtype)134 bool cf_check mc_amd_addrcheck(uint64_t status, uint64_t misc, int addrtype)
135 {
136     enum mc_ec_type ectype;
137     uint16_t errorcode;
138 
139     errorcode = status & (MCi_STATUS_MCA | MCi_STATUS_MSEC);
140     ectype = mc_ec2type(errorcode);
141 
142     switch ( ectype )
143     {
144     case MC_EC_BUS_TYPE: /* value in addr MSR is physical */
145     case MC_EC_MEM_TYPE: /* value in addr MSR is physical */
146         return (addrtype == MC_ADDR_PHYSICAL);
147 
148     case MC_EC_TLB_TYPE: /* value in addr MSR is virtual */
149         return (addrtype == MC_ADDR_VIRTUAL);
150     }
151 
152     /* unreached */
153     BUG();
154     return false;
155 }
156 
157 /* MC quirks */
158 static enum mcequirk_amd_flags
mcequirk_lookup_amd_quirkdata(const struct cpuinfo_x86 * c)159 mcequirk_lookup_amd_quirkdata(const struct cpuinfo_x86 *c)
160 {
161     unsigned int i;
162 
163     BUG_ON(c->x86_vendor != X86_VENDOR_AMD);
164 
165     for ( i = 0; i < ARRAY_SIZE(mce_amd_quirks); i++ )
166     {
167         if ( c->x86 != mce_amd_quirks[i].cpu_family )
168             continue;
169         if ( (mce_amd_quirks[i].cpu_model != ANY) &&
170              (mce_amd_quirks[i].cpu_model != c->x86_model) )
171             continue;
172         if ( (mce_amd_quirks[i].cpu_stepping != ANY) &&
173              (mce_amd_quirks[i].cpu_stepping != c->x86_mask) )
174                 continue;
175         return mce_amd_quirks[i].quirk;
176     }
177 
178     return MCEQUIRK_NONE;
179 }
180 
/*
 * Apply a family-specific MCA quirk to the current CPU.  Both quirks
 * concern northbridge bank 4 and the GART TBL walk error (bit 10).
 */
static void mcequirk_amd_apply(enum mcequirk_amd_flags flags)
{
    uint64_t val;

    switch ( flags )
    {
    case MCEQUIRK_K8_GART:
        /*
         * Enable error reporting for all errors except for GART
         * TBL walk error reporting, which trips off incorrectly
         * with AGP GART & 3ware & Cerberus.
         */
        wrmsrl(MSR_IA32_MCx_CTL(4), ~(1ULL << 10));
        wrmsrl(MSR_IA32_MCx_STATUS(4), 0ULL);
        break;

    case MCEQUIRK_F10_GART:
        /*
         * Fam10h has a dedicated mask MSR instead; set bit 10 there to
         * suppress GART TBL walk errors.  Skipped silently if the MSR
         * read faults (rdmsr_safe() returns non-zero).
         */
        if ( rdmsr_safe(MSR_AMD64_MCx_MASK(4), val) == 0 )
            wrmsr_safe(MSR_AMD64_MCx_MASK(4), val | (1 << 10));
        break;

    default:
        ASSERT(flags == MCEQUIRK_NONE);
    }
}
206 
207 static struct mcinfo_extended *cf_check
amd_f10_handler(struct mc_info * mi,uint16_t bank,uint64_t status)208 amd_f10_handler(struct mc_info *mi, uint16_t bank, uint64_t status)
209 {
210     struct mcinfo_extended *mc_ext;
211 
212     /* Family 0x10 introduced additional MSR that belong to the
213      * northbridge bank (4). */
214     if ( mi == NULL || bank != 4 )
215         return NULL;
216 
217     if ( !(status & MCi_STATUS_VAL) )
218         return NULL;
219 
220     if ( !(status & MCi_STATUS_MISCV) )
221         return NULL;
222 
223     mc_ext = x86_mcinfo_reserve(mi, sizeof(*mc_ext), MC_TYPE_EXTENDED);
224     if ( !mc_ext )
225     {
226         mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
227         return NULL;
228     }
229 
230     mc_ext->mc_msrs = 3;
231 
232     mc_ext->mc_msr[0].reg = MSR_F10_MC4_MISC1;
233     mc_ext->mc_msr[1].reg = MSR_F10_MC4_MISC2;
234     mc_ext->mc_msr[2].reg = MSR_F10_MC4_MISC3;
235 
236     mc_ext->mc_msr[0].value = mca_rdmsr(MSR_F10_MC4_MISC1);
237     mc_ext->mc_msr[1].value = mca_rdmsr(MSR_F10_MC4_MISC2);
238     mc_ext->mc_msr[2].value = mca_rdmsr(MSR_F10_MC4_MISC3);
239 
240     return mc_ext;
241 }
242 
amd_need_clearbank_scan(enum mca_source who,uint64_t status)243 static bool cf_check amd_need_clearbank_scan(
244     enum mca_source who, uint64_t status)
245 {
246     if ( who != MCA_MCE_SCAN )
247         return true;
248 
249     /*
250      * For fatal error, it shouldn't be cleared so that sticky bank
251      * have a chance to be handled after reboot by polling.
252      */
253     if ( (status & MCi_STATUS_UC) && (status & MCi_STATUS_PCC) )
254         return false;
255 
256     return true;
257 }
258 
/* AMD specific MCA MSR */
/*
 * Guest write to an AMD-specific MCA MSR.  The bank is not emulated, so
 * the value is logged (at MCE_VERBOSE) and discarded.  Returns 1 to
 * indicate the access was consumed.
 */
int vmce_amd_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
{
    /* Do nothing as we don't emulate this MC bank currently */
    mce_printk(MCE_VERBOSE, "MCE: wr msr %#"PRIx64"\n", val);
    return 1;
}
266 
/*
 * Guest read of an AMD-specific MCA MSR.  The bank is not emulated, so
 * the guest always sees 0.  Returns 1 to indicate the access was
 * consumed.
 */
int vmce_amd_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
{
    /* Assign '0' as we don't emulate this MC bank currently */
    *val = 0;
    return 1;
}
273 
/* MCE callbacks for K8 (family 0xf): common handler plus bank-clear policy. */
static const struct mce_callbacks __initconst_cf_clobber k8_callbacks = {
    .handler = mcheck_cmn_handler,
    .need_clearbank_scan = amd_need_clearbank_scan,
};

/*
 * MCE callbacks for family 0x10 and later: additionally wire up address
 * checking, recoverability classification and extended NB info collection.
 */
static const struct mce_callbacks __initconst_cf_clobber k10_callbacks = {
    .handler = mcheck_cmn_handler,
    .check_addr = mc_amd_addrcheck,
    .recoverable_scan = mc_amd_recoverable_scan,
    .need_clearbank_scan = amd_need_clearbank_scan,
    .info_collect = amd_f10_handler,
};
286 
287 enum mcheck_type
amd_mcheck_init(const struct cpuinfo_x86 * c,bool bsp)288 amd_mcheck_init(const struct cpuinfo_x86 *c, bool bsp)
289 {
290     uint32_t i;
291     enum mcequirk_amd_flags quirkflag = 0;
292 
293     if ( c->x86_vendor != X86_VENDOR_HYGON )
294         quirkflag = mcequirk_lookup_amd_quirkdata(c);
295 
296     /* Assume that machine check support is available.
297      * The minimum provided support is at least the K8. */
298     if ( bsp )
299         mce_handler_init(c->x86 == 0xf ? &k8_callbacks : &k10_callbacks);
300 
301     for ( i = 0; i < this_cpu(nr_mce_banks); i++ )
302     {
303         if ( quirkflag == MCEQUIRK_K8_GART && i == 4 )
304             mcequirk_amd_apply(quirkflag);
305         else
306         {
307             /* Enable error reporting of all errors */
308             wrmsrl(MSR_IA32_MCx_CTL(i), 0xffffffffffffffffULL);
309             wrmsrl(MSR_IA32_MCx_STATUS(i), 0x0ULL);
310         }
311     }
312 
313     if ( c->x86 == 0xf )
314         return mcheck_amd_k8;
315 
316     if ( quirkflag == MCEQUIRK_F10_GART )
317         mcequirk_amd_apply(quirkflag);
318 
319     if ( cpu_has(c, X86_FEATURE_AMD_PPIN) &&
320          (c == &boot_cpu_data || ppin_msr) )
321     {
322         uint64_t val;
323 
324         rdmsrl(MSR_AMD_PPIN_CTL, val);
325 
326         /* If PPIN is disabled, but not locked, try to enable. */
327         if ( !(val & (PPIN_ENABLE | PPIN_LOCKOUT)) )
328         {
329             wrmsr_safe(MSR_PPIN_CTL, val | PPIN_ENABLE);
330             rdmsrl(MSR_AMD_PPIN_CTL, val);
331         }
332 
333         if ( !(val & PPIN_ENABLE) )
334             ppin_msr = 0;
335         else if ( c == &boot_cpu_data )
336             ppin_msr = MSR_AMD_PPIN;
337     }
338 
339     return c->x86_vendor == X86_VENDOR_HYGON ?
340             mcheck_hygon : mcheck_amd_famXX;
341 }
342