1 /*
2  * common MCA implementation for AMD CPUs.
3  * Copyright (c) 2012-2014 Advanced Micro Devices, Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 /* K8 common MCA documentation published at
20  *
21  * AMD64 Architecture Programmer's Manual Volume 2:
22  * System Programming
23  * Publication # 24593 Revision: 3.24
24  * Issue Date: October 2013
25  *
26  * URL:
27  * http://support.amd.com/TechDocs/24593.pdf
28  */
29 
30 /* The related documentation for K8 Revisions A - E is:
31  *
32  * BIOS and Kernel Developer's Guide for
33  * AMD Athlon 64 and AMD Opteron Processors
34  * Publication # 26094 Revision: 3.30
35  * Issue Date: February 2006
36  *
37  * URL:
38  * http://support.amd.com/TechDocs/26094.PDF
39  */
40 
41 /* The related documentation for K8 Revisions F - G is:
42  *
43  * BIOS and Kernel Developer's Guide for
44  * AMD NPT Family 0Fh Processors
45  * Publication # 32559 Revision: 3.08
46  * Issue Date: July 2007
47  *
48  * URL:
49  * http://support.amd.com/TechDocs/32559.pdf
50  */
51 
52 /* Family10 MCA documentation published at
53  *
54  * BIOS and Kernel Developer's Guide
55  * For AMD Family 10h Processors
56  * Publication # 31116 Revision: 3.62
 * Issue Date: January 11, 2013
58  *
59  * URL:
60  * http://support.amd.com/TechDocs/31116.pdf
61  */
62 
63 #include <xen/init.h>
64 #include <xen/types.h>
65 
66 #include <asm/msr.h>
67 #include <asm/processor.h>
68 
69 #include "mce.h"
70 #include "x86_mca.h"
71 #include "mce_amd.h"
72 #include "mcaction.h"
73 #include "mce_quirks.h"
74 #include "vmce.h"
75 
76 #define ANY -1
77 
78 static const struct mce_quirkdata mce_amd_quirks[] = {
79     { 0xf /* cpu family */, ANY /* all models */, ANY /* all steppings */,
80       MCEQUIRK_K8_GART },
81     { 0x10 /* cpu family */, ANY /* all models */, ANY /* all steppings */,
82       MCEQUIRK_F10_GART },
83 };
84 
/*
 * Error code types.
 *
 * Each value is the single bit of the 16-bit MCA error code that
 * mc_ec2type() tests to classify an error.
 */
enum mc_ec_type {
    MC_EC_TLB_TYPE = 1 << 4,  /* 0x0010: TLB error */
    MC_EC_MEM_TYPE = 1 << 8,  /* 0x0100: memory error */
    MC_EC_BUS_TYPE = 1 << 11, /* 0x0800: bus error */
};
91 
92 enum mc_ec_type
mc_ec2type(uint16_t errorcode)93 mc_ec2type(uint16_t errorcode)
94 {
95     if ( errorcode & MC_EC_BUS_TYPE )
96         return MC_EC_BUS_TYPE;
97     if ( errorcode & MC_EC_MEM_TYPE )
98         return MC_EC_MEM_TYPE;
99     if ( errorcode & MC_EC_TLB_TYPE )
100         return MC_EC_TLB_TYPE;
101     /* Unreached */
102     BUG();
103     return 0;
104 }
105 
mc_amd_recoverable_scan(uint64_t status)106 bool mc_amd_recoverable_scan(uint64_t status)
107 {
108     bool ret = false;
109     enum mc_ec_type ectype;
110     uint16_t errorcode;
111 
112     if ( !(status & MCi_STATUS_UC) )
113         return true;
114 
115     errorcode = status & (MCi_STATUS_MCA | MCi_STATUS_MSEC);
116     ectype = mc_ec2type(errorcode);
117 
118     switch ( ectype )
119     {
120     case MC_EC_BUS_TYPE: /* value in addr MSR is physical */
121         /* should run cpu offline action */
122         break;
123 
124     case MC_EC_MEM_TYPE: /* value in addr MSR is physical */
125         ret = true; /* run memory page offline action */
126         break;
127 
128     case MC_EC_TLB_TYPE: /* value in addr MSR is virtual */
129         /* should run tlb flush action and retry */
130         break;
131     }
132 
133     return ret;
134 }
135 
mc_amd_addrcheck(uint64_t status,uint64_t misc,int addrtype)136 bool mc_amd_addrcheck(uint64_t status, uint64_t misc, int addrtype)
137 {
138     enum mc_ec_type ectype;
139     uint16_t errorcode;
140 
141     errorcode = status & (MCi_STATUS_MCA | MCi_STATUS_MSEC);
142     ectype = mc_ec2type(errorcode);
143 
144     switch ( ectype )
145     {
146     case MC_EC_BUS_TYPE: /* value in addr MSR is physical */
147     case MC_EC_MEM_TYPE: /* value in addr MSR is physical */
148         return (addrtype == MC_ADDR_PHYSICAL);
149 
150     case MC_EC_TLB_TYPE: /* value in addr MSR is virtual */
151         return (addrtype == MC_ADDR_VIRTUAL);
152     }
153 
154     /* unreached */
155     BUG();
156     return false;
157 }
158 
159 /* MC quirks */
160 enum mcequirk_amd_flags
mcequirk_lookup_amd_quirkdata(struct cpuinfo_x86 * c)161 mcequirk_lookup_amd_quirkdata(struct cpuinfo_x86 *c)
162 {
163     int i;
164 
165     BUG_ON(c->x86_vendor != X86_VENDOR_AMD);
166 
167     for ( i = 0; i < ARRAY_SIZE(mce_amd_quirks); i++ )
168     {
169         if ( c->x86 != mce_amd_quirks[i].cpu_family )
170             continue;
171         if ( (mce_amd_quirks[i].cpu_model != ANY) &&
172              (mce_amd_quirks[i].cpu_model != c->x86_model) )
173             continue;
174         if ( (mce_amd_quirks[i].cpu_stepping != ANY) &&
175              (mce_amd_quirks[i].cpu_stepping != c->x86_mask) )
176                 continue;
177         return mce_amd_quirks[i].quirk;
178     }
179     return 0;
180 }
181 
mcequirk_amd_apply(enum mcequirk_amd_flags flags)182 int mcequirk_amd_apply(enum mcequirk_amd_flags flags)
183 {
184     uint64_t val;
185 
186     switch ( flags )
187     {
188     case MCEQUIRK_K8_GART:
189         /*
190          * Enable error reporting for all errors except for GART
191          * TBL walk error reporting, which trips off incorrectly
192          * with AGP GART & 3ware & Cerberus.
193          */
194         wrmsrl(MSR_IA32_MCx_CTL(4), ~(1ULL << 10));
195         wrmsrl(MSR_IA32_MCx_STATUS(4), 0ULL);
196         break;
197 
198     case MCEQUIRK_F10_GART:
199         if ( rdmsr_safe(MSR_AMD64_MCx_MASK(4), val) == 0 )
200                 wrmsr_safe(MSR_AMD64_MCx_MASK(4), val | (1 << 10));
201         break;
202     }
203 
204     return 0;
205 }
206 
207 static struct mcinfo_extended *
amd_f10_handler(struct mc_info * mi,uint16_t bank,uint64_t status)208 amd_f10_handler(struct mc_info *mi, uint16_t bank, uint64_t status)
209 {
210     struct mcinfo_extended *mc_ext;
211 
212     /* Family 0x10 introduced additional MSR that belong to the
213      * northbridge bank (4). */
214     if ( mi == NULL || bank != 4 )
215         return NULL;
216 
217     if ( !(status & MCi_STATUS_VAL) )
218         return NULL;
219 
220     if ( !(status & MCi_STATUS_MISCV) )
221         return NULL;
222 
223     mc_ext = x86_mcinfo_reserve(mi, sizeof(*mc_ext), MC_TYPE_EXTENDED);
224     if ( !mc_ext )
225     {
226         mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
227         return NULL;
228     }
229 
230     mc_ext->mc_msrs = 3;
231 
232     mc_ext->mc_msr[0].reg = MSR_F10_MC4_MISC1;
233     mc_ext->mc_msr[1].reg = MSR_F10_MC4_MISC2;
234     mc_ext->mc_msr[2].reg = MSR_F10_MC4_MISC3;
235 
236     mc_ext->mc_msr[0].value = mca_rdmsr(MSR_F10_MC4_MISC1);
237     mc_ext->mc_msr[1].value = mca_rdmsr(MSR_F10_MC4_MISC2);
238     mc_ext->mc_msr[2].value = mca_rdmsr(MSR_F10_MC4_MISC3);
239 
240     return mc_ext;
241 }
242 
amd_need_clearbank_scan(enum mca_source who,uint64_t status)243 static bool amd_need_clearbank_scan(enum mca_source who, uint64_t status)
244 {
245     if ( who != MCA_MCE_SCAN )
246         return true;
247 
248     /*
249      * For fatal error, it shouldn't be cleared so that sticky bank
250      * have a chance to be handled after reboot by polling.
251      */
252     if ( (status & MCi_STATUS_UC) && (status & MCi_STATUS_PCC) )
253         return false;
254 
255     return true;
256 }
257 
258 /* AMD specific MCA MSR */
vmce_amd_wrmsr(struct vcpu * v,uint32_t msr,uint64_t val)259 int vmce_amd_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
260 {
261     /* Do nothing as we don't emulate this MC bank currently */
262     mce_printk(MCE_VERBOSE, "MCE: wr msr %#"PRIx64"\n", val);
263     return 1;
264 }
265 
/*
 * Guest read of an AMD specific MCA MSR.
 *
 * Stores 0 through val regardless of which MSR is requested; v and msr
 * are not used.
 *
 * Always returns 1.
 * NOTE(review): presumably 1 tells the caller the access was handled -
 * confirm against the vMCE dispatch code.
 */
int vmce_amd_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
{
    /* This MC bank is not emulated currently, so it always reads as 0. */
    *val = 0;

    return 1;
}
272 
273 enum mcheck_type
amd_mcheck_init(struct cpuinfo_x86 * ci)274 amd_mcheck_init(struct cpuinfo_x86 *ci)
275 {
276     uint32_t i;
277     enum mcequirk_amd_flags quirkflag = mcequirk_lookup_amd_quirkdata(ci);
278 
279     /* Assume that machine check support is available.
280      * The minimum provided support is at least the K8. */
281     mce_handler_init();
282     x86_mce_vector_register(mcheck_cmn_handler);
283     mce_need_clearbank_register(amd_need_clearbank_scan);
284 
285     for ( i = 0; i < nr_mce_banks; i++ )
286     {
287         if ( quirkflag == MCEQUIRK_K8_GART && i == 4 )
288             mcequirk_amd_apply(quirkflag);
289         else
290         {
291             /* Enable error reporting of all errors */
292             wrmsrl(MSR_IA32_MCx_CTL(i), 0xffffffffffffffffULL);
293             wrmsrl(MSR_IA32_MCx_STATUS(i), 0x0ULL);
294         }
295     }
296 
297     if ( ci->x86 == 0xf )
298         return mcheck_amd_k8;
299 
300     if ( quirkflag == MCEQUIRK_F10_GART )
301         mcequirk_amd_apply(quirkflag);
302 
303     x86_mce_callback_register(amd_f10_handler);
304     mce_recoverable_register(mc_amd_recoverable_scan);
305     mce_register_addrcheck(mc_amd_addrcheck);
306 
307     return mcheck_amd_famXX;
308 }
309