1 /*
2 * MCA implementation for AMD CPUs
3 * Copyright (c) 2007 Advanced Micro Devices, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; If not, see <http://www.gnu.org/licenses/>.
17 */
18
19
20 /* K8 common MCA documentation published at
21 *
22 * AMD64 Architecture Programmer's Manual Volume 2:
23 * System Programming
24 * Publication # 24593 Revision: 3.12
25 * Issue Date: September 2006
26 *
27 * URL:
28 * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/24593.pdf
29 */
30
31 /* The related documentation for K8 Revisions A - E is:
32 *
33 * BIOS and Kernel Developer's Guide for
34 * AMD Athlon 64 and AMD Opteron Processors
35 * Publication # 26094 Revision: 3.30
36 * Issue Date: February 2006
37 *
38 * URL:
39 * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/26094.PDF
40 */
41
42 /* The related documentation for K8 Revisions F - G is:
43 *
44 * BIOS and Kernel Developer's Guide for
45 * AMD NPT Family 0Fh Processors
46 * Publication # 32559 Revision: 3.04
47 * Issue Date: December 2006
48 *
49 * URL:
50 * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/32559.pdf
51 */
52
53 #include <xen/init.h>
54 #include <xen/types.h>
55 #include <xen/kernel.h>
56 #include <xen/smp.h>
57 #include <xen/timer.h>
58 #include <xen/event.h>
59
60 #include <asm/processor.h>
61 #include <asm/system.h>
62 #include <asm/msr.h>
63
64 #include "mce.h"
65 #include "vmce.h"
66
67 static struct timer mce_timer;
68
69 #define MCE_PERIOD MILLISECS(10000)
70 #define MCE_MIN MILLISECS(2000)
71 #define MCE_MAX MILLISECS(30000)
72
73 static s_time_t period = MCE_PERIOD;
74 static int hw_threshold = 0;
75 static int adjust = 0;
76 static int variable_period = 1;
77
78 /* The polling service routine:
79 * Collects information of correctable errors and notifies
80 * Dom0 via an event.
81 */
mce_amd_checkregs(void * info)82 static void mce_amd_checkregs(void *info)
83 {
84 mctelem_cookie_t mctc;
85 struct mca_summary bs;
86
87 mctc = mcheck_mca_logout(MCA_POLLER, mca_allbanks, &bs, NULL);
88
89 if (bs.errcnt && mctc != NULL) {
90 static uint64_t dumpcount = 0;
91
92 /* If Dom0 enabled the VIRQ_MCA event, then notify it.
93 * Otherwise, if dom0 has had plenty of time to register
94 * the virq handler but still hasn't then dump telemetry
95 * to the Xen console. The call count may be incremented
96 * on multiple cpus at once and is indicative only - just
97 * a simple-minded attempt to avoid spamming the console
98 * for corrected errors in early startup. */
99
100 if (dom0_vmce_enabled()) {
101 mctelem_commit(mctc);
102 send_global_virq(VIRQ_MCA);
103 } else if (++dumpcount >= 10) {
104 x86_mcinfo_dump((struct mc_info *)mctelem_dataptr(mctc));
105 mctelem_dismiss(mctc);
106 } else {
107 mctelem_dismiss(mctc);
108 }
109
110 } else if (mctc != NULL) {
111 mctelem_dismiss(mctc);
112 }
113
114 /* adjust is global and all cpus may attempt to increment it without
115 * synchronisation, so they race and the final adjust count
116 * (number of cpus seeing any error) is approximate. We can
117 * guarantee that if any cpu observes an error that the
118 * adjust count is at least 1. */
119 if (bs.errcnt)
120 adjust++;
121 }
122
123 /* polling service routine invoker:
124 * Adjust poll frequency at runtime. No error means slow polling frequency,
125 * an error means higher polling frequency.
126 * It uses hw threshold register introduced in AMD K8 RevF to detect
127 * multiple correctable errors between two polls. In that case,
128 * increase polling frequency higher than normal.
129 */
mce_amd_work_fn(void * data)130 static void mce_amd_work_fn(void *data)
131 {
132 on_each_cpu(mce_amd_checkregs, data, 1);
133
134 if (adjust > 0) {
135 if (!dom0_vmce_enabled()) {
136 /* Dom0 did not enable VIRQ_MCA, so Xen is reporting. */
137 printk("MCE: polling routine found correctable error. "
138 " Use mcelog to parse above error output.\n");
139 }
140 }
141
142 if (hw_threshold) {
143 uint64_t value;
144 uint32_t counter;
145
146 value = mca_rdmsr(MSR_IA32_MCx_MISC(4));
147 /* Only the error counter field is of interest
148 * Bit field is described in AMD K8 BKDG chapter 6.4.5.5
149 */
150 counter = (value & 0xFFF00000000ULL) >> 32U;
151
152 /* HW does not count *all* kinds of correctable errors.
153 * Thus it is possible, that the polling routine finds an
154 * correctable error even if the HW reports nothing. */
155 if (counter > 0) {
156 /* HW reported correctable errors,
157 * the polling routine did not find...
158 */
159 if (adjust == 0) {
160 printk("CPU counter reports %"PRIu32
161 " correctable hardware error%s that %s"
162 " not reported by the status MSRs\n",
163 counter,
164 (counter == 1 ? "" : "s"),
165 (counter == 1 ? "was" : "were"));
166 }
167 /* subtract 1 to not double count the error
168 * from the polling service routine */
169 adjust += (counter - 1);
170
171 /* Restart counter */
172 /* No interrupt, reset counter value */
173 value &= ~(0x60FFF00000000ULL);
174 /* Counter enable */
175 value |= (1ULL << 51);
176 mca_wrmsr(MSR_IA32_MCx_MISC(4), value);
177 }
178 }
179
180 if (variable_period && adjust > 0) {
181 /* Increase polling frequency */
182 adjust++; /* adjust == 1 must have an effect */
183 period /= adjust;
184 } else if (variable_period) {
185 /* Decrease polling frequency */
186 period *= 2;
187 }
188 if (variable_period && period > MCE_MAX) {
189 /* limit: Poll at least every 30s */
190 period = MCE_MAX;
191 }
192 if (variable_period && period < MCE_MIN) {
193 /* limit: Poll every 2s.
194 * When this is reached an uncorrectable error
195 * is expected to happen, if Dom0 does nothing.
196 */
197 period = MCE_MIN;
198 }
199
200 set_timer(&mce_timer, NOW() + period);
201 adjust = 0;
202 }
203
amd_nonfatal_mcheck_init(struct cpuinfo_x86 * c)204 void __init amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c)
205 {
206 if (c->x86_vendor != X86_VENDOR_AMD)
207 return;
208
209 /* Assume we are on K8 or newer AMD CPU here */
210
211 /* The threshold bitfields in MSR_IA32_MC4_MISC has
212 * been introduced along with the SVME feature bit. */
213 if (variable_period && cpu_has(c, X86_FEATURE_SVM)) {
214 uint64_t value;
215
216 /* hw threshold registers present */
217 hw_threshold = 1;
218 rdmsrl(MSR_IA32_MCx_MISC(4), value);
219
220 if (value & (1ULL << 61)) { /* Locked bit */
221 /* Locked by BIOS. Not available for use */
222 hw_threshold = 0;
223 }
224 if (!(value & (1ULL << 63))) { /* Valid bit */
225 /* No CtrP present */
226 hw_threshold = 0;
227 } else {
228 if (!(value & (1ULL << 62))) { /* Counter Bit */
229 /* No counter field present */
230 hw_threshold = 0;
231 }
232 }
233
234 if (hw_threshold) {
235 /* No interrupt, reset counter value */
236 value &= ~(0x60FFF00000000ULL);
237 /* Counter enable */
238 value |= (1ULL << 51);
239 wrmsrl(MSR_IA32_MCx_MISC(4), value);
240 printk(XENLOG_INFO "MCA: Use hw thresholding to adjust polling frequency\n");
241 }
242 }
243
244 init_timer(&mce_timer, mce_amd_work_fn, NULL, 0);
245 set_timer(&mce_timer, NOW() + period);
246 }
247