1 /*
2  * MCA implementation for AMD CPUs
3  * Copyright (c) 2007 Advanced Micro Devices, Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 
20 /* K8 common MCA documentation published at
21  *
22  * AMD64 Architecture Programmer's Manual Volume 2:
23  * System Programming
24  * Publication # 24593 Revision: 3.12
25  * Issue Date: September 2006
26  *
27  * URL:
28  * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/24593.pdf
29  */
30 
31 /* The related documentation for K8 Revisions A - E is:
32  *
33  * BIOS and Kernel Developer's Guide for
34  * AMD Athlon 64 and AMD Opteron Processors
35  * Publication # 26094 Revision: 3.30
36  * Issue Date: February 2006
37  *
38  * URL:
39  * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/26094.PDF
40  */
41 
42 /* The related documentation for K8 Revisions F - G is:
43  *
44  * BIOS and Kernel Developer's Guide for
45  * AMD NPT Family 0Fh Processors
46  * Publication # 32559 Revision: 3.04
47  * Issue Date: December 2006
48  *
49  * URL:
50  * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/32559.pdf
51  */
52 
53 #include <xen/init.h>
54 #include <xen/types.h>
55 #include <xen/kernel.h>
56 #include <xen/smp.h>
57 #include <xen/timer.h>
58 #include <xen/event.h>
59 
60 #include <asm/processor.h>
61 #include <asm/system.h>
62 #include <asm/msr.h>
63 
64 #include "mce.h"
65 #include "vmce.h"
66 
67 static struct timer mce_timer;
68 
69 #define MCE_PERIOD MILLISECS(10000)
70 #define MCE_MIN    MILLISECS(2000)
71 #define MCE_MAX    MILLISECS(30000)
72 
73 static s_time_t period = MCE_PERIOD;
74 static int hw_threshold = 0;
75 static int adjust = 0;
76 static int variable_period = 1;
77 
78 /* The polling service routine:
79  * Collects information of correctable errors and notifies
80  * Dom0 via an event.
81  */
mce_amd_checkregs(void * info)82 static void mce_amd_checkregs(void *info)
83 {
84 	mctelem_cookie_t mctc;
85 	struct mca_summary bs;
86 
87 	mctc = mcheck_mca_logout(MCA_POLLER, mca_allbanks, &bs, NULL);
88 
89 	if (bs.errcnt && mctc != NULL) {
90 		static uint64_t dumpcount = 0;
91 
92 		/* If Dom0 enabled the VIRQ_MCA event, then notify it.
93 		 * Otherwise, if dom0 has had plenty of time to register
94 		 * the virq handler but still hasn't then dump telemetry
95 		 * to the Xen console.  The call count may be incremented
96 		 * on multiple cpus at once and is indicative only - just
97 		 * a simple-minded attempt to avoid spamming the console
98 		 * for corrected errors in early startup. */
99 
100 		if (dom0_vmce_enabled()) {
101 			mctelem_commit(mctc);
102 			send_global_virq(VIRQ_MCA);
103 		} else if (++dumpcount >= 10) {
104 			x86_mcinfo_dump((struct mc_info *)mctelem_dataptr(mctc));
105 			mctelem_dismiss(mctc);
106 		} else {
107 			mctelem_dismiss(mctc);
108 		}
109 
110 	} else if (mctc != NULL) {
111 		mctelem_dismiss(mctc);
112 	}
113 
114 	/* adjust is global and all cpus may attempt to increment it without
115 	 * synchronisation, so they race and the final adjust count
116 	 * (number of cpus seeing any error) is approximate.  We can
117 	 * guarantee that if any cpu observes an error that the
118 	 * adjust count is at least 1. */
119 	if (bs.errcnt)
120 		adjust++;
121 }
122 
123 /* polling service routine invoker:
124  * Adjust poll frequency at runtime. No error means slow polling frequency,
125  * an error means higher polling frequency.
126  * It uses hw threshold register introduced in AMD K8 RevF to detect
127  * multiple correctable errors between two polls. In that case,
128  * increase polling frequency higher than normal.
129  */
mce_amd_work_fn(void * data)130 static void mce_amd_work_fn(void *data)
131 {
132 	on_each_cpu(mce_amd_checkregs, data, 1);
133 
134 	if (adjust > 0) {
135 		if (!dom0_vmce_enabled()) {
136 			/* Dom0 did not enable VIRQ_MCA, so Xen is reporting. */
137 			printk("MCE: polling routine found correctable error. "
138 				" Use mcelog to parse above error output.\n");
139 		}
140 	}
141 
142 	if (hw_threshold) {
143 		uint64_t value;
144 		uint32_t counter;
145 
146 		value = mca_rdmsr(MSR_IA32_MCx_MISC(4));
147 		/* Only the error counter field is of interest
148 		 * Bit field is described in AMD K8 BKDG chapter 6.4.5.5
149 		 */
150 		counter = (value & 0xFFF00000000ULL) >> 32U;
151 
152 		/* HW does not count *all* kinds of correctable errors.
153 		 * Thus it is possible, that the polling routine finds an
154 		 * correctable error even if the HW reports nothing. */
155 		if (counter > 0) {
156 			/* HW reported correctable errors,
157 			 * the polling routine did not find...
158 			 */
159 			if (adjust == 0) {
160 				printk("CPU counter reports %"PRIu32
161 					" correctable hardware error%s that %s"
162 					" not reported by the status MSRs\n",
163 					counter,
164 					(counter == 1 ? "" : "s"),
165 					(counter == 1 ? "was" : "were"));
166 			}
167 			/* subtract 1 to not double count the error
168 			 * from the polling service routine */
169 			adjust += (counter - 1);
170 
171 			/* Restart counter */
172 			/* No interrupt, reset counter value */
173 			value &= ~(0x60FFF00000000ULL);
174 			/* Counter enable */
175 			value |= (1ULL << 51);
176 			mca_wrmsr(MSR_IA32_MCx_MISC(4), value);
177 		}
178 	}
179 
180 	if (variable_period && adjust > 0) {
181 		/* Increase polling frequency */
182 		adjust++; /* adjust == 1 must have an effect */
183 		period /= adjust;
184 	} else if (variable_period) {
185 		/* Decrease polling frequency */
186 		period *= 2;
187 	}
188 	if (variable_period && period > MCE_MAX) {
189 		/* limit: Poll at least every 30s */
190 		period = MCE_MAX;
191 	}
192 	if (variable_period && period < MCE_MIN) {
193 		/* limit: Poll every 2s.
194 		 * When this is reached an uncorrectable error
195 		 * is expected to happen, if Dom0 does nothing.
196 		 */
197 		period = MCE_MIN;
198 	}
199 
200 	set_timer(&mce_timer, NOW() + period);
201 	adjust = 0;
202 }
203 
amd_nonfatal_mcheck_init(struct cpuinfo_x86 * c)204 void __init amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c)
205 {
206 	if (c->x86_vendor != X86_VENDOR_AMD)
207 		return;
208 
209 	/* Assume we are on K8 or newer AMD CPU here */
210 
211 	/* The threshold bitfields in MSR_IA32_MC4_MISC has
212 	 * been introduced along with the SVME feature bit. */
213 	if (variable_period && cpu_has(c, X86_FEATURE_SVM)) {
214 		uint64_t value;
215 
216 		/* hw threshold registers present */
217 		hw_threshold = 1;
218 		rdmsrl(MSR_IA32_MCx_MISC(4), value);
219 
220 		if (value & (1ULL << 61)) { /* Locked bit */
221 			/* Locked by BIOS. Not available for use */
222 			hw_threshold = 0;
223 		}
224 		if (!(value & (1ULL << 63))) { /* Valid bit */
225 			/* No CtrP present */
226 			hw_threshold = 0;
227 		} else {
228 			if (!(value & (1ULL << 62))) { /* Counter Bit */
229 				/* No counter field present */
230 				hw_threshold = 0;
231 			}
232 		}
233 
234 		if (hw_threshold) {
235 			/* No interrupt, reset counter value */
236 			value &= ~(0x60FFF00000000ULL);
237 			/* Counter enable */
238 			value |= (1ULL << 51);
239 			wrmsrl(MSR_IA32_MCx_MISC(4), value);
240 			printk(XENLOG_INFO "MCA: Use hw thresholding to adjust polling frequency\n");
241 		}
242 	}
243 
244 	init_timer(&mce_timer, mce_amd_work_fn, NULL, 0);
245 	set_timer(&mce_timer, NOW() + period);
246 }
247