#include <xen/init.h>
#include <xen/types.h>
#include <xen/irq.h>
#include <xen/event.h>
#include <xen/kernel.h>
#include <xen/delay.h>
#include <xen/smp.h>
#include <xen/mm.h>
#include <xen/cpu.h>
#include <asm/processor.h>
#include <public/sysctl.h>
#include <asm/system.h>
#include <asm/msr.h>
#include <asm/p2m.h>
#include <asm/mce.h>
#include <asm/apic.h>
#include "mce.h"
#include "x86_mca.h"
#include "barrier.h"
#include "util.h"
#include "vmce.h"
#include "mcaction.h"

static DEFINE_PER_CPU_READ_MOSTLY(struct mca_banks *, mce_banks_owned);
bool __read_mostly cmci_support;
static bool __read_mostly ser_support;
static bool __read_mostly mce_force_broadcast;
boolean_param("mce_fb", mce_force_broadcast);

static int __read_mostly nr_intel_ext_msrs;

/* If mce_force_broadcast == 1, lmce_support will be disabled forcibly. */
bool __read_mostly lmce_support;

/* The Intel SDM defines bits 15:0 of IA32_MCi_STATUS as the MC error code. */
#define INTEL_MCCOD_MASK 0xFFFF

/*
 * Currently the Intel SDM defines 2 kinds of SRAO errors:
 * 1) memory scrubbing error, error code = 0xC0 ~ 0xCF
 * 2) L3 explicit writeback error, error code = 0x17A
 */
#define INTEL_SRAO_MEM_SCRUB 0xC0 ... 0xCF
#define INTEL_SRAO_L3_EWB    0x17A

/*
 * Currently the Intel SDM defines 2 kinds of SRAR errors:
 * 1) data load error, error code = 0x134
 * 2) instruction fetch error, error code = 0x150
 */
#define INTEL_SRAR_DATA_LOAD   0x134
#define INTEL_SRAR_INSTR_FETCH 0x150

#ifdef CONFIG_X86_MCE_THERMAL
#define MCE_RING 0x1
static DEFINE_PER_CPU(int, last_state);

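/*
 * Thermal LVT interrupt handler: rate-limited to one check per CPU every
 * 5 seconds, and only reports when the threshold status bit changes.
 */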
static void intel_thermal_interrupt(struct cpu_user_regs *regs)
{
    uint64_t msr_content;
    unsigned int cpu = smp_processor_id();
    static DEFINE_PER_CPU(s_time_t, next);
    int *this_last_state;

    ack_APIC_irq();

    if ( NOW() < per_cpu(next, cpu) )
        return;

    per_cpu(next, cpu) = NOW() + MILLISECS(5000);
    rdmsrl(MSR_IA32_THERM_STATUS, msr_content);
    this_last_state = &per_cpu(last_state, cpu);
    if ( *this_last_state == (msr_content & MCE_RING) )
        return;
    *this_last_state = msr_content & MCE_RING;
    if ( msr_content & MCE_RING )
    {
        printk(KERN_EMERG "CPU%u: Temperature above threshold\n", cpu);
        printk(KERN_EMERG "CPU%u: Running in modulated clock mode\n", cpu);
        add_taint(TAINT_MACHINE_CHECK);
    } else
        printk(KERN_INFO "CPU%u: Temperature/speed normal\n", cpu);
}

/* Thermal monitoring depends on APIC, ACPI and clock modulation. */
static bool intel_thermal_supported(struct cpuinfo_x86 *c)
{
    if ( !cpu_has_apic )
        return false;
    if ( !cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_TM1) )
        return false;
    return true;
}

static u32 __read_mostly lvtthmr_init;

static void __init mcheck_intel_therm_init(void)
{
    /*
     * This function is only called on the boot CPU.  Save the initial
     * thermal LVT value on the BSP and use it later to restore the
     * BIOS-programmed thermal LVT entry on the APs.
     */
    if ( intel_thermal_supported(&boot_cpu_data) )
        lvtthmr_init = apic_read(APIC_LVTTHMR);
}

/* P4/Xeon Thermal regulation detect and init. */
static void intel_init_thermal(struct cpuinfo_x86 *c)
{
    uint64_t msr_content;
    uint32_t val;
    int tm2 = 0;
    unsigned int cpu = smp_processor_id();
    static uint8_t thermal_apic_vector;

    if ( !intel_thermal_supported(c) )
        return; /* -ENODEV */

    /*
     * First check if it's enabled already, in which case there might
     * be some SMM goo which handles it, so we can't even put a handler
     * since it might be delivered via SMI already -zwanem.
     */
    rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
    val = lvtthmr_init;
    /*
     * The initial value of thermal LVT entries on all APs always reads
     * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
     * sequence to them and LVT registers are reset to 0s except for
     * the mask bits which are set to 1s when APs receive INIT IPI.
     * If BIOS takes over the thermal interrupt and sets its interrupt
     * delivery mode to SMI (not fixed), it restores the value that the
     * BIOS has programmed on AP based on BSP's info we saved (since BIOS
     * is required to set the same value for all threads/cores).
     */
    if ( (val & APIC_MODE_MASK) != APIC_DM_FIXED
         || (val & APIC_VECTOR_MASK) > 0xf )
        apic_write(APIC_LVTTHMR, val);

    if ( (msr_content & (1ULL<<3))
         && (val & APIC_MODE_MASK) == APIC_DM_SMI )
    {
        if ( c == &boot_cpu_data )
            printk(KERN_DEBUG "Thermal monitoring handled by SMI\n");
        return; /* -EBUSY */
    }

    if ( cpu_has(c, X86_FEATURE_TM2) && (msr_content & (1ULL << 13)) )
        tm2 = 1;

    /* Check whether a vector already exists, temporarily masked? */
    if ( val & APIC_VECTOR_MASK )
    {
        if ( c == &boot_cpu_data )
            printk(KERN_DEBUG "Thermal LVT vector (%#x) already installed\n",
                   val & APIC_VECTOR_MASK);
        return; /* -EBUSY */
    }

    alloc_direct_apic_vector(&thermal_apic_vector, intel_thermal_interrupt);

    /* The temperature transition interrupt handler setup. */
    val = thermal_apic_vector;    /* our delivery vector */
    val |= (APIC_DM_FIXED | APIC_LVT_MASKED);  /* we'll mask till we're ready */
    apic_write(APIC_LVTTHMR, val);

    rdmsrl(MSR_IA32_THERM_INTERRUPT, msr_content);
    wrmsrl(MSR_IA32_THERM_INTERRUPT, msr_content | 0x03);

    rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
    wrmsrl(MSR_IA32_MISC_ENABLE, msr_content | (1ULL<<3));

    apic_write(APIC_LVTTHMR, val & ~APIC_LVT_MASKED);
    if ( opt_cpu_info )
        printk(KERN_INFO "CPU%u: Thermal monitoring enabled (%s)\n",
               cpu, tm2 ? "TM2" : "TM1");
    return;
}
#endif /* CONFIG_X86_MCE_THERMAL */

/* Intel MCE handler */
static inline void intel_get_extended_msr(struct mcinfo_extended *ext, u32 msr)
{
    if ( ext->mc_msrs < ARRAY_SIZE(ext->mc_msr)
         && msr < MSR_IA32_MCG_EAX + nr_intel_ext_msrs )
    {
        ext->mc_msr[ext->mc_msrs].reg = msr;
        rdmsrl(msr, ext->mc_msr[ext->mc_msrs].value);
        ++ext->mc_msrs;
    }
}

struct mcinfo_extended *
intel_get_extended_msrs(struct mcinfo_global *mig, struct mc_info *mi)
{
    struct mcinfo_extended *mc_ext;
    int i;

    /*
     * According to the spec, processors supporting 64-bit will always
     * have MSRs beyond IA32_MCG_MISC.
     */
    if ( !mi || !mig || nr_intel_ext_msrs == 0 ||
         !(mig->mc_gstatus & MCG_STATUS_EIPV) )
        return NULL;

    mc_ext = x86_mcinfo_reserve(mi, sizeof(*mc_ext), MC_TYPE_EXTENDED);
    if ( !mc_ext )
    {
        mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
        return NULL;
    }

    for ( i = MSR_IA32_MCG_EAX; i <= MSR_IA32_MCG_MISC; i++ )
        intel_get_extended_msr(mc_ext, i);

    for ( i = MSR_IA32_MCG_R8; i <= MSR_IA32_MCG_R15; i++ )
        intel_get_extended_msr(mc_ext, i);

    return mc_ext;
}

enum intel_mce_type
{
    intel_mce_invalid,
    intel_mce_fatal,
    intel_mce_corrected,
    intel_mce_ucr_ucna,
    intel_mce_ucr_srao,
    intel_mce_ucr_srar,
};

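/*
 * Classify an error from its MCi_STATUS value: corrected, UCR
 * (UCNA / SRAO / SRAR) when software error recovery is supported, or fatal.
 */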
static enum intel_mce_type intel_check_mce_type(uint64_t status)
{
    if ( !(status & MCi_STATUS_VAL) )
        return intel_mce_invalid;

    if ( status & MCi_STATUS_PCC )
        return intel_mce_fatal;

    /* Corrected error? */
    if ( !(status & MCi_STATUS_UC) )
        return intel_mce_corrected;

    if ( !ser_support )
        return intel_mce_fatal;

    if ( status & MCi_STATUS_S )
    {
        if ( status & MCi_STATUS_AR )
        {
            if ( status & MCi_STATUS_OVER )
                return intel_mce_fatal;
            else
                return intel_mce_ucr_srar;
        } else
            return intel_mce_ucr_srao;
    }
    else
        return intel_mce_ucr_ucna;

    /* Any type not included above? */
    return intel_mce_fatal;
}

static void intel_memerr_dhandler(
    struct mca_binfo *binfo,
    enum mce_result *result,
    const struct cpu_user_regs *regs)
{
    mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n");
    mc_memerr_dhandler(binfo, result, regs);
}

static bool intel_srar_check(uint64_t status)
{
    return (intel_check_mce_type(status) == intel_mce_ucr_srar);
}

static bool intel_checkaddr(uint64_t status, uint64_t misc, int addrtype)
{
    if ( !(status & MCi_STATUS_ADDRV) ||
         !(status & MCi_STATUS_MISCV) ||
         ((misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) )
        /* addr is virtual */
        return (addrtype == MC_ADDR_VIRTUAL);

    return (addrtype == MC_ADDR_PHYSICAL);
}

static void intel_srar_dhandler(
    struct mca_binfo *binfo,
    enum mce_result *result,
    const struct cpu_user_regs *regs)
{
    uint64_t status = binfo->mib->mc_status;

    /* For an unknown SRAR error code, reset the system. */
    *result = MCER_RESET;

    switch ( status & INTEL_MCCOD_MASK )
    {
    case INTEL_SRAR_DATA_LOAD:
    case INTEL_SRAR_INSTR_FETCH:
        intel_memerr_dhandler(binfo, result, regs);
        break;
    }
}

static bool intel_srao_check(uint64_t status)
{
    return (intel_check_mce_type(status) == intel_mce_ucr_srao);
}

static void intel_srao_dhandler(
    struct mca_binfo *binfo,
    enum mce_result *result,
    const struct cpu_user_regs *regs)
{
    uint64_t status = binfo->mib->mc_status;

    /* For an unknown SRAO error code, no action is required. */
    *result = MCER_CONTINUE;

    if ( status & MCi_STATUS_VAL )
    {
        switch ( status & INTEL_MCCOD_MASK )
        {
        case INTEL_SRAO_MEM_SCRUB:
        case INTEL_SRAO_L3_EWB:
            intel_memerr_dhandler(binfo, result, regs);
            break;
        }
    }
}

static bool intel_default_check(uint64_t status)
{
    return true;
}

static void intel_default_mce_dhandler(
    struct mca_binfo *binfo,
    enum mce_result *result,
    const struct cpu_user_regs *regs)
{
    uint64_t status = binfo->mib->mc_status;
    enum intel_mce_type type;

    type = intel_check_mce_type(status);

    if ( type == intel_mce_fatal )
        *result = MCER_RESET;
    else
        *result = MCER_CONTINUE;
}

static const struct mca_error_handler intel_mce_dhandlers[] = {
    {intel_srao_check, intel_srao_dhandler},
    {intel_srar_check, intel_srar_dhandler},
    {intel_default_check, intel_default_mce_dhandler}
};

static void intel_default_mce_uhandler(
    struct mca_binfo *binfo,
    enum mce_result *result,
    const struct cpu_user_regs *regs)
{
    uint64_t status = binfo->mib->mc_status;
    enum intel_mce_type type;

    type = intel_check_mce_type(status);

    switch ( type )
    {
    case intel_mce_fatal:
        *result = MCER_RESET;
        break;

    default:
        *result = MCER_CONTINUE;
        break;
    }
}

static const struct mca_error_handler intel_mce_uhandlers[] = {
    {intel_default_check, intel_default_mce_uhandler}
};

/*
 * According to the MCA OS writer's guide, the CMCI handler needs to clear
 * the bank when
 * 1) CE (UC = 0)
 * 2) ser_support = 1, spurious error, OVER = 0, EN = 0, [UC = 1]
 * 3) ser_support = 1, UCNA, OVER = 0, S = 1, AR = 0, PCC = 0, [UC = 1, EN = 1]
 * The MCA handler needs to clear the bank when
 * 1) ser_support = 1, spurious error, OVER = 0, EN = 0, UC = 1
 * 2) ser_support = 1, SRAR, UC = 1, OVER = 0, S = 1, AR = 1, [EN = 1]
 * 3) ser_support = 1, SRAO, UC = 1, S = 1, AR = 0, [EN = 1]
 */

static bool intel_need_clearbank_scan(enum mca_source who, u64 status)
{
    if ( who == MCA_CMCI_HANDLER )
    {
        /* CE needs the bank cleared. */
        if ( !(status & MCi_STATUS_UC) )
            return true;
        /* A spurious error needs the bank cleared. */
        else if ( ser_support && !(status & MCi_STATUS_OVER)
                  && !(status & MCi_STATUS_EN) )
            return true;
        /* UCNA with OVER = 0 needs the bank cleared. */
        else if ( ser_support && !(status & MCi_STATUS_OVER)
                  && !(status & MCi_STATUS_PCC) && !(status & MCi_STATUS_S)
                  && !(status & MCi_STATUS_AR) )
            return true;
        /* Only log, no clearing. */
        else
            return false;
    }
    else if ( who == MCA_MCE_SCAN )
    {
        if ( !ser_support )
            return false;
        /*
         * A fatal error shouldn't be cleared, so that the sticky bank
         * has a chance to be handled after reboot by polling.
         */
        if ( (status & MCi_STATUS_UC) && (status & MCi_STATUS_PCC) )
            return false;
        /* A spurious error needs the bank cleared. */
        else if ( !(status & MCi_STATUS_OVER)
                  && (status & MCi_STATUS_UC) && !(status & MCi_STATUS_EN) )
            return true;
        /*
         * SRAR with OVER = 0 clears the bank.  OVER = 1 would have
         * caused a reset.
         */
        else if ( (status & MCi_STATUS_UC)
                  && (status & MCi_STATUS_S) && (status & MCi_STATUS_AR)
                  && !(status & MCi_STATUS_OVER) )
            return true;
        /* SRAO needs the bank cleared. */
        else if ( !(status & MCi_STATUS_AR)
                  && (status & MCi_STATUS_S) && (status & MCi_STATUS_UC) )
            return true;
        else
            return false;
    }

    return true;
}

/*
 * MCE continues/is recoverable when
 * 1) CE        UC = 0
 * 2) Spurious  ser_support = 1, OVER = 0, EN = 0 [UC = 1]
 * 3) SRAR      ser_support = 1, OVER = 0, PCC = 0, S = 1, AR = 1 [UC = 1, EN = 1]
 * 4) SRAO      ser_support = 1, PCC = 0, S = 1, AR = 0, EN = 1 [UC = 1]
 * 5) UCNA      ser_support = 1, OVER = 0, EN = 1, PCC = 0, S = 0, AR = 0 [UC = 1]
 */
static bool intel_recoverable_scan(uint64_t status)
{
    if ( !(status & MCi_STATUS_UC) )
        return true;
    else if ( ser_support && !(status & MCi_STATUS_EN)
              && !(status & MCi_STATUS_OVER) )
        return true;
    /* SRAR error */
    else if ( ser_support && !(status & MCi_STATUS_OVER)
              && !(status & MCi_STATUS_PCC) && (status & MCi_STATUS_S)
              && (status & MCi_STATUS_AR) && (status & MCi_STATUS_EN) )
        return true;
    /* SRAO error */
    else if ( ser_support && !(status & MCi_STATUS_PCC)
              && (status & MCi_STATUS_S) && !(status & MCi_STATUS_AR)
              && (status & MCi_STATUS_EN) )
        return true;
    /* UCNA error */
    else if ( ser_support && !(status & MCi_STATUS_OVER)
              && (status & MCi_STATUS_EN) && !(status & MCi_STATUS_PCC)
              && !(status & MCi_STATUS_S) && !(status & MCi_STATUS_AR) )
        return true;
    return false;
}

/* CMCI */
static DEFINE_SPINLOCK(cmci_discover_lock);

/*
 * Discover bank sharing using the algorithm recommended in the SDM.
 */
static int do_cmci_discover(int i)
{
    unsigned msr = MSR_IA32_MCx_CTL2(i);
    u64 val;
    unsigned int threshold, max_threshold;
    static unsigned int cmci_threshold = 2;
    integer_param("cmci-threshold", cmci_threshold);

    rdmsrl(msr, val);
    /* Some other CPU already owns this bank. */
    if ( val & CMCI_EN )
    {
        mcabanks_clear(i, __get_cpu_var(mce_banks_owned));
        goto out;
    }

    if ( cmci_threshold )
    {
        wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD_MASK);
        rdmsrl(msr, val);
    }

    if ( !(val & CMCI_EN) )
    {
        /* This bank does not support CMCI. Polling timer has to handle it. */
        mcabanks_set(i, __get_cpu_var(no_cmci_banks));
        wrmsrl(msr, val & ~CMCI_THRESHOLD_MASK);
        return 0;
    }
    max_threshold = MASK_EXTR(val, CMCI_THRESHOLD_MASK);
    threshold = cmci_threshold;
    if ( threshold > max_threshold )
    {
        mce_printk(MCE_QUIET,
                   "CMCI: threshold %#x too large for CPU%u bank %u, using %#x\n",
                   threshold, smp_processor_id(), i, max_threshold);
        threshold = max_threshold;
    }
    wrmsrl(msr, (val & ~CMCI_THRESHOLD_MASK) | CMCI_EN | threshold);
    mcabanks_set(i, __get_cpu_var(mce_banks_owned));
 out:
    mcabanks_clear(i, __get_cpu_var(no_cmci_banks));
    return 1;
}

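/*
 * Run bank ownership discovery for every bank this CPU doesn't yet own,
 * then flush out any CMCI event that may have been raised (but not
 * delivered) while ownership was changing.
 */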
static void cmci_discover(void)
{
    unsigned long flags;
    int i;
    mctelem_cookie_t mctc;
    struct mca_summary bs;

    mce_printk(MCE_VERBOSE, "CMCI: find owner on CPU%d\n", smp_processor_id());

    spin_lock_irqsave(&cmci_discover_lock, flags);

    for ( i = 0; i < nr_mce_banks; i++ )
        if ( !mcabanks_test(i, __get_cpu_var(mce_banks_owned)) )
            do_cmci_discover(i);

    spin_unlock_irqrestore(&cmci_discover_lock, flags);

    /*
     * In case a CMCI was raised while the owner change was in progress:
     * if a CMCI happened but was not processed immediately, MCi_STATUS
     * (error_count, bits 38~52) is not cleared and the CMCI interrupt
     * will never be triggered again.
     */

    mctc = mcheck_mca_logout(
        MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs, NULL);

    if ( bs.errcnt && mctc != NULL )
    {
        if ( dom0_vmce_enabled() )
        {
            mctelem_commit(mctc);
            send_global_virq(VIRQ_MCA);
        }
        else
        {
            x86_mcinfo_dump(mctelem_dataptr(mctc));
            mctelem_dismiss(mctc);
        }
    }
    else if ( mctc != NULL )
        mctelem_dismiss(mctc);

    mce_printk(MCE_VERBOSE, "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n",
               smp_processor_id(),
               *((unsigned long *)__get_cpu_var(mce_banks_owned)->bank_map),
               *((unsigned long *)__get_cpu_var(no_cmci_banks)->bank_map));
}

/*
 * Define an owner for each bank. Banks can be shared between CPUs,
 * so to avoid reporting events multiple times always set up one
 * CPU as owner.
 *
 * The assignment has to be redone when CPUs go offline and
 * any of the owners goes away. Also, pollers run in parallel, so we
 * have to be careful to update the banks in a way that doesn't
 * lose or duplicate events.
 */

static void mce_set_owner(void)
{
    if ( !cmci_support || !opt_mce )
        return;

    cmci_discover();
}

static void __cpu_mcheck_distribute_cmci(void *unused)
{
    cmci_discover();
}

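/*
 * Re-run CMCI bank ownership discovery on all online CPUs, e.g. after a
 * CPU has gone offline and its banks need a new owner.
 */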
static void cpu_mcheck_distribute_cmci(void)
{
    if ( cmci_support && opt_mce )
        on_each_cpu(__cpu_mcheck_distribute_cmci, NULL, 0);
}

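/* Disable CMCI in all banks owned by this CPU and release ownership. */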
static void clear_cmci(void)
{
    int i;

    if ( !cmci_support || !opt_mce )
        return;

    mce_printk(MCE_VERBOSE, "CMCI: clear_cmci support on CPU%d\n",
               smp_processor_id());

    for ( i = 0; i < nr_mce_banks; i++ )
    {
        unsigned msr = MSR_IA32_MCx_CTL2(i);
        u64 val;
        if ( !mcabanks_test(i, __get_cpu_var(mce_banks_owned)) )
            continue;
        rdmsrl(msr, val);
        if ( val & (CMCI_EN|CMCI_THRESHOLD_MASK) )
            wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK));
        mcabanks_clear(i, __get_cpu_var(mce_banks_owned));
    }
}

static void cpu_mcheck_disable(void)
{
    clear_in_cr4(X86_CR4_MCE);

    if ( cmci_support && opt_mce )
        clear_cmci();
}

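/*
 * CMCI LVT handler: log corrected errors from the banks this CPU owns and
 * forward the telemetry to Dom0 via VIRQ_MCA when vMCE is enabled.
 */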
static void cmci_interrupt(struct cpu_user_regs *regs)
{
    mctelem_cookie_t mctc;
    struct mca_summary bs;

    ack_APIC_irq();

    mctc = mcheck_mca_logout(
        MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs, NULL);

    if ( bs.errcnt && mctc != NULL )
    {
        if ( dom0_vmce_enabled() )
        {
            mctelem_commit(mctc);
            mce_printk(MCE_VERBOSE, "CMCI: send CMCI to DOM0 through virq\n");
            send_global_virq(VIRQ_MCA);
        }
        else
        {
            x86_mcinfo_dump(mctelem_dataptr(mctc));
            mctelem_dismiss(mctc);
        }
    }
    else if ( mctc != NULL )
        mctelem_dismiss(mctc);
}

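/* Program the CMCI LVT entry on this CPU and take ownership of its banks. */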
static void intel_init_cmci(struct cpuinfo_x86 *c)
{
    u32 l, apic;
    int cpu = smp_processor_id();

    if ( !mce_available(c) || !cmci_support )
    {
        if ( opt_cpu_info )
            mce_printk(MCE_QUIET, "CMCI: CPU%d has no CMCI support\n", cpu);
        return;
    }

    apic = apic_read(APIC_CMCI);
    if ( apic & APIC_VECTOR_MASK )
    {
        mce_printk(MCE_QUIET, "CPU%d CMCI LVT vector (%#x) already installed\n",
                   cpu, ( apic & APIC_VECTOR_MASK ));
        return;
    }

    alloc_direct_apic_vector(&cmci_apic_vector, cmci_interrupt);

    apic = cmci_apic_vector;
    apic |= (APIC_DM_FIXED | APIC_LVT_MASKED);
    apic_write(APIC_CMCI, apic);

    l = apic_read(APIC_CMCI);
    apic_write(APIC_CMCI, l & ~APIC_LVT_MASKED);

    mce_set_owner();
}

/* MCA */

static bool mce_is_broadcast(struct cpuinfo_x86 *c)
{
    if ( mce_force_broadcast )
        return true;

    /*
     * According to the Intel SDM (Dec 2009, 15.10.4.1), for processors with
     * a DisplayFamily_DisplayModel encoding of 06H_EH and above, an MCA
     * signal is broadcast to all logical processors in the system.
     */
    if ( c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 6 &&
         c->x86_model >= 0xe )
        return true;
    return false;
}

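/*
 * Enable LMCE if the firmware has locked IA32_FEATURE_CONTROL with
 * LMCE_ON set; returns whether LMCE was actually enabled.
 */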
static bool intel_enable_lmce(void)
{
    uint64_t msr_content;

    /*
     * Section "Enabling Local Machine Check" in Intel SDM Vol 3
     * requires software to ensure the LOCK bit and LMCE_ON bit
     * of MSR_IA32_FEATURE_CONTROL are set before setting
     * MSR_IA32_MCG_EXT_CTL.LMCE_EN.
     */

    if ( rdmsr_safe(MSR_IA32_FEATURE_CONTROL, msr_content) )
        return false;

    if ( (msr_content & IA32_FEATURE_CONTROL_LOCK) &&
         (msr_content & IA32_FEATURE_CONTROL_LMCE_ON) )
    {
        wrmsrl(MSR_IA32_MCG_EXT_CTL, MCG_EXT_CTL_LMCE_EN);
        return true;
    }

    return false;
}

/* Check and init MCA */
static void intel_init_mca(struct cpuinfo_x86 *c)
{
    bool broadcast, cmci = false, ser = false, lmce = false;
    int ext_num = 0, first;
    uint64_t msr_content;

    broadcast = mce_is_broadcast(c);

    rdmsrl(MSR_IA32_MCG_CAP, msr_content);

    if ( (msr_content & MCG_CMCI_P) && cpu_has_apic )
        cmci = true;

    /* Support Software Error Recovery */
    if ( msr_content & MCG_SER_P )
        ser = true;

    if ( msr_content & MCG_EXT_P )
        ext_num = (msr_content >> MCG_EXT_CNT) & 0xff;

    first = mce_firstbank(c);

    if ( !mce_force_broadcast && (msr_content & MCG_LMCE_P) )
        lmce = intel_enable_lmce();

#define CAP(enabled, name) ((enabled) ? ", " name : "")
    if ( smp_processor_id() == 0 )
    {
        dprintk(XENLOG_INFO,
                "MCA Capability: firstbank %d, extended MCE MSR %d%s%s%s%s\n",
                first, ext_num,
                CAP(broadcast, "BCAST"),
                CAP(ser, "SER"),
                CAP(cmci, "CMCI"),
                CAP(lmce, "LMCE"));

        mce_broadcast = broadcast;
        cmci_support = cmci;
        ser_support = ser;
        lmce_support = lmce;
        nr_intel_ext_msrs = ext_num;
        firstbank = first;
    }
    else if ( cmci != cmci_support || ser != ser_support ||
              broadcast != mce_broadcast ||
              first != firstbank || ext_num != nr_intel_ext_msrs ||
              lmce != lmce_support )
        dprintk(XENLOG_WARNING,
                "CPU%u has different MCA capability "
                "(firstbank %d, extended MCE MSR %d%s%s%s%s)"
                " than BSP, may cause undetermined result!!!\n",
                smp_processor_id(), first, ext_num,
                CAP(broadcast, "BCAST"),
                CAP(ser, "SER"),
                CAP(cmci, "CMCI"),
                CAP(lmce, "LMCE"));
#undef CAP
}

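/*
 * Collect and log any machine-check state left in the banks from before
 * this boot/reset.
 */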
static void intel_mce_post_reset(void)
{
    mctelem_cookie_t mctc;
    struct mca_summary bs;

    mctc = mcheck_mca_logout(MCA_RESET, mca_allbanks, &bs, NULL);

    /* In the boot-up stage, print out and also log for the Dom0 boot process. */
    if ( bs.errcnt && mctc != NULL )
    {
        x86_mcinfo_dump(mctelem_dataptr(mctc));
        mctelem_commit(mctc);
    }
    return;
}

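/*
 * Per-CPU MCE setup: enable reporting in any bank not already initialized
 * by another core and register the Intel-specific handlers.
 */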
static void intel_init_mce(void)
{
    uint64_t msr_content;
    int i;

    intel_mce_post_reset();

    /* Clear all banks. */
    for ( i = firstbank; i < nr_mce_banks; i++ )
    {
        /*
         * Some banks are shared across cores, so use MCi_CTL to judge
         * whether this bank has already been initialized by another core.
         */
        rdmsrl(MSR_IA32_MCx_CTL(i), msr_content);
        if ( !msr_content )
        {
            /* If ctl is 0, this bank has never been initialized. */
            mce_printk(MCE_VERBOSE, "mce_init: init bank%d\n", i);
            wrmsrl(MSR_IA32_MCx_CTL(i), 0xffffffffffffffffULL);
            wrmsrl(MSR_IA32_MCx_STATUS(i), 0x0ULL);
        }
    }
    if ( firstbank )  /* if cmci enabled, firstbank = 0 */
        wrmsrl(MSR_IA32_MC0_STATUS, 0x0ULL);

    x86_mce_vector_register(mcheck_cmn_handler);
    mce_recoverable_register(intel_recoverable_scan);
    mce_need_clearbank_register(intel_need_clearbank_scan);
    mce_register_addrcheck(intel_checkaddr);

    mce_dhandlers = intel_mce_dhandlers;
    mce_dhandler_num = ARRAY_SIZE(intel_mce_dhandlers);
    mce_uhandlers = intel_mce_uhandlers;
    mce_uhandler_num = ARRAY_SIZE(intel_mce_uhandlers);
}

static void cpu_mcabank_free(unsigned int cpu)
{
    struct mca_banks *cmci = per_cpu(no_cmci_banks, cpu);
    struct mca_banks *owned = per_cpu(mce_banks_owned, cpu);

    mcabanks_free(cmci);
    mcabanks_free(owned);
}

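/*
 * Allocate the per-CPU bank ownership bitmaps; called for the BSP at boot
 * and from the CPU notifier before a secondary CPU is brought up.
 */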
static int cpu_mcabank_alloc(unsigned int cpu)
{
    struct mca_banks *cmci = mcabanks_alloc();
    struct mca_banks *owned = mcabanks_alloc();

    if ( !cmci || !owned )
        goto out;

    per_cpu(no_cmci_banks, cpu) = cmci;
    per_cpu(mce_banks_owned, cpu) = owned;
    per_cpu(last_state, cpu) = -1;

    return 0;
 out:
    mcabanks_free(cmci);
    mcabanks_free(owned);
    return -ENOMEM;
}

static int cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;
    int rc = 0;

    switch ( action )
    {
    case CPU_UP_PREPARE:
        rc = cpu_mcabank_alloc(cpu);
        break;

    case CPU_DYING:
        cpu_mcheck_disable();
        break;

    case CPU_UP_CANCELED:
    case CPU_DEAD:
        cpu_mcheck_distribute_cmci();
        cpu_mcabank_free(cpu);
        break;
    }

    return !rc ? NOTIFY_DONE : notifier_from_errno(rc);
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback
};

/* The P4 and P6 families have a similar MCA initialization process. */
enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c, bool bsp)
{
    if ( bsp )
    {
        /* Early MCE initialisation for BSP. */
        if ( cpu_mcabank_alloc(0) )
            BUG();
        register_cpu_notifier(&cpu_nfb);
        mcheck_intel_therm_init();
    }

    intel_init_mca(c);

    mce_handler_init();

    intel_init_mce();

    intel_init_cmci(c);
#ifdef CONFIG_X86_MCE_THERMAL
    intel_init_thermal(c);
#endif

    return mcheck_intel;
}

/* Intel-specific MCA MSRs */
int vmce_intel_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
{
    unsigned int bank = msr - MSR_IA32_MC0_CTL2;

    if ( bank < GUEST_MC_BANK_NUM )
    {
        v->arch.vmce.bank[bank].mci_ctl2 = val;
        mce_printk(MCE_VERBOSE, "MCE: wr MC%u_CTL2 %#"PRIx64"\n", bank, val);
    }

    return 1;
}

int vmce_intel_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
{
    unsigned int bank = msr - MSR_IA32_MC0_CTL2;

    if ( bank < GUEST_MC_BANK_NUM )
    {
        *val = v->arch.vmce.bank[bank].mci_ctl2;
        mce_printk(MCE_VERBOSE, "MCE: rd MC%u_CTL2 %#"PRIx64"\n", bank, *val);
    }

    return 1;
}

bool vmce_has_lmce(const struct vcpu *v)
{
    return v->arch.vmce.mcg_cap & MCG_LMCE_P;
}