#ifndef _MCE_H

#define _MCE_H

#include <xen/init.h>
#include <xen/sched.h>
#include <xen/smp.h>
#include <asm/types.h>
#include <asm/traps.h>
#include <asm/atomic.h>
#include <asm/percpu.h>

#include "x86_mca.h"
#include "mctelem.h"

#define MCE_QUIET    0
#define MCE_VERBOSE  1
/* Only for developer debugging, as printk is unsafe in MCE context */
#define MCE_CRITICAL 2

extern int mce_verbosity;
/*
 * Define the default level of machine check related printing.
 * When mce_verbosity=verbose is set, all MCE debug information
 * is printed; otherwise it is suppressed.
 */
#define mce_printk(v, s, a...) do { \
        if ((v) <= mce_verbosity)   \
            printk(s, ##a);         \
        } while (0)
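
/*
 * Usage sketch (illustrative only): a message tagged MCE_VERBOSE is only
 * emitted when the current verbosity is at least MCE_VERBOSE, e.g.
 *
 *     mce_printk(MCE_VERBOSE, "MCE: polling bank %d\n", bank);
 */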

enum mcheck_type {
    mcheck_unset = -1,
    mcheck_none,
    mcheck_amd_famXX,
    mcheck_amd_k8,
    mcheck_intel
};

extern uint8_t cmci_apic_vector;
extern bool lmce_support;

/* Init functions */
enum mcheck_type amd_mcheck_init(struct cpuinfo_x86 *c);
enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c, bool bsp);

void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);

extern unsigned int firstbank;

struct mcinfo_extended *intel_get_extended_msrs(
    struct mcinfo_global *mig, struct mc_info *mi);

bool mce_available(const struct cpuinfo_x86 *c);
unsigned int mce_firstbank(struct cpuinfo_x86 *c);
/* Helper functions used for collecting error telemetry */
void noreturn mc_panic(char *s);
void x86_mc_get_cpu_info(unsigned, uint32_t *, uint16_t *, uint16_t *,
                         uint32_t *, uint32_t *, uint32_t *, uint32_t *);

/* Register a handler for machine check exceptions. */
typedef void (*x86_mce_vector_t)(const struct cpu_user_regs *regs);
extern void x86_mce_vector_register(x86_mce_vector_t);

/*
 * Common generic MCE handler that implementations may nominate
 * via x86_mce_vector_register.
 */
extern void mcheck_cmn_handler(const struct cpu_user_regs *regs);
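
/*
 * Usage sketch (illustrative only, not taken from this file): a
 * vendor-specific init path typically nominates the common handler, e.g.
 *
 *     x86_mce_vector_register(mcheck_cmn_handler);
 */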

/* Register a handler for judging whether an MCE is recoverable. */
typedef bool (*mce_recoverable_t)(uint64_t status);
extern void mce_recoverable_register(mce_recoverable_t);

/* Read an MSR, checking for an interposed value first */
extern struct intpose_ent *intpose_lookup(unsigned int, uint64_t,
                                          uint64_t *);
extern bool intpose_inval(unsigned int, uint64_t);

static inline uint64_t mca_rdmsr(unsigned int msr)
{
    uint64_t val;

    if (intpose_lookup(smp_processor_id(), msr, &val) == NULL)
        rdmsrl(msr, val);
    return val;
}

/* Write an MSR, invalidating any interposed value */
#define mca_wrmsr(msr, val) do {                     \
    if ( !intpose_inval(smp_processor_id(), msr) )   \
        wrmsrl(msr, val);                            \
} while ( 0 )
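
/*
 * Usage sketch (illustrative only): bank telemetry is normally read and
 * cleared through these wrappers so that test-injected ("interposed")
 * values are honoured.  MSR_IA32_MCx_STATUS() and MCi_STATUS_VAL are
 * assumed to be the usual per-bank accessors from the MCA MSR definitions.
 *
 *     uint64_t status = mca_rdmsr(MSR_IA32_MCx_STATUS(bank));
 *     if ( status & MCi_STATUS_VAL )
 *         mca_wrmsr(MSR_IA32_MCx_STATUS(bank), 0x0ULL);
 */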

/*
 * Utility function to "logout" all architectural MCA telemetry from the MCA
 * banks of the current processor.  A cookie is returned which may be
 * used to reference the data so logged (the cookie can be NULL if
 * no logout structures were available).  The caller can also pass a pointer
 * to a structure which will be completed with some summary information
 * about the MCA data observed in the logout operation.
 */

enum mca_source {
    MCA_POLLER,
    MCA_CMCI_HANDLER,
    MCA_RESET,
    MCA_MCE_SCAN
};

struct mca_summary {
    uint32_t errcnt;     /* number of banks with valid errors */
    int      ripv;       /* meaningful on #MC */
    int      eipv;       /* meaningful on #MC */
    bool     uc;         /* UC flag */
    bool     pcc;        /* PCC flag */
    bool     lmce;       /* LMCE flag (Intel only) */
    bool     recoverable; /* software error recoverable flag */
};

DECLARE_PER_CPU(struct mca_banks *, poll_bankmask);
DECLARE_PER_CPU(struct mca_banks *, no_cmci_banks);
DECLARE_PER_CPU(struct mca_banks *, mce_clear_banks);

extern bool cmci_support;
extern bool is_mc_panic;
extern bool mce_broadcast;
extern void mcheck_mca_clearbanks(struct mca_banks *);

extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, struct mca_banks *,
                                          struct mca_summary *,
                                          struct mca_banks *);
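
/*
 * Usage sketch (illustrative only, loosely modelled on the error pollers):
 * log out the current CPU's banks and commit the telemetry only if valid
 * errors were found.  mctelem_commit()/mctelem_dismiss() come from
 * "mctelem.h", included above.
 *
 *     struct mca_summary bs;
 *     mctelem_cookie_t mctc;
 *
 *     mctc = mcheck_mca_logout(MCA_POLLER, this_cpu(poll_bankmask),
 *                              &bs, NULL);
 *     if ( bs.errcnt && mctc != NULL )
 *         mctelem_commit(mctc);
 *     else if ( mctc != NULL )
 *         mctelem_dismiss(mctc);
 */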

/*
 * Register callbacks to be made during bank telemetry logout.
 * These callbacks are only available to machine check handlers
 * that call the common mcheck_cmn_handler, or that use the common
 * telemetry logout function mcheck_mca_logout in error polling.
 */

/* Register a handler for judging whether a bank needs to be cleared */
typedef bool (*mce_need_clearbank_t)(enum mca_source who, u64 status);
extern void mce_need_clearbank_register(mce_need_clearbank_t);

/*
 * Register a callback to collect additional information (typically non-
 * architectural) provided by newer CPU families/models, without the need
 * to duplicate the whole handler, which would result in various handlers
 * each with its own tweaks and bugs.  The callback receives a struct
 * mc_info pointer which it can use with x86_mcinfo_reserve to add
 * additional telemetry, the current MCA bank number we are reading
 * telemetry from, and the MCi_STATUS value for that bank.
 */
typedef struct mcinfo_extended *(*x86_mce_callback_t)
    (struct mc_info *, uint16_t, uint64_t);
extern void x86_mce_callback_register(x86_mce_callback_t);
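
/*
 * Usage sketch (illustrative only; the callback name and the payload it
 * fills in are hypothetical): a vendor-specific module can register a
 * collector that reserves space in the telemetry record via
 * x86_mcinfo_reserve(), e.g.
 *
 *     static struct mcinfo_extended *example_get_extended_info(
 *         struct mc_info *mi, uint16_t bank, uint64_t status)
 *     {
 *         struct mcinfo_extended *ext =
 *             x86_mcinfo_reserve(mi, sizeof(*ext), MC_TYPE_EXTENDED);
 *
 *         // ... fill in ext with non-architectural MSR values ...
 *         return ext;
 *     }
 *
 *     x86_mce_callback_register(example_get_extended_info);
 */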

void *x86_mcinfo_reserve(struct mc_info *mi,
                         unsigned int size, unsigned int type);
void x86_mcinfo_dump(struct mc_info *mi);

static inline int mce_vendor_bank_msr(const struct vcpu *v, uint32_t msr)
{
    switch (boot_cpu_data.x86_vendor) {
    case X86_VENDOR_INTEL:
        if (msr >= MSR_IA32_MC0_CTL2 &&
            msr < MSR_IA32_MCx_CTL2(v->arch.vmce.mcg_cap & MCG_CAP_COUNT))
            return 1;
        break;

    case X86_VENDOR_AMD:
        switch (msr) {
        case MSR_F10_MC4_MISC1:
        case MSR_F10_MC4_MISC2:
        case MSR_F10_MC4_MISC3:
            return 1;
        }
        break;
    }
    return 0;
}

static inline int mce_bank_msr(const struct vcpu *v, uint32_t msr)
{
    if ( (msr >= MSR_IA32_MC0_CTL &&
          msr < MSR_IA32_MCx_CTL(v->arch.vmce.mcg_cap & MCG_CAP_COUNT)) ||
         mce_vendor_bank_msr(v, msr) )
        return 1;
    return 0;
}
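
/*
 * Usage sketch (illustrative only; vmce_rdmsr() is assumed to be the vMCE
 * MSR read helper declared elsewhere): an MSR intercept path can use
 * mce_bank_msr() to decide whether a guest access targets a virtual MCA
 * bank and should be routed to the vMCE code, e.g.
 *
 *     if ( mce_bank_msr(v, msr) )
 *         return vmce_rdmsr(msr, &val);
 */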

/* MC softirq */
void mce_handler_init(void);

extern const struct mca_error_handler *mce_dhandlers;
extern const struct mca_error_handler *mce_uhandlers;
extern unsigned int mce_dhandler_num;
extern unsigned int mce_uhandler_num;

/* Fields are zero when not available */
struct mce {
    uint64_t status;
    uint64_t misc;
    uint64_t addr;
    uint64_t mcgstatus;
    uint64_t ip;
    uint64_t tsc;          /* CPU time stamp counter */
    uint64_t time;         /* wall time_t when error was detected */
    uint8_t  cpuvendor;    /* CPU vendor as encoded in system.h */
    uint8_t  inject_flags; /* software inject flags */
    uint16_t pad;
    uint32_t cpuid;        /* CPUID 1 EAX */
    uint8_t  cs;           /* code segment */
    uint8_t  bank;         /* machine check bank */
    uint8_t  cpu;          /* CPU number; obsolete; use extcpu now */
    uint8_t  finished;     /* entry is valid */
    uint32_t extcpu;       /* Linux CPU number that detected the error */
    uint32_t socketid;     /* CPU socket ID */
    uint32_t apicid;       /* CPU initial APIC ID */
    uint64_t mcgcap;       /* MCGCAP MSR: machine check capabilities of CPU */
};

extern int apei_write_mce(struct mce *m);

#endif /* _MCE_H */