#ifndef _MCE_H

#define _MCE_H

#include <xen/init.h>
#include <xen/sched.h>
#include <xen/smp.h>
#include <asm/types.h>
#include <asm/traps.h>
#include <asm/atomic.h>
#include <asm/percpu.h>

#include "x86_mca.h"
#include "mctelem.h"

#define MCE_QUIET       0
#define MCE_VERBOSE     1
/* Only for developer debugging, as printk is unsafe in MCE context */
#define MCE_CRITICAL    2

extern int mce_verbosity;
/*
 * Default verbosity level for machine-check related printing.
 * When mce_verbosity=verbose is set, all MCE debug information
 * is printed; otherwise it is suppressed.
 */
#define mce_printk(v, s, a...) do {       \
        if ((v) <= mce_verbosity) \
            printk(s, ##a);       \
        } while (0)
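
/*
 * Illustrative usage (sketch only; the message and variables are
 * hypothetical):
 *
 *     mce_printk(MCE_VERBOSE, "MCE: CPU%u bank %u cleared\n", cpu, bank);
 */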

enum mcheck_type {
    mcheck_unset = -1,
    mcheck_none,
    mcheck_amd_famXX,
    mcheck_amd_k8,
    mcheck_intel
};

extern uint8_t cmci_apic_vector;
extern bool lmce_support;

/* Init functions */
enum mcheck_type amd_mcheck_init(struct cpuinfo_x86 *c);
enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c, bool bsp);

void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);

extern unsigned int firstbank;

struct mcinfo_extended *intel_get_extended_msrs(
    struct mcinfo_global *mig, struct mc_info *mi);

bool mce_available(const struct cpuinfo_x86 *c);
unsigned int mce_firstbank(struct cpuinfo_x86 *c);
/* Helper functions used for collecting error telemetry */
void noreturn mc_panic(char *s);
void x86_mc_get_cpu_info(unsigned, uint32_t *, uint16_t *, uint16_t *,
                         uint32_t *, uint32_t *, uint32_t *, uint32_t *);

/* Register a handler for machine check exceptions. */
typedef void (*x86_mce_vector_t)(const struct cpu_user_regs *regs);
extern void x86_mce_vector_register(x86_mce_vector_t);

/*
 * Common generic MCE handler that implementations may nominate
 * via x86_mce_vector_register.
 */
extern void mcheck_cmn_handler(const struct cpu_user_regs *regs);
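
/*
 * For example, an implementation with no special #MC requirements can
 * simply nominate the common handler (illustrative sketch):
 *
 *     x86_mce_vector_register(mcheck_cmn_handler);
 */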

/* Register a handler for judging whether an MCE is recoverable. */
typedef bool (*mce_recoverable_t)(uint64_t status);
extern void mce_recoverable_register(mce_recoverable_t);

/* Read an MSR, checking for an interposed value first */
extern struct intpose_ent *intpose_lookup(unsigned int, uint64_t,
    uint64_t *);
extern bool intpose_inval(unsigned int, uint64_t);

static inline uint64_t mca_rdmsr(unsigned int msr)
{
    uint64_t val;
    if (intpose_lookup(smp_processor_id(), msr, &val) == NULL)
        rdmsrl(msr, val);
    return val;
}

/* Write an MSR, invalidating any interposed value */
#define mca_wrmsr(msr, val) do { \
    if ( !intpose_inval(smp_processor_id(), msr) ) \
        wrmsrl(msr, val); \
} while ( 0 )
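
/*
 * Illustrative usage (sketch only; @bank is assumed to be a valid bank
 * index on the current CPU):
 *
 *     uint64_t status = mca_rdmsr(MSR_IA32_MCx_STATUS(bank));
 *
 *     if ( status & MCi_STATUS_VAL )
 *         mca_wrmsr(MSR_IA32_MCx_STATUS(bank), 0x0ULL);
 */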

/*
 * Utility function to "logout" all architectural MCA telemetry from the MCA
 * banks of the current processor.  A cookie is returned which may be
 * used to reference the data so logged (the cookie can be NULL if
 * no logout structures were available).  The caller can also pass a pointer
 * to a structure which will be completed with some summary information
 * of the MCA data observed in the logout operation.
 */

enum mca_source {
    MCA_POLLER,
    MCA_CMCI_HANDLER,
    MCA_RESET,
    MCA_MCE_SCAN
};

struct mca_summary {
    uint32_t    errcnt; /* number of banks with valid errors */
    int         ripv;   /* meaningful on #MC */
    int         eipv;   /* meaningful on #MC */
    bool        uc;     /* UC flag */
    bool        pcc;    /* PCC flag */
    bool        lmce;   /* LMCE flag (Intel only) */
    bool        recoverable; /* software error recoverable flag */
};

DECLARE_PER_CPU(struct mca_banks *, poll_bankmask);
DECLARE_PER_CPU(struct mca_banks *, no_cmci_banks);
DECLARE_PER_CPU(struct mca_banks *, mce_clear_banks);

extern bool cmci_support;
extern bool is_mc_panic;
extern bool mce_broadcast;
extern void mcheck_mca_clearbanks(struct mca_banks *);

extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, struct mca_banks *,
    struct mca_summary *, struct mca_banks *);

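/*
 * Illustrative sketch of a logout call from a polling context (assumes the
 * mctelem_commit/mctelem_dismiss interfaces from mctelem.h):
 *
 *     struct mca_summary bs;
 *     mctelem_cookie_t mctc;
 *
 *     mctc = mcheck_mca_logout(MCA_POLLER, this_cpu(poll_bankmask), &bs, NULL);
 *     if ( mctc != NULL )
 *     {
 *         if ( bs.errcnt )
 *             mctelem_commit(mctc);
 *         else
 *             mctelem_dismiss(mctc);
 *     }
 */
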
/*
 * Register callbacks to be made during bank telemetry logout.
 * These callbacks are only available to machine check handlers
 * that call the common mcheck_cmn_handler, or that use the common
 * telemetry logout function mcheck_mca_logout in error polling.
 */

/* Register a handler for judging whether a bank needs to be cleared */
typedef bool (*mce_need_clearbank_t)(enum mca_source who, u64 status);
extern void mce_need_clearbank_register(mce_need_clearbank_t);

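/*
 * Illustrative sketch (hypothetical handler name): a handler could choose
 * to leave banks latched while polling and only clear them from the #MC
 * scan path, e.g.:
 *
 *     static bool example_need_clearbank(enum mca_source who, u64 status)
 *     {
 *         return who == MCA_MCE_SCAN && (status & MCi_STATUS_VAL);
 *     }
 *
 *     mce_need_clearbank_register(example_need_clearbank);
 */
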
/*
 * Register a callback to collect additional information (typically non-
 * architectural) provided by newer CPU families/models without the need
 * to duplicate the whole handler, which would result in various handlers
 * each with their own tweaks and bugs. The callback receives a struct
 * mc_info pointer which it can use with x86_mcinfo_reserve to add
 * additional telemetry, the current MCA bank number we are reading
 * telemetry from, and the MCi_STATUS value for that bank.
 */
typedef struct mcinfo_extended *(*x86_mce_callback_t)
    (struct mc_info *, uint16_t, uint64_t);
extern void x86_mce_callback_register(x86_mce_callback_t);

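/*
 * Illustrative sketch (hypothetical callback; names are examples only):
 *
 *     static struct mcinfo_extended *example_mc_callback(
 *         struct mc_info *mi, uint16_t bank, uint64_t status)
 *     {
 *         if ( !(status & MCi_STATUS_MISCV) )
 *             return NULL;
 *
 *         return x86_mcinfo_reserve(mi, sizeof(struct mcinfo_extended),
 *                                   MC_TYPE_EXTENDED);
 *     }
 *
 *     x86_mce_callback_register(example_mc_callback);
 */
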
void *x86_mcinfo_reserve(struct mc_info *mi,
                         unsigned int size, unsigned int type);
void x86_mcinfo_dump(struct mc_info *mi);

/* Is @msr a vendor-specific MCA bank MSR, as seen by vCPU @v? */
static inline int mce_vendor_bank_msr(const struct vcpu *v, uint32_t msr)
{
    switch (boot_cpu_data.x86_vendor) {
    case X86_VENDOR_INTEL:
        if (msr >= MSR_IA32_MC0_CTL2 &&
            msr < MSR_IA32_MCx_CTL2(v->arch.vmce.mcg_cap & MCG_CAP_COUNT) )
            return 1;
        break;

    case X86_VENDOR_AMD:
        switch (msr) {
        case MSR_F10_MC4_MISC1:
        case MSR_F10_MC4_MISC2:
        case MSR_F10_MC4_MISC3:
            return 1;
        }
        break;
    }
    return 0;
}

/* Is @msr any MCA bank MSR (architectural or vendor-specific) of vCPU @v? */
static inline int mce_bank_msr(const struct vcpu *v, uint32_t msr)
{
    if ( (msr >= MSR_IA32_MC0_CTL &&
         msr < MSR_IA32_MCx_CTL(v->arch.vmce.mcg_cap & MCG_CAP_COUNT)) ||
         mce_vendor_bank_msr(v, msr) )
        return 1;
    return 0;
}

/* MC softirq */
void mce_handler_init(void);

extern const struct mca_error_handler *mce_dhandlers;
extern const struct mca_error_handler *mce_uhandlers;
extern unsigned int mce_dhandler_num;
extern unsigned int mce_uhandler_num;

/* Fields are zero when not available */
struct mce {
    uint64_t status;
    uint64_t misc;
    uint64_t addr;
    uint64_t mcgstatus;
    uint64_t ip;
    uint64_t tsc;      /* cpu time stamp counter */
    uint64_t time;     /* wall time_t when error was detected */
    uint8_t  cpuvendor;        /* cpu vendor as encoded in system.h */
    uint8_t  inject_flags;     /* software inject flags */
    uint16_t pad;
    uint32_t cpuid;    /* CPUID 1 EAX */
    uint8_t  cs;       /* code segment */
    uint8_t  bank;     /* machine check bank */
    uint8_t  cpu;      /* cpu number; obsolete; use extcpu now */
    uint8_t  finished; /* entry is valid */
    uint32_t extcpu;   /* linux cpu number that detected the error */
    uint32_t socketid; /* CPU socket ID */
    uint32_t apicid;   /* CPU initial apic ID */
    uint64_t mcgcap;   /* MCGCAP MSR: machine check capabilities of CPU */
};

extern int apei_write_mce(struct mce *m);

#endif /* _MCE_H */