#include <xen/init.h>
#include <xen/kernel.h>
#include <xen/string.h>
#include <xen/bitops.h>
#include <xen/smp.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <asm/i387.h>
#include <mach_apic.h>
#include <asm/hvm/support.h>
#include <asm/setup.h>

#include "cpu.h"

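/*
 * Not used by Xen; stubbed to a no-op so the call in init_intel() below
 * remains harmless.
 */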
#define select_idle_routine(x) ((void)0)

/*
 * Set caps in expected_levelling_cap, probe a specific masking MSR, and set
 * caps in levelling_caps if it is found, or clobber the MSR index if missing.
 * If present, the current (default) value of the MSR is returned.
 */
static uint64_t __init _probe_mask_msr(unsigned int *msr, uint64_t caps)
{
        uint64_t val = 0;

        expected_levelling_cap |= caps;

        if (rdmsr_safe(*msr, val) || wrmsr_safe(*msr, val))
                *msr = 0;
        else
                levelling_caps |= caps;

        return val;
}

/* Indices of the masking MSRs, or 0 if unavailable. */
static unsigned int __read_mostly msr_basic, __read_mostly msr_ext,
        __read_mostly msr_xsave;

/*
 * Probe for the existence of the expected masking MSRs. They might easily
 * not be available if Xen is running virtualised.
 */
static void __init probe_masking_msrs(void)
{
        const struct cpuinfo_x86 *c = &boot_cpu_data;
        unsigned int exp_msr_basic, exp_msr_ext, exp_msr_xsave;

        /* Only family 6 supports this feature. */
        if (c->x86 != 6)
                return;

        switch (c->x86_model) {
        case 0x17: /* Yorkfield, Wolfdale, Penryn, Harpertown(DP) */
        case 0x1d: /* Dunnington(MP) */
                msr_basic = MSR_INTEL_MASK_V1_CPUID1;
                break;

        case 0x1a: /* Bloomfield, Nehalem-EP(Gainestown) */
        case 0x1e: /* Clarksfield, Lynnfield, Jasper Forest */
        case 0x1f: /* Something Nehalem-based - perhaps Auburndale/Havendale? */
        case 0x25: /* Arrandale, Clarksdale */
        case 0x2c: /* Gulftown, Westmere-EP */
        case 0x2e: /* Nehalem-EX(Beckton) */
        case 0x2f: /* Westmere-EX */
                msr_basic = MSR_INTEL_MASK_V2_CPUID1;
                msr_ext   = MSR_INTEL_MASK_V2_CPUID80000001;
                break;

        case 0x2a: /* SandyBridge */
        case 0x2d: /* SandyBridge-E, SandyBridge-EN, SandyBridge-EP */
                msr_basic = MSR_INTEL_MASK_V3_CPUID1;
                msr_ext   = MSR_INTEL_MASK_V3_CPUID80000001;
                msr_xsave = MSR_INTEL_MASK_V3_CPUIDD_01;
                break;
        }

        exp_msr_basic = msr_basic;
        exp_msr_ext   = msr_ext;
        exp_msr_xsave = msr_xsave;

        if (msr_basic)
                cpuidmask_defaults._1cd = _probe_mask_msr(&msr_basic, LCAP_1cd);

        if (msr_ext)
                cpuidmask_defaults.e1cd = _probe_mask_msr(&msr_ext, LCAP_e1cd);

        if (msr_xsave)
                cpuidmask_defaults.Da1 = _probe_mask_msr(&msr_xsave, LCAP_Da1);

        /*
         * Don't bother warning about a mismatch if virtualised. These MSRs
         * are not architectural and almost never virtualised.
         */
        if ((expected_levelling_cap == levelling_caps) ||
            cpu_has_hypervisor)
                return;

        printk(XENLOG_WARNING "Mismatch between expected (%#x) "
               "and real (%#x) levelling caps: missing %#x\n",
               expected_levelling_cap, levelling_caps,
               (expected_levelling_cap ^ levelling_caps) & expected_levelling_cap);
        printk(XENLOG_WARNING "Fam %#x, model %#x expected (%#x/%#x/%#x), "
               "got (%#x/%#x/%#x)\n", c->x86, c->x86_model,
               exp_msr_basic, exp_msr_ext, exp_msr_xsave,
               msr_basic, msr_ext, msr_xsave);
        printk(XENLOG_WARNING
               "If not running virtualised, please report a bug\n");
}

/*
 * Context switch CPUID masking state to the next domain. Only called if
 * CPUID Faulting isn't available, but masking MSRs have been detected. A
 * parameter of NULL is used to context switch to the default host state (by
 * the CPU bringup code, crash path, etc.).
 */
static void intel_ctxt_switch_masking(const struct vcpu *next)
{
        struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
        const struct domain *nextd = next ? next->domain : NULL;
        const struct cpuidmasks *masks =
                (nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks)
                ? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults;

        if (msr_basic) {
                uint64_t val = masks->_1cd;

                /*
                 * OSXSAVE defaults to 1, which causes fast-forwarding of
                 * Xen's real setting. Clobber it if disabled by the guest
                 * kernel.
                 */
                if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) &&
                    !(next->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE))
                        val &= ~(uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE);

                if (unlikely(these_masks->_1cd != val)) {
                        wrmsrl(msr_basic, val);
                        these_masks->_1cd = val;
                }
        }

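/*
 * LAZY(): write a masking MSR only if it was detected and the per-CPU
 * cached value differs from the wanted one, then update the cache.  This
 * keeps the common (no change) path free of MSR writes.
 */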
#define LAZY(msr, field)                                                \
        ({                                                              \
                if (unlikely(these_masks->field != masks->field) &&     \
                    (msr))                                              \
                {                                                       \
                        wrmsrl((msr), masks->field);                    \
                        these_masks->field = masks->field;              \
                }                                                       \
        })

        LAZY(msr_ext,   e1cd);
        LAZY(msr_xsave, Da1);

#undef LAZY
}

/*
 * opt_cpuid_mask_ecx/edx: cpuid.1[ecx, edx] feature mask.
 * For example, an E8400 (Intel Core 2 Duo series) reports ecx = 0x0008E3FD,
 * edx = 0xBFEBFBFF for CPUID.EAX = 1.  To 'rev down' the visible featureset
 * to that of an E8400, set these values via the corresponding Xen boot
 * parameters.
 */
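/*
 * For instance (assuming the usual cpuid_mask_* command line option names),
 * booting Xen with:
 *
 *   cpuid_mask_ecx=0x0008e3fd cpuid_mask_edx=0xbfebfbff
 *
 * feeds the example values above into opt_cpuid_mask_ecx/edx.
 */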
static void __init noinline intel_init_levelling(void)
{
        if (probe_cpuid_faulting())
                return;

        probe_masking_msrs();

        if (msr_basic) {
                uint32_t ecx, edx, tmp;

                cpuid(0x00000001, &tmp, &tmp, &ecx, &edx);

                ecx &= opt_cpuid_mask_ecx;
                edx &= opt_cpuid_mask_edx;

                /*
                 * Fast-forward bits - must be set.  These bits reflect
                 * dynamic hardware state (e.g. CR4.OSXSAVE), which is only
                 * passed through while the mask bit remains set.
                 */
                if (ecx & cpufeat_mask(X86_FEATURE_XSAVE))
                        ecx |= cpufeat_mask(X86_FEATURE_OSXSAVE);
                edx |= cpufeat_mask(X86_FEATURE_APIC);

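                /* The mask MSR holds the edx mask in its upper 32 bits and the ecx mask in the lower 32. */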
                cpuidmask_defaults._1cd &= ((u64)edx << 32) | ecx;
        }

        if (msr_ext) {
                uint32_t ecx, edx, tmp;

                cpuid(0x80000001, &tmp, &tmp, &ecx, &edx);

                ecx &= opt_cpuid_mask_ext_ecx;
                edx &= opt_cpuid_mask_ext_edx;

                cpuidmask_defaults.e1cd &= ((u64)edx << 32) | ecx;
        }

        if (msr_xsave) {
                uint32_t eax, tmp;

                cpuid_count(0x0000000d, 1, &eax, &tmp, &tmp, &tmp);

                eax &= opt_cpuid_mask_xsave_eax;

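                /* Only the low half (the CPUID.0xD[1].eax mask) is adjusted; the upper half of the MSR is preserved. */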
                cpuidmask_defaults.Da1 &= (~0ULL << 32) | eax;
        }

        if (opt_cpu_info) {
                printk(XENLOG_INFO "Levelling caps: %#x\n", levelling_caps);

                if (!cpu_has_cpuid_faulting)
                        printk(XENLOG_INFO
                               "MSR defaults: 1d 0x%08x, 1c 0x%08x, e1d 0x%08x, "
                               "e1c 0x%08x, Da1 0x%08x\n",
                               (uint32_t)(cpuidmask_defaults._1cd >> 32),
                               (uint32_t)cpuidmask_defaults._1cd,
                               (uint32_t)(cpuidmask_defaults.e1cd >> 32),
                               (uint32_t)cpuidmask_defaults.e1cd,
                               (uint32_t)cpuidmask_defaults.Da1);
        }

        if (levelling_caps)
                ctxt_switch_masking = intel_ctxt_switch_masking;
}

static void early_init_intel(struct cpuinfo_x86 *c)
{
        /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
        if (c->x86 == 15 && c->x86_cache_alignment == 64)
                c->x86_cache_alignment = 128;

        /* Unmask CPUID levels and NX if masked: */
        if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
                u64 misc_enable, disable;

                rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);

                disable = misc_enable & (MSR_IA32_MISC_ENABLE_LIMIT_CPUID |
                                         MSR_IA32_MISC_ENABLE_XD_DISABLE);
                if (disable) {
                        wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable);
                        bootsym(trampoline_misc_enable_off) |= disable;
                }
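                /*
                 * Bits stashed in trampoline_misc_enable_off are used by the
                 * early (trampoline) boot path, so APs and the S3 resume path
                 * apply the same unmasking.
                 */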

                if (disable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID)
                        printk(KERN_INFO "revised cpuid level: %d\n",
                               cpuid_eax(0));
                if (disable & MSR_IA32_MISC_ENABLE_XD_DISABLE) {
                        write_efer(read_efer() | EFER_NX);
                        printk(KERN_INFO
                               "re-enabled NX (Execute Disable) protection\n");
                }
        }

        /* CPUID workaround for Intel 0F33/0F34 CPU */
        if (boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 3 &&
            (boot_cpu_data.x86_mask == 3 || boot_cpu_data.x86_mask == 4))
                paddr_bits = 36;

        if (c == &boot_cpu_data)
                intel_init_levelling();

        ctxt_switch_levelling(NULL);
}

/*
 * P4 Xeon errata 037 workaround.
 * Hardware prefetcher may cause stale data to be loaded into the cache.
 *
 * Xeon 7400 erratum AAI65 (and later Xeons)
 * MONITOR/MWAIT may have excessive false wakeups
 */
static void Intel_errata_workarounds(struct cpuinfo_x86 *c)
{
        unsigned long lo, hi;

        if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
                rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
                if ((lo & (1<<9)) == 0) {
                        printk(KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
                        printk(KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
                        lo |= (1<<9);   /* Disable hw prefetching */
                        wrmsr(MSR_IA32_MISC_ENABLE, lo, hi);
                }
        }

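        /*
         * Dunnington, Nehalem-EX and Westmere-EX: flag that the monitored
         * cache line should be flushed before MONITOR, working around the
         * false-wakeup erratum noted above.
         */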
        if (c->x86 == 6 && cpu_has_clflush &&
            (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
                __set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability);
}


/*
 * find out the number of processor cores on the die
 */
static int num_cpu_cores(struct cpuinfo_x86 *c)
{
        unsigned int eax, ebx, ecx, edx;

        if (c->cpuid_level < 4)
                return 1;

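        /*
         * CPUID leaf 4, subleaf 0: EAX[4:0] is the cache type (0 => the leaf
         * is unimplemented); EAX[31:26] is the maximum number of addressable
         * core IDs in the package, minus one.
         */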
        /* Intel has a non-standard dependency on %ecx for this CPUID level. */
        cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
        if (eax & 0x1f)
                return ((eax >> 26) + 1);
        else
                return 1;
}

static void init_intel(struct cpuinfo_x86 *c)
{
        unsigned int l2 = 0;

        /* Detect the extended topology information if available */
        detect_extended_topology(c);

        select_idle_routine(c);
        l2 = init_intel_cacheinfo(c);
        if (c->cpuid_level > 9) {
                unsigned eax = cpuid_eax(10);
                /* Check for version and the number of counters */
                if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
                        __set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
        }

        if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) )
        {
                c->x86_max_cores = num_cpu_cores(c);
                detect_ht(c);
        }

        /* Work around errata */
        Intel_errata_workarounds(c);

        if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
            (c->x86 == 0x6 && c->x86_model >= 0x0e))
                __set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
        if (cpu_has(c, X86_FEATURE_ITSC)) {
                __set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
                __set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
                __set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
        }
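        /* ARAT (CPUID.6 EAX bit 2): the APIC timer keeps running in deep C-states. */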
        if ( opt_arat &&
             ( c->cpuid_level >= 0x00000006 ) &&
             ( cpuid_eax(0x00000006) & (1u<<2) ) )
                __set_bit(X86_FEATURE_ARAT, c->x86_capability);
}

static const struct cpu_dev intel_cpu_dev = {
        .c_vendor = "Intel",
        .c_ident = { "GenuineIntel" },
        .c_early_init = early_init_intel,
        .c_init = init_intel,
};

int __init intel_cpu_init(void)
{
        cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev;
        return 0;
}

// arch_initcall(intel_cpu_init);