#include <xen/delay.h>
#include <xen/init.h>
#include <xen/param.h>
#include <xen/smp.h>
#include <xen/string.h>

#include <asm/amd.h>
#include <asm/apic.h>
#include <asm/cpu-policy.h>
#include <asm/current.h>
#include <asm/debugreg.h>
#include <asm/guest-msr.h>
#include <asm/idt.h>
#include <asm/io.h>
#include <asm/match-cpu.h>
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/prot-key.h>
#include <asm/random.h>
#include <asm/setup.h>
#include <asm/shstk.h>
#include <asm/xstate.h>

#include <public/sysctl.h>

#include "cpu.h"
#include "mcheck/x86_mca.h"

bool __read_mostly opt_dom0_cpuid_faulting = true;

bool opt_arat = true;
boolean_param("arat", opt_arat);

unsigned int opt_cpuid_mask_ecx = ~0u;
integer_param("cpuid_mask_ecx", opt_cpuid_mask_ecx);
unsigned int opt_cpuid_mask_edx = ~0u;
integer_param("cpuid_mask_edx", opt_cpuid_mask_edx);

unsigned int opt_cpuid_mask_xsave_eax = ~0u;
integer_param("cpuid_mask_xsave_eax", opt_cpuid_mask_xsave_eax);

unsigned int opt_cpuid_mask_ext_ecx = ~0u;
integer_param("cpuid_mask_ext_ecx", opt_cpuid_mask_ext_ecx);
unsigned int opt_cpuid_mask_ext_edx = ~0u;
integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx);

unsigned int __initdata expected_levelling_cap;
unsigned int __read_mostly levelling_caps;

DEFINE_PER_CPU(struct cpuidmasks, cpuidmasks);
struct cpuidmasks __read_mostly cpuidmask_defaults;

unsigned int paddr_bits __read_mostly = 36;
unsigned int hap_paddr_bits __read_mostly = 36;
unsigned int vaddr_bits __read_mostly = VADDR_BITS;

static unsigned int cleared_caps[NCAPINTS];
static unsigned int forced_caps[NCAPINTS];

DEFINE_PER_CPU(bool, full_gdt_loaded);

DEFINE_PER_CPU(uint32_t, pkrs);

extern uint32_t clear_page_clzero_post_count[];
extern int8_t clear_page_clzero_post_neg_size[];

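/*
 * Hide a feature from Xen's view of the boot CPU.  Any features which
 * depend on it (per the deep dependency table) are cleared as well, and a
 * message is logged if this contradicts an earlier setup_force_cpu_cap().
 */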
void __init setup_clear_cpu_cap(unsigned int cap)
{
	const uint32_t *dfs;
	unsigned int i;

	if (__test_and_set_bit(cap, cleared_caps))
		return;

	if (test_bit(cap, forced_caps))
		printk("%pS clearing previously forced feature %#x\n",
		       __builtin_return_address(0), cap);

	__clear_bit(cap, boot_cpu_data.x86_capability);
	dfs = x86_cpu_policy_lookup_deep_deps(cap);

	if (!dfs)
		return;

	for (i = 0; i < FSCAPINTS; ++i) {
		cleared_caps[i] |= dfs[i];
		boot_cpu_data.x86_capability[i] &= ~dfs[i];
		if (!(forced_caps[i] & dfs[i]))
			continue;
		printk("%pS implicitly clearing previously forced feature(s) %u:%#x\n",
		       __builtin_return_address(0),
		       i, forced_caps[i] & dfs[i]);
	}
}

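/*
 * Force a feature to be considered present on the boot CPU, unless it has
 * already been explicitly cleared, in which case the request is logged and
 * ignored.
 */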
void __init setup_force_cpu_cap(unsigned int cap)
{
	if (__test_and_set_bit(cap, forced_caps))
		return;

	if (test_bit(cap, cleared_caps)) {
		printk("%pS tries to force previously cleared feature %#x\n",
		       __builtin_return_address(0), cap);
		return;
	}

	__set_bit(cap, boot_cpu_data.x86_capability);
}

bool __init is_forced_cpu_cap(unsigned int cap)
{
	return test_bit(cap, forced_caps);
}

static void cf_check default_init(struct cpuinfo_x86 *c)
{
	/* Not much we can do here... */
	__clear_bit(X86_FEATURE_SEP, c->x86_capability);
}

static const struct cpu_dev __initconst_cf_clobber __used default_cpu = {
	.c_init = default_init,
};
static struct cpu_dev __ro_after_init actual_cpu;

static DEFINE_PER_CPU(uint64_t, msr_misc_features);
void (* __ro_after_init ctxt_switch_masking)(const struct vcpu *next);

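/*
 * Probe for CPUID faulting support (the MSR_INTEL_PLATFORM_INFO bit),
 * recording the result in the raw CPU policy and forcing or clearing the
 * synthetic X86_FEATURE_CPUID_FAULTING flag accordingly.  Returns true if
 * faulting is available and MSR_INTEL_MISC_FEATURES_ENABLES could be read.
 */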
bool __init probe_cpuid_faulting(void)
{
	uint64_t val;
	int rc;

	if ((rc = rdmsr_safe(MSR_INTEL_PLATFORM_INFO, val)) == 0)
		raw_cpu_policy.platform_info.cpuid_faulting =
			val & MSR_PLATFORM_INFO_CPUID_FAULTING;

	if (rc ||
	    !(val & MSR_PLATFORM_INFO_CPUID_FAULTING) ||
	    rdmsr_safe(MSR_INTEL_MISC_FEATURES_ENABLES,
		       this_cpu(msr_misc_features)))
	{
		setup_clear_cpu_cap(X86_FEATURE_CPUID_FAULTING);
		return false;
	}

	setup_force_cpu_cap(X86_FEATURE_CPUID_FAULTING);

	return true;
}

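/*
 * Toggle CPUID faulting on the current CPU by flipping the relevant bit in
 * MSR_INTEL_MISC_FEATURES_ENABLES, using the per-CPU cached copy to avoid a
 * redundant MSR write when the requested state is already in effect.
 */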
static void set_cpuid_faulting(bool enable)
{
	uint64_t *this_misc_features = &this_cpu(msr_misc_features);
	uint64_t val = *this_misc_features;

	if (!!(val & MSR_MISC_FEATURES_CPUID_FAULTING) == enable)
		return;

	val ^= MSR_MISC_FEATURES_CPUID_FAULTING;

	wrmsrl(MSR_INTEL_MISC_FEATURES_ENABLES, val);
	*this_misc_features = val;
}

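/*
 * Adjust CPUID levelling state when context switching to 'next'.  With
 * hardware CPUID faulting (Intel) or CPUID user-disable (AMD) available,
 * decide whether faulting should be active for the incoming vCPU; otherwise
 * fall back to the vendor-specific CPUID masking hook, if any.
 */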
void ctxt_switch_levelling(const struct vcpu *next)
{
	const struct domain *nextd = next ? next->domain : NULL;
	bool enable_cpuid_faulting;

	if (cpu_has_cpuid_faulting ||
	    boot_cpu_has(X86_FEATURE_CPUID_USER_DIS)) {
		/*
		 * No need to alter the faulting setting if we are switching
		 * to idle; it won't affect any code running in idle context.
		 */
		if (nextd && is_idle_domain(nextd))
			return;
		/*
		 * We *should* be enabling faulting for PV control domains.
		 *
		 * The domain builder has now been updated to not depend on
		 * seeing host CPUID values. This makes it compatible with
		 * PVH toolstack domains, and lets us enable faulting by
		 * default for all PV domains.
		 *
		 * However, as PV control domains have never had faulting
		 * enforced on them before, there might plausibly be other
		 * dependencies on host CPUID data. Therefore, we have left
		 * an interim escape hatch in the form of
		 * `dom0=no-cpuid-faulting` to restore the older behaviour.
		 */
		enable_cpuid_faulting = nextd && (opt_dom0_cpuid_faulting ||
						  !is_control_domain(nextd) ||
						  !is_pv_domain(nextd)) &&
			(is_pv_domain(nextd) ||
			 next->arch.msrs->
			 misc_features_enables.cpuid_faulting);

		if (cpu_has_cpuid_faulting)
			set_cpuid_faulting(enable_cpuid_faulting);
		else
			amd_set_cpuid_user_dis(enable_cpuid_faulting);

		return;
	}

	if (ctxt_switch_masking)
		alternative_vcall(ctxt_switch_masking, next);
}

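/*
 * Data Operand Independent Timing Mode: if the MSR interface is enumerated,
 * set the DOITM bit in MSR_UARCH_MISC_CTRL on this CPU, or clear it when
 * opt_dit has been disabled.
 */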
static void setup_doitm(void)
{
    uint64_t msr;

    if ( !cpu_has_doitm )
        return;

    /*
     * We don't currently enumerate DOITM to guests. As a consequence, guest
     * kernels will believe they're safe even when they are not.
     *
     * For now, set it unilaterally. This prevents otherwise-correct crypto
     * code from becoming vulnerable to timing sidechannels.
     */

    rdmsrl(MSR_UARCH_MISC_CTRL, msr);
    msr |= UARCH_CTRL_DOITM;
    if ( !opt_dit )
        msr &= ~UARCH_CTRL_DOITM;
    wrmsrl(MSR_UARCH_MISC_CTRL, msr);
}

bool opt_cpu_info;
boolean_param("cpuinfo", opt_cpu_info);

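/*
 * Fill c->x86_model_id from CPUID leaves 0x80000002-4, stripping the leading
 * spaces with which some Intel parts pad the brand string.  Returns 1 on
 * success, or 0 if the extended leaves aren't available.
 */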
int get_model_name(struct cpuinfo_x86 *c)
{
	unsigned int *v;
	char *p, *q;

	if (c->extended_cpuid_level < 0x80000004)
		return 0;

	v = (unsigned int *) c->x86_model_id;
	cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
	cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
	cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
	c->x86_model_id[48] = 0;

	/* Intel chips right-justify this string for some dumb reason;
	   undo that brain damage */
	p = q = &c->x86_model_id[0];
	while ( *p == ' ' )
		p++;
	if ( p != q ) {
		while ( *p )
			*q++ = *p++;
		while ( q <= &c->x86_model_id[48] )
			*q++ = '\0'; /* Zero-pad the rest */
	}

	return 1;
}


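/*
 * Record L1/L2/L3 cache sizes from CPUID leaves 0x80000005/0x80000006 in
 * c->x86_cache_size (in KB), printing the details when "cpuinfo" logging is
 * enabled.
 */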
void display_cacheinfo(struct cpuinfo_x86 *c)
{
	unsigned int dummy, ecx, edx, size;

	if (c->extended_cpuid_level >= 0x80000005) {
		cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
		if ((edx | ecx) >> 24) {
			if (opt_cpu_info)
				printk("CPU: L1 I cache %uK (%u bytes/line),"
				       " D cache %uK (%u bytes/line)\n",
				       edx >> 24, edx & 0xFF, ecx >> 24, ecx & 0xFF);
			c->x86_cache_size = (ecx >> 24) + (edx >> 24);
		}
	}

	if (c->extended_cpuid_level < 0x80000006) /* Some chips just have a large L1. */
		return;

	cpuid(0x80000006, &dummy, &dummy, &ecx, &edx);

	size = ecx >> 16;
	if (size) {
		c->x86_cache_size = size;

		if (opt_cpu_info)
			printk("CPU: L2 Cache: %uK (%u bytes/line)\n",
			       size, ecx & 0xFF);
	}

	size = edx >> 18;
	if (size) {
		c->x86_cache_size = size * 512;

		if (opt_cpu_info)
			printk("CPU: L3 Cache: %uM (%u bytes/line)\n",
			       (size + (size & 1)) >> 1, edx & 0xFF);
	}
}

static inline u32 _phys_pkg_id(u32 cpuid_apic, int index_msb)
{
	return cpuid_apic >> index_msb;
}

/*
 * cpuid returns the value latched in the HW at reset, not the APIC ID
 * register's value. For any box whose BIOS changes APIC IDs, like
 * clustered APIC systems, we must use get_apic_id().
 *
 * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
 */
static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
{
	return _phys_pkg_id(get_apic_id(), index_msb);
}

/* Do minimum CPU detection early.
   Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
   The others are not touched to avoid unwanted side effects.

   WARNING: this function is only called on the BP. Don't add code here
   that is supposed to run on all CPUs. */
void __init early_cpu_init(bool verbose)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;
	u32 eax, ebx, ecx, edx;

	c->x86_cache_alignment = 32;

	/* Get vendor name */
	cpuid(0x00000000, &c->cpuid_level, &ebx, &ecx, &edx);
	*(u32 *)&c->x86_vendor_id[0] = ebx;
	*(u32 *)&c->x86_vendor_id[8] = ecx;
	*(u32 *)&c->x86_vendor_id[4] = edx;

	c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx);
	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL:    intel_unlock_cpuid_leaves(c);
	                          actual_cpu = intel_cpu_dev;    break;
	case X86_VENDOR_AMD:      actual_cpu = amd_cpu_dev;      break;
	case X86_VENDOR_CENTAUR:  actual_cpu = centaur_cpu_dev;  break;
	case X86_VENDOR_SHANGHAI: actual_cpu = shanghai_cpu_dev; break;
	case X86_VENDOR_HYGON:    actual_cpu = hygon_cpu_dev;    break;
	default:
		actual_cpu = default_cpu;
		if (!verbose)
			break;
		printk(XENLOG_ERR
		       "Unrecognised or unsupported CPU vendor '%.12s'\n",
		       c->x86_vendor_id);
	}

	cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
	c->x86 = get_cpu_family(eax, &c->x86_model, &c->x86_mask);

	edx &= ~cleared_caps[FEATURESET_1d];
	ecx &= ~cleared_caps[FEATURESET_1c];
	if (edx & cpufeat_mask(X86_FEATURE_CLFLUSH)) {
		unsigned int size = ((ebx >> 8) & 0xff) * 8;

		c->x86_cache_alignment = size;

		/*
		 * Patch in parameters of clear_page_cold()'s CLZERO
		 * alternative. Note that for now we cap this at 128 bytes.
		 * Larger cache line sizes would still be dealt with
		 * correctly, but would cause redundant work done.
		 */
		if (size > 128)
			size = 128;
		if (size && !(size & (size - 1))) {
			/*
			 * Need to play some games to keep the compiler from
			 * recognizing the negative array index as being out
			 * of bounds. The labels in assembler code really are
			 * _after_ the locations to be patched, so the
			 * negative index is intentional.
			 */
			uint32_t *pcount = clear_page_clzero_post_count;
			int8_t *neg_size = clear_page_clzero_post_neg_size;

			OPTIMIZER_HIDE_VAR(pcount);
			OPTIMIZER_HIDE_VAR(neg_size);
			pcount[-1] = PAGE_SIZE / size;
			neg_size[-1] = -size;
		}
		else
			setup_clear_cpu_cap(X86_FEATURE_CLZERO);
	}
	/* Leaf 0x1 capabilities filled in early for Xen. */
	c->x86_capability[FEATURESET_1d] = edx;
	c->x86_capability[FEATURESET_1c] = ecx;

	if (verbose)
		printk(XENLOG_INFO
		       "CPU Vendor: %s, Family %u (%#x), "
		       "Model %u (%#x), Stepping %u (raw %08x)\n",
		       x86_cpuid_vendor_to_str(c->x86_vendor), c->x86,
		       c->x86, c->x86_model, c->x86_model, c->x86_mask,
		       eax);

	if (c->cpuid_level >= 7) {
		uint32_t max_subleaf;

		cpuid_count(7, 0, &max_subleaf, &ebx,
			    &c->x86_capability[FEATURESET_7c0],
			    &c->x86_capability[FEATURESET_7d0]);

		if (test_bit(X86_FEATURE_ARCH_CAPS, c->x86_capability))
			rdmsr(MSR_ARCH_CAPABILITIES,
			      c->x86_capability[FEATURESET_m10Al],
			      c->x86_capability[FEATURESET_m10Ah]);

		if (max_subleaf >= 1)
			cpuid_count(7, 1, &eax, &ebx, &ecx,
				    &c->x86_capability[FEATURESET_7d1]);
	}

	eax = cpuid_eax(0x80000000);
	if ((eax >> 16) == 0x8000 && eax >= 0x80000008) {
		ebx = eax >= 0x8000001f ? cpuid_ebx(0x8000001f) : 0;
		eax = cpuid_eax(0x80000008);

		paddr_bits = eax & 0xff;
		if (paddr_bits > PADDR_BITS)
			paddr_bits = PADDR_BITS;

		vaddr_bits = (eax >> 8) & 0xff;
		if (vaddr_bits > VADDR_BITS)
			vaddr_bits = VADDR_BITS;

		hap_paddr_bits = ((eax >> 16) & 0xff) ?: paddr_bits;
		if (hap_paddr_bits > PADDR_BITS)
			hap_paddr_bits = PADDR_BITS;

		/* Account for SME's physical address space reduction. */
		paddr_bits -= (ebx >> 6) & 0x3f;
	}

	if (!(c->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)))
		park_offline_cpus = opt_mce;

	initialize_cpu_data(0);
}

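/*
 * Reset a cpuinfo_x86 structure to its defaults.  Unless keep_basic is set,
 * the vendor/family/model/stepping identification, the capability words and
 * the vendor/model strings are wiped as well.
 */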
void reset_cpuinfo(struct cpuinfo_x86 *c, bool keep_basic)
{
    if ( !keep_basic )
    {
        c->x86_vendor = 0;
        c->x86 = 0;
        c->x86_model = 0;
        c->x86_mask = 0;
        memset(&c->x86_capability, 0, sizeof(c->x86_capability));
        memset(&c->x86_vendor_id, 0, sizeof(c->x86_vendor_id));
        memset(&c->x86_model_id, 0, sizeof(c->x86_model_id));
    }

    CPU_DATA_INIT((*c));
}

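/*
 * Vendor-independent CPU identification: re-read the identity and feature
 * leaves for this CPU, cross-check the vendor against the BSP, and invoke
 * the vendor's c_early_init() hook before collecting the extended and
 * structured-extended feature words.
 */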
static void generic_identify(struct cpuinfo_x86 *c)
{
	u32 eax, ebx, ecx, edx, tmp;

	/* Get vendor name */
	cpuid(0, &c->cpuid_level, &ebx, &ecx, &edx);
	*(u32 *)&c->x86_vendor_id[0] = ebx;
	*(u32 *)&c->x86_vendor_id[8] = ecx;
	*(u32 *)&c->x86_vendor_id[4] = edx;

	c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx);
	if (boot_cpu_data.x86_vendor != c->x86_vendor)
		printk(XENLOG_ERR "CPU%u vendor %u mismatch against BSP %u\n",
		       smp_processor_id(), c->x86_vendor,
		       boot_cpu_data.x86_vendor);

	/* Initialize the standard set of capabilities */
	/* Note that the vendor-specific code below might override */

	/* Model and family information. */
	cpuid(1, &eax, &ebx, &ecx, &edx);
	c->x86 = get_cpu_family(eax, &c->x86_model, &c->x86_mask);
	c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
	c->phys_proc_id = c->apicid;

	eax = cpuid_eax(0x80000000);
	if ((eax >> 16) == 0x8000)
		c->extended_cpuid_level = eax;

	/*
	 * These AMD-defined flags are out of place, but we need
	 * them early for the CPUID faulting probe code
	 */
	if (c->extended_cpuid_level >= 0x80000021)
		c->x86_capability[FEATURESET_e21a] = cpuid_eax(0x80000021);

	if (actual_cpu.c_early_init)
		alternative_vcall(actual_cpu.c_early_init, c);

	/* c_early_init() may have adjusted cpuid levels/features. Reread. */
	c->cpuid_level = cpuid_eax(0);
	cpuid(1, &eax, &ebx,
	      &c->x86_capability[FEATURESET_1c],
	      &c->x86_capability[FEATURESET_1d]);

	if ( cpu_has(c, X86_FEATURE_CLFLUSH) )
		c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;

	if ( (c->cpuid_level >= CPUID_PM_LEAF) &&
	     (cpuid_ecx(CPUID_PM_LEAF) & CPUID6_ECX_APERFMPERF_CAPABILITY) )
		__set_bit(X86_FEATURE_APERFMPERF, c->x86_capability);

	/* AMD-defined flags: level 0x80000001 */
	if (c->extended_cpuid_level >= 0x80000001)
		cpuid(0x80000001, &tmp, &tmp,
		      &c->x86_capability[FEATURESET_e1c],
		      &c->x86_capability[FEATURESET_e1d]);

	if (c->extended_cpuid_level >= 0x80000004)
		get_model_name(c); /* Default name */
	if (c->extended_cpuid_level >= 0x80000007)
		c->x86_capability[FEATURESET_e7d] = cpuid_edx(0x80000007);
	if (c->extended_cpuid_level >= 0x80000008)
		c->x86_capability[FEATURESET_e8b] = cpuid_ebx(0x80000008);
	if (c->extended_cpuid_level >= 0x80000021)
		cpuid(0x80000021,
		      &c->x86_capability[FEATURESET_e21a], &tmp,
		      &c->x86_capability[FEATURESET_e21c], &tmp);

	/* Intel-defined flags: level 0x00000007 */
	if (c->cpuid_level >= 7) {
		uint32_t max_subleaf;

		cpuid_count(7, 0, &max_subleaf,
			    &c->x86_capability[FEATURESET_7b0],
			    &c->x86_capability[FEATURESET_7c0],
			    &c->x86_capability[FEATURESET_7d0]);
		if (max_subleaf >= 1)
			cpuid_count(7, 1,
				    &c->x86_capability[FEATURESET_7a1],
				    &c->x86_capability[FEATURESET_7b1],
				    &c->x86_capability[FEATURESET_7c1],
				    &c->x86_capability[FEATURESET_7d1]);
		if (max_subleaf >= 2)
			cpuid_count(7, 2,
				    &tmp, &tmp, &tmp,
				    &c->x86_capability[FEATURESET_7d2]);
	}

	if (c->cpuid_level >= 0xd)
		cpuid_count(0xd, 1,
			    &c->x86_capability[FEATURESET_Da1],
			    &tmp, &tmp, &tmp);

	if (test_bit(X86_FEATURE_ARCH_CAPS, c->x86_capability))
		rdmsr(MSR_ARCH_CAPABILITIES,
		      c->x86_capability[FEATURESET_m10Al],
		      c->x86_capability[FEATURESET_m10Ah]);
}

/*
 * This does the hard work of actually picking apart the CPU stuff...
 */
void identify_cpu(struct cpuinfo_x86 *c)
{
	int i;

	reset_cpuinfo(c, false);
	generic_identify(c);

#ifdef NOISY_CAPS
	printk(KERN_DEBUG "CPU: After vendor identify, caps:");
	for (i = 0; i < NCAPINTS; i++)
		printk(" %08x", c->x86_capability[i]);
	printk("\n");
#endif

	/*
	 * Vendor-specific initialization. In this section we
	 * canonicalize the feature flags, meaning if there are
	 * features a certain CPU supports which CPUID doesn't
	 * tell us, CPUID claiming incorrect flags, or other bugs,
	 * we handle them here.
	 *
	 * At the end of this section, c->x86_capability better
	 * indicate the features this CPU genuinely supports!
	 */
	if (actual_cpu.c_init)
		alternative_vcall(actual_cpu.c_init, c);

	/*
	 * The vendor-specific functions might have changed features. Now
	 * we do "generic changes."
	 */
	for (i = 0; i < FSCAPINTS; ++i)
		c->x86_capability[i] &= known_features[i];

	for (i = 0 ; i < NCAPINTS ; ++i) {
		c->x86_capability[i] |= forced_caps[i];
		c->x86_capability[i] &= ~cleared_caps[i];
	}

	/* If the model name is still unset, do table lookup. */
	if ( !c->x86_model_id[0] ) {
		/* Last resort... */
		snprintf(c->x86_model_id, sizeof(c->x86_model_id),
			 "%02x/%02x", c->x86_vendor, c->x86_model);
	}

	/* Now the feature flags better reflect actual CPU features! */

	xstate_init(c);

#ifdef NOISY_CAPS
	printk(KERN_DEBUG "CPU: After all inits, caps:");
	for (i = 0; i < NCAPINTS; i++)
		printk(" %08x", c->x86_capability[i]);
	printk("\n");
#endif

	/*
	 * If RDRAND is available, make an attempt to check that it actually
	 * (still) works.
	 */
	if (cpu_has(c, X86_FEATURE_RDRAND)) {
		unsigned int prev = 0;

		for (i = 0; i < 5; ++i)
		{
			unsigned int cur = arch_get_random();

			if (prev && cur != prev)
				break;
			prev = cur;
		}

		if (i >= 5)
			printk(XENLOG_WARNING "CPU%u: RDRAND appears to not work\n",
			       smp_processor_id());
	}

	if (system_state == SYS_STATE_resume)
		return;

	/*
	 * On SMP, boot_cpu_data holds the common feature set between
	 * all CPUs; so make sure that we indicate which features are
	 * common between the CPUs. The first time this routine gets
	 * executed, c == &boot_cpu_data.
	 */
	if ( c != &boot_cpu_data ) {
		/* AND the already accumulated flags with these */
		for ( i = 0 ; i < NCAPINTS ; i++ )
			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];

		mcheck_init(c, false);
	} else {
		mcheck_init(c, true);

		mtrr_bp_init();
	}

	setup_doitm();
}

/* leaf 0xb SMT level */
#define SMT_LEVEL	0

/* leaf 0xb sub-leaf types */
#define INVALID_TYPE	0
#define SMT_TYPE	1
#define CORE_TYPE	2

#define LEAFB_SUBTYPE(ecx)		(((ecx) >> 8) & 0xff)
#define BITS_SHIFT_NEXT_LEVEL(eax)	((eax) & 0x1f)
#define LEVEL_MAX_SIBLINGS(ebx)		((ebx) & 0xffff)

/*
 * Check for extended topology enumeration cpuid leaf 0xb and if it
 * exists, use it for cpu topology detection.
 */
bool detect_extended_topology(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx, sub_index;
	unsigned int ht_mask_width, core_plus_mask_width;
	unsigned int core_select_mask, core_level_siblings;
	unsigned int initial_apicid;

	if ( c->cpuid_level < 0xb )
		return false;

	cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);

	/* Check if the cpuid leaf 0xb is actually implemented */
	if ( ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE) )
		return false;

	__set_bit(X86_FEATURE_XTOPOLOGY, c->x86_capability);

	initial_apicid = edx;

	/* Populate HT related information from sub-leaf level 0 */
	core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
	core_level_siblings = c->x86_num_siblings = 1u << ht_mask_width;

	sub_index = 1;
	do {
		cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);

		/* Check for the Core type in the implemented sub leaves */
		if ( LEAFB_SUBTYPE(ecx) == CORE_TYPE ) {
			core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
			core_level_siblings = 1u << core_plus_mask_width;
			break;
		}

		sub_index++;
	} while ( LEAFB_SUBTYPE(ecx) != INVALID_TYPE );

	core_select_mask = (~(~0u << core_plus_mask_width)) >> ht_mask_width;

	c->cpu_core_id = phys_pkg_id(initial_apicid, ht_mask_width)
		& core_select_mask;
	c->phys_proc_id = phys_pkg_id(initial_apicid, core_plus_mask_width);

	c->apicid = phys_pkg_id(initial_apicid, 0);
	c->x86_max_cores = (core_level_siblings / c->x86_num_siblings);

	if ( opt_cpu_info )
	{
		printk("CPU: Physical Processor ID: %d\n",
		       c->phys_proc_id);
		if ( c->x86_max_cores > 1 )
			printk("CPU: Processor Core ID: %d\n",
			       c->cpu_core_id);
	}

	return true;
}

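/*
 * Legacy hyperthreading detection via CPUID leaf 1: derive phys_proc_id and
 * cpu_core_id from the initial APIC ID and the sibling count, for CPUs which
 * lack the extended topology leaf and aren't in CMP-legacy mode.
 */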
void detect_ht(struct cpuinfo_x86 *c)
{
	u32 eax, ebx, ecx, edx;
	int index_msb, core_bits;

	if (!cpu_has(c, X86_FEATURE_HTT) ||
	    cpu_has(c, X86_FEATURE_CMP_LEGACY) ||
	    cpu_has(c, X86_FEATURE_XTOPOLOGY))
		return;

	cpuid(1, &eax, &ebx, &ecx, &edx);
	c->x86_num_siblings = (ebx & 0xff0000) >> 16;

	if (c->x86_num_siblings == 1) {
		printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
	} else if (c->x86_num_siblings > 1) {
		index_msb = get_count_order(c->x86_num_siblings);
		c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);

		if (opt_cpu_info)
			printk("CPU: Physical Processor ID: %d\n",
			       c->phys_proc_id);

		c->x86_num_siblings = c->x86_num_siblings / c->x86_max_cores;

		index_msb = get_count_order(c->x86_num_siblings);

		core_bits = get_count_order(c->x86_max_cores);

		c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
			((1 << core_bits) - 1);

		if (opt_cpu_info && c->x86_max_cores > 1)
			printk("CPU: Processor Core ID: %d\n",
			       c->cpu_core_id);
	}
}

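/*
 * Map an APIC ID to its socket (package) number, preferring the extended
 * topology leaf's core-plus-SMT width when available, then the legacy HTT
 * sibling count, and finally falling back to the APIC ID itself.
 */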
unsigned int __init apicid_to_socket(unsigned int apicid)
{
	unsigned int dummy;

	if (boot_cpu_has(X86_FEATURE_XTOPOLOGY)) {
		unsigned int eax, ecx, sub_index = 1, core_plus_mask_width;

		cpuid_count(0xb, SMT_LEVEL, &eax, &dummy, &dummy, &dummy);
		core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
		do {
			cpuid_count(0xb, sub_index, &eax, &dummy, &ecx,
				    &dummy);

			if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
				core_plus_mask_width =
					BITS_SHIFT_NEXT_LEVEL(eax);
				break;
			}

			sub_index++;
		} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);

		return _phys_pkg_id(apicid, core_plus_mask_width);
	}

	if (boot_cpu_has(X86_FEATURE_HTT) &&
	    !boot_cpu_has(X86_FEATURE_CMP_LEGACY)) {
		unsigned int num_siblings = (cpuid_ebx(1) & 0xff0000) >> 16;

		if (num_siblings)
			return _phys_pkg_id(apicid,
					    get_count_order(num_siblings));
	}

	return apicid;
}

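/*
 * Log a one-line summary (vendor, model string, stepping) for the given CPU,
 * but only when "cpuinfo" logging has been requested on the command line.
 */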
void print_cpu_info(unsigned int cpu)
{
	const struct cpuinfo_x86 *c = cpu_data + cpu;
	const char *vendor = NULL;

	if (!opt_cpu_info)
		return;

	printk("CPU%u: ", cpu);

	vendor = x86_cpuid_vendor_to_str(c->x86_vendor);
	if (strncmp(c->x86_model_id, vendor, strlen(vendor)))
		printk("%s ", vendor);

	if (!c->x86_model_id[0])
		printk("%d86", c->x86);
	else
		printk("%s", c->x86_model_id);

	printk(" stepping %02x\n", c->x86_mask);
}

static cpumask_t cpu_initialized;

static void skinit_enable_intr(void)
{
    uint64_t val;

    /*
     * If the platform is performing a Secure Launch via SKINIT, the
     * INIT_REDIRECTION flag will be active.
     */
    if ( !cpu_has_skinit || rdmsr_safe(MSR_K8_VM_CR, val) ||
         !(val & VM_CR_INIT_REDIRECTION) )
        return;

    ap_boot_method = AP_BOOT_SKINIT;

    /*
     * We don't yet handle #SX. Disable INIT_REDIRECTION first, before
     * enabling GIF, so a pending INIT resets us, rather than causing a
     * panic due to an unknown exception.
     */
    wrmsrl(MSR_K8_VM_CR, val & ~VM_CR_INIT_REDIRECTION);
    asm volatile ( "stgi" ::: "memory" );
}

/*
 * cpu_init() initializes state that is per-CPU. Some data is already
 * initialized (naturally) in the bootstrap process, such as the GDT
 * and IDT. We reload them nevertheless, this function acts as a
 * 'CPU state barrier', nothing should get across.
 */
void cpu_init(void)
{
	int cpu = smp_processor_id();

	if (cpumask_test_and_set_cpu(cpu, &cpu_initialized)) {
		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
		for (;;) local_irq_enable();
	}
	if (opt_cpu_info)
		printk("Initializing CPU#%d\n", cpu);

	/* Install correct page table. */
	write_ptbase(current);

	/* Ensure FPU gets initialised for each domain. */
	stts();

	/* Reset debug registers: */
	write_debugreg(0, 0);
	write_debugreg(1, 0);
	write_debugreg(2, 0);
	write_debugreg(3, 0);
	write_debugreg(6, X86_DR6_DEFAULT);
	write_debugreg(7, X86_DR7_DEFAULT);

	if (cpu_has_pku)
		wrpkru(0);

	/*
	 * If the platform is performing a Secure Launch via SKINIT, GIF is
	 * clear to prevent external interrupts interfering with Secure
	 * Startup. Re-enable all interrupts now that we are suitably set up.
	 *
	 * Refer to AMD APM Vol2 15.27 "Secure Startup with SKINIT".
	 */
	skinit_enable_intr();

	/* Enable NMIs. Our loader (e.g. Tboot) may have left them disabled. */
	enable_nmis();
}

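/* Mark a CPU as uninitialized, so cpu_init() can run again when it returns. */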
void cpu_uninit(unsigned int cpu)
{
	cpumask_clear_cpu(cpu, &cpu_initialized);
}

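/*
 * Return the first row of 'table' matching the boot CPU's vendor, family,
 * model, stepping and feature, or NULL if nothing matches.  As a sketch
 * (hypothetical values, not taken from real callers), a caller might build
 * a table along the lines of:
 *
 *   static const struct x86_cpu_id table[] = {
 *       { .vendor = X86_VENDOR_INTEL, .family = 6, .model = 0x55,
 *         .steppings = 0xffff, .feature = X86_FEATURE_ALWAYS },
 *       { }
 *   };
 *
 * with a zero-vendor row terminating the table, as the loop below relies on.
 */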
const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id table[])
{
    const struct x86_cpu_id *m;
    const struct cpuinfo_x86 *c = &boot_cpu_data;

    /*
     * Although derived from Linux originally, Xen has no valid rows where
     * ->vendor is zero, so use this in place of checking all metadata.
     */
    for ( m = table; m->vendor; m++ )
    {
        if ( c->vendor != m->vendor )
            continue;
        if ( c->family != m->family )
            continue;
        if ( c->model != m->model )
            continue;
        if ( !((1U << c->stepping) & m->steppings) )
            continue;
        if ( !cpu_has(c, m->feature) )
            continue;

        return m;
    }

    return NULL;
}