#include <xen/delay.h>
#include <xen/init.h>
#include <xen/param.h>
#include <xen/smp.h>
#include <xen/string.h>

#include <asm/amd.h>
#include <asm/apic.h>
#include <asm/cpu-policy.h>
#include <asm/current.h>
#include <asm/debugreg.h>
#include <asm/guest-msr.h>
#include <asm/idt.h>
#include <asm/io.h>
#include <asm/match-cpu.h>
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/prot-key.h>
#include <asm/random.h>
#include <asm/setup.h>
#include <asm/shstk.h>
#include <asm/xstate.h>

#include <public/sysctl.h>

#include "cpu.h"
#include "mcheck/x86_mca.h"

bool __read_mostly opt_dom0_cpuid_faulting = true;

bool opt_arat = true;
boolean_param("arat", opt_arat);

unsigned int opt_cpuid_mask_ecx = ~0u;
integer_param("cpuid_mask_ecx", opt_cpuid_mask_ecx);
unsigned int opt_cpuid_mask_edx = ~0u;
integer_param("cpuid_mask_edx", opt_cpuid_mask_edx);

unsigned int opt_cpuid_mask_xsave_eax = ~0u;
integer_param("cpuid_mask_xsave_eax", opt_cpuid_mask_xsave_eax);

unsigned int opt_cpuid_mask_ext_ecx = ~0u;
integer_param("cpuid_mask_ext_ecx", opt_cpuid_mask_ext_ecx);
unsigned int opt_cpuid_mask_ext_edx = ~0u;
integer_param("cpuid_mask_ext_edx", opt_cpuid_mask_ext_edx);

unsigned int __initdata expected_levelling_cap;
unsigned int __read_mostly levelling_caps;

DEFINE_PER_CPU(struct cpuidmasks, cpuidmasks);
struct cpuidmasks __read_mostly cpuidmask_defaults;

unsigned int paddr_bits __read_mostly = 36;
unsigned int hap_paddr_bits __read_mostly = 36;
unsigned int vaddr_bits __read_mostly = VADDR_BITS;

static unsigned int cleared_caps[NCAPINTS];
static unsigned int forced_caps[NCAPINTS];

DEFINE_PER_CPU(bool, full_gdt_loaded);

DEFINE_PER_CPU(uint32_t, pkrs);

extern uint32_t clear_page_clzero_post_count[];
extern int8_t clear_page_clzero_post_neg_size[];

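/*
 * Clear a feature from the boot CPU's featureset, along with all features
 * which x86_cpu_policy_lookup_deep_deps() reports as depending on it.  The
 * result is recorded in cleared_caps so it is also applied to CPUs brought
 * up later, and clashes with setup_force_cpu_cap() are logged.
 */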
void __init setup_clear_cpu_cap(unsigned int cap)
{
	const uint32_t *dfs;
	unsigned int i;

	if (__test_and_set_bit(cap, cleared_caps))
		return;

	if (test_bit(cap, forced_caps))
		printk("%pS clearing previously forced feature %#x\n",
		       __builtin_return_address(0), cap);

	__clear_bit(cap, boot_cpu_data.x86_capability);
	dfs = x86_cpu_policy_lookup_deep_deps(cap);

	if (!dfs)
		return;

	for (i = 0; i < FSCAPINTS; ++i) {
		cleared_caps[i] |= dfs[i];
		boot_cpu_data.x86_capability[i] &= ~dfs[i];
		if (!(forced_caps[i] & dfs[i]))
			continue;
		printk("%pS implicitly clearing previously forced feature(s) %u:%#x\n",
		       __builtin_return_address(0),
		       i, forced_caps[i] & dfs[i]);
	}
}

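/*
 * Force a feature to be set in the boot CPU's featureset.  Ignored (with a
 * diagnostic) if the feature has already been cleared; forced features are
 * re-applied to every CPU via forced_caps in identify_cpu().
 */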
void __init setup_force_cpu_cap(unsigned int cap)
{
	if (__test_and_set_bit(cap, forced_caps))
		return;

	if (test_bit(cap, cleared_caps)) {
		printk("%pS tries to force previously cleared feature %#x\n",
		       __builtin_return_address(0), cap);
		return;
	}

	__set_bit(cap, boot_cpu_data.x86_capability);
}

bool __init is_forced_cpu_cap(unsigned int cap)
{
	return test_bit(cap, forced_caps);
}

static void cf_check default_init(struct cpuinfo_x86 * c)
{
	/* Not much we can do here... */
	__clear_bit(X86_FEATURE_SEP, c->x86_capability);
}

static const struct cpu_dev __initconst_cf_clobber __used default_cpu = {
	.c_init	= default_init,
};
static struct cpu_dev __ro_after_init actual_cpu;

static DEFINE_PER_CPU(uint64_t, msr_misc_features);
void (* __ro_after_init ctxt_switch_masking)(const struct vcpu *next);

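/*
 * Probe for CPUID faulting support.  Reads MSR_INTEL_PLATFORM_INFO to see
 * whether the feature is advertised, caches the current value of
 * MSR_INTEL_MISC_FEATURES_ENABLES, and forces or clears the synthetic
 * X86_FEATURE_CPUID_FAULTING bit accordingly.
 */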
bool __init probe_cpuid_faulting(void)
{
	uint64_t val;
	int rc;

	if ((rc = rdmsr_safe(MSR_INTEL_PLATFORM_INFO, val)) == 0)
		raw_cpu_policy.platform_info.cpuid_faulting =
			val & MSR_PLATFORM_INFO_CPUID_FAULTING;

	if (rc ||
	    !(val & MSR_PLATFORM_INFO_CPUID_FAULTING) ||
	    rdmsr_safe(MSR_INTEL_MISC_FEATURES_ENABLES,
		       this_cpu(msr_misc_features)))
	{
		setup_clear_cpu_cap(X86_FEATURE_CPUID_FAULTING);
		return false;
	}

	setup_force_cpu_cap(X86_FEATURE_CPUID_FAULTING);

	return true;
}

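/*
 * Enable or disable CPUID faulting on this CPU, using the cached copy of
 * MSR_INTEL_MISC_FEATURES_ENABLES to skip the MSR write when the setting
 * is already correct.
 */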
static void set_cpuid_faulting(bool enable)
{
	uint64_t *this_misc_features = &this_cpu(msr_misc_features);
	uint64_t val = *this_misc_features;

	if (!!(val & MSR_MISC_FEATURES_CPUID_FAULTING) == enable)
		return;

	val ^= MSR_MISC_FEATURES_CPUID_FAULTING;

	wrmsrl(MSR_INTEL_MISC_FEATURES_ENABLES, val);
	*this_misc_features = val;
}

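/*
 * Apply CPUID levelling for the incoming vcpu.  Prefer architectural CPUID
 * faulting (Intel) or CPUID user disable (AMD/Hygon) when available, and
 * fall back to the vendor's MSR-based masking hook otherwise.
 */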
void ctxt_switch_levelling(const struct vcpu *next)
{
	const struct domain *nextd = next ? next->domain : NULL;
	bool enable_cpuid_faulting;

	if (cpu_has_cpuid_faulting ||
	    boot_cpu_has(X86_FEATURE_CPUID_USER_DIS)) {
		/*
		 * No need to alter the faulting setting if we are switching
		 * to idle; it won't affect any code running in idle context.
		 */
		if (nextd && is_idle_domain(nextd))
			return;
		/*
		 * We *should* be enabling faulting for PV control domains.
		 *
		 * The domain builder has now been updated to not depend on
		 * seeing host CPUID values.  This makes it compatible with
		 * PVH toolstack domains, and lets us enable faulting by
		 * default for all PV domains.
		 *
		 * However, as PV control domains have never had faulting
		 * enforced on them before, there might plausibly be other
		 * dependencies on host CPUID data.  Therefore, we have left
		 * an interim escape hatch in the form of
		 * `dom0=no-cpuid-faulting` to restore the older behaviour.
		 */
		enable_cpuid_faulting = nextd && (opt_dom0_cpuid_faulting ||
		                                  !is_control_domain(nextd) ||
		                                  !is_pv_domain(nextd)) &&
		                        (is_pv_domain(nextd) ||
		                         next->arch.msrs->
		                         misc_features_enables.cpuid_faulting);

		if (cpu_has_cpuid_faulting)
			set_cpuid_faulting(enable_cpuid_faulting);
		else
			amd_set_cpuid_user_dis(enable_cpuid_faulting);

		return;
	}

	if (ctxt_switch_masking)
		alternative_vcall(ctxt_switch_masking, next);
}

static void setup_doitm(void)
{
    uint64_t msr;

    if ( !cpu_has_doitm )
        return;

    /*
     * We don't currently enumerate DOITM to guests.  As a consequence, guest
     * kernels will believe they're safe even when they are not.
     *
     * For now, set it unilaterally.  This prevents otherwise-correct crypto
     * code from becoming vulnerable to timing side channels.
     */

    rdmsrl(MSR_UARCH_MISC_CTRL, msr);
    msr |= UARCH_CTRL_DOITM;
    if ( !opt_dit )
        msr &= ~UARCH_CTRL_DOITM;
    wrmsrl(MSR_UARCH_MISC_CTRL, msr);
}

bool opt_cpu_info;
boolean_param("cpuinfo", opt_cpu_info);

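/*
 * Read the processor brand string from CPUID leaves 0x80000002-0x80000004
 * into c->x86_model_id.  Returns 0 if the leaves are not available.
 */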
int get_model_name(struct cpuinfo_x86 *c)
{
	unsigned int *v;
	char *p, *q;

	if (c->extended_cpuid_level < 0x80000004)
		return 0;

	v = (unsigned int *) c->x86_model_id;
	cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
	cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
	cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
	c->x86_model_id[48] = 0;

	/* Intel chips right-justify this string for some dumb reason;
	   undo that brain damage */
	p = q = &c->x86_model_id[0];
	while ( *p == ' ' )
	     p++;
	if ( p != q ) {
	     while ( *p )
		  *q++ = *p++;
	     while ( q <= &c->x86_model_id[48] )
		  *q++ = '\0';	/* Zero-pad the rest */
	}

	return 1;
}

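/*
 * Decode L1/L2/L3 cache details from CPUID leaves 0x80000005/0x80000006,
 * recording the size (in KB) of the highest-level cache reported in
 * c->x86_cache_size, and printing the geometry when "cpuinfo" is given.
 */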
void display_cacheinfo(struct cpuinfo_x86 *c)
{
	unsigned int dummy, ecx, edx, size;

	if (c->extended_cpuid_level >= 0x80000005) {
		cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
		if ((edx | ecx) >> 24) {
			if (opt_cpu_info)
				printk("CPU: L1 I cache %uK (%u bytes/line),"
				       " D cache %uK (%u bytes/line)\n",
				       edx >> 24, edx & 0xFF, ecx >> 24, ecx & 0xFF);
			c->x86_cache_size = (ecx >> 24) + (edx >> 24);
		}
	}

	if (c->extended_cpuid_level < 0x80000006)	/* Some chips just have a large L1. */
		return;

	cpuid(0x80000006, &dummy, &dummy, &ecx, &edx);

	size = ecx >> 16;
	if (size) {
		c->x86_cache_size = size;

		if (opt_cpu_info)
			printk("CPU: L2 Cache: %uK (%u bytes/line)\n",
			       size, ecx & 0xFF);
	}

	size = edx >> 18;
	if (size) {
		c->x86_cache_size = size * 512;

		if (opt_cpu_info)
			printk("CPU: L3 Cache: %uM (%u bytes/line)\n",
			       (size + (size & 1)) >> 1, edx & 0xFF);
	}
}

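/* Extract a package ID from an APIC ID by shifting out the lower topology bits. */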
static inline u32 _phys_pkg_id(u32 cpuid_apic, int index_msb)
{
	return cpuid_apic >> index_msb;
}

/*
 * cpuid returns the value latched in the HW at reset, not the APIC ID
 * register's value.  For any box whose BIOS changes APIC IDs, like
 * clustered APIC systems, we must use get_apic_id().
 *
 * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
 */
static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
{
	return _phys_pkg_id(get_apic_id(), index_msb);
}

/* Do minimum CPU detection early.
   Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
   The others are not touched to avoid unwanted side effects.

   WARNING: this function is only called on the BP.  Don't add code here
   that is supposed to run on all CPUs. */
void __init early_cpu_init(bool verbose)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;
	u32 eax, ebx, ecx, edx;

	c->x86_cache_alignment = 32;

	/* Get vendor name */
	cpuid(0x00000000, &c->cpuid_level, &ebx, &ecx, &edx);
	*(u32 *)&c->x86_vendor_id[0] = ebx;
	*(u32 *)&c->x86_vendor_id[8] = ecx;
	*(u32 *)&c->x86_vendor_id[4] = edx;

	c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx);
	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL:    intel_unlock_cpuid_leaves(c);
				  actual_cpu = intel_cpu_dev;    break;
	case X86_VENDOR_AMD:      actual_cpu = amd_cpu_dev;      break;
	case X86_VENDOR_CENTAUR:  actual_cpu = centaur_cpu_dev;  break;
	case X86_VENDOR_SHANGHAI: actual_cpu = shanghai_cpu_dev; break;
	case X86_VENDOR_HYGON:    actual_cpu = hygon_cpu_dev;    break;
	default:
		actual_cpu = default_cpu;
		if (!verbose)
			break;
		printk(XENLOG_ERR
		       "Unrecognised or unsupported CPU vendor '%.12s'\n",
		       c->x86_vendor_id);
	}

	cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
	c->x86 = get_cpu_family(eax, &c->x86_model, &c->x86_mask);

	edx &= ~cleared_caps[FEATURESET_1d];
	ecx &= ~cleared_caps[FEATURESET_1c];
	if (edx & cpufeat_mask(X86_FEATURE_CLFLUSH)) {
		unsigned int size = ((ebx >> 8) & 0xff) * 8;

		c->x86_cache_alignment = size;

		/*
		 * Patch in parameters of clear_page_cold()'s CLZERO
		 * alternative. Note that for now we cap this at 128 bytes.
		 * Larger cache line sizes would still be dealt with
		 * correctly, but would cause redundant work to be done.
		 */
		if (size > 128)
			size = 128;
		if (size && !(size & (size - 1))) {
			/*
			 * Need to play some games to keep the compiler from
			 * recognizing the negative array index as being out
			 * of bounds. The labels in assembler code really are
			 * _after_ the locations to be patched, so the
			 * negative index is intentional.
			 */
			uint32_t *pcount = clear_page_clzero_post_count;
			int8_t *neg_size = clear_page_clzero_post_neg_size;

			OPTIMIZER_HIDE_VAR(pcount);
			OPTIMIZER_HIDE_VAR(neg_size);
			pcount[-1] = PAGE_SIZE / size;
			neg_size[-1] = -size;
		}
		else
			setup_clear_cpu_cap(X86_FEATURE_CLZERO);
	}
	/* Leaf 0x1 capabilities filled in early for Xen. */
	c->x86_capability[FEATURESET_1d] = edx;
	c->x86_capability[FEATURESET_1c] = ecx;

	if (verbose)
		printk(XENLOG_INFO
		       "CPU Vendor: %s, Family %u (%#x), "
		       "Model %u (%#x), Stepping %u (raw %08x)\n",
		       x86_cpuid_vendor_to_str(c->x86_vendor), c->x86,
		       c->x86, c->x86_model, c->x86_model, c->x86_mask,
		       eax);

	if (c->cpuid_level >= 7) {
		uint32_t max_subleaf;

		cpuid_count(7, 0, &max_subleaf, &ebx,
			    &c->x86_capability[FEATURESET_7c0],
			    &c->x86_capability[FEATURESET_7d0]);

		if (test_bit(X86_FEATURE_ARCH_CAPS, c->x86_capability))
			rdmsr(MSR_ARCH_CAPABILITIES,
			      c->x86_capability[FEATURESET_m10Al],
			      c->x86_capability[FEATURESET_m10Ah]);

		if (max_subleaf >= 1)
			cpuid_count(7, 1, &eax, &ebx, &ecx,
				    &c->x86_capability[FEATURESET_7d1]);
	}

	eax = cpuid_eax(0x80000000);
	if ((eax >> 16) == 0x8000 && eax >= 0x80000008) {
		ebx = eax >= 0x8000001f ? cpuid_ebx(0x8000001f) : 0;
		eax = cpuid_eax(0x80000008);

		paddr_bits = eax & 0xff;
		if (paddr_bits > PADDR_BITS)
			paddr_bits = PADDR_BITS;

		vaddr_bits = (eax >> 8) & 0xff;
		if (vaddr_bits > VADDR_BITS)
			vaddr_bits = VADDR_BITS;

		hap_paddr_bits = ((eax >> 16) & 0xff) ?: paddr_bits;
		if (hap_paddr_bits > PADDR_BITS)
			hap_paddr_bits = PADDR_BITS;

		/* Account for SME's physical address space reduction. */
		paddr_bits -= (ebx >> 6) & 0x3f;
	}

	if (!(c->x86_vendor & (X86_VENDOR_AMD | X86_VENDOR_HYGON)))
		park_offline_cpus = opt_mce;

	initialize_cpu_data(0);
}

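/*
 * Reset a struct cpuinfo_x86 to its default state.  Unless keep_basic is
 * set, the vendor/family/model/stepping identification, feature bits and
 * name strings are cleared as well.
 */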
void reset_cpuinfo(struct cpuinfo_x86 *c, bool keep_basic)
{
    if ( !keep_basic )
    {
        c->x86_vendor = 0;
        c->x86 = 0;
        c->x86_model = 0;
        c->x86_mask = 0;
        memset(&c->x86_capability, 0, sizeof(c->x86_capability));
        memset(&c->x86_vendor_id, 0, sizeof(c->x86_vendor_id));
        memset(&c->x86_model_id, 0, sizeof(c->x86_model_id));
    }

    CPU_DATA_INIT((*c));
}

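/*
 * Re-read the CPUID identification and feature leaves for this CPU,
 * giving the vendor hook (c_early_init) a chance to adjust levels and
 * features in between.  Runs on every CPU as part of identify_cpu().
 */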
static void generic_identify(struct cpuinfo_x86 *c)
{
	u32 eax, ebx, ecx, edx, tmp;

	/* Get vendor name */
	cpuid(0, &c->cpuid_level, &ebx, &ecx, &edx);
	*(u32 *)&c->x86_vendor_id[0] = ebx;
	*(u32 *)&c->x86_vendor_id[8] = ecx;
	*(u32 *)&c->x86_vendor_id[4] = edx;

	c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx);
	if (boot_cpu_data.x86_vendor != c->x86_vendor)
		printk(XENLOG_ERR "CPU%u vendor %u mismatch against BSP %u\n",
		       smp_processor_id(), c->x86_vendor,
		       boot_cpu_data.x86_vendor);

	/* Initialize the standard set of capabilities */
	/* Note that the vendor-specific code below might override */

	/* Model and family information. */
	cpuid(1, &eax, &ebx, &ecx, &edx);
	c->x86 = get_cpu_family(eax, &c->x86_model, &c->x86_mask);
	c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
	c->phys_proc_id = c->apicid;

	eax = cpuid_eax(0x80000000);
	if ((eax >> 16) == 0x8000)
		c->extended_cpuid_level = eax;

	/*
	 * These AMD-defined flags are out of place, but we need
	 * them early for the CPUID faulting probe code
	 */
	if (c->extended_cpuid_level >= 0x80000021)
		c->x86_capability[FEATURESET_e21a] = cpuid_eax(0x80000021);

	if (actual_cpu.c_early_init)
		alternative_vcall(actual_cpu.c_early_init, c);

	/* c_early_init() may have adjusted cpuid levels/features.  Reread. */
	c->cpuid_level = cpuid_eax(0);
	cpuid(1, &eax, &ebx,
	      &c->x86_capability[FEATURESET_1c],
	      &c->x86_capability[FEATURESET_1d]);

	if ( cpu_has(c, X86_FEATURE_CLFLUSH) )
		c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;

	if ( (c->cpuid_level >= CPUID_PM_LEAF) &&
	     (cpuid_ecx(CPUID_PM_LEAF) & CPUID6_ECX_APERFMPERF_CAPABILITY) )
		__set_bit(X86_FEATURE_APERFMPERF, c->x86_capability);

	/* AMD-defined flags: level 0x80000001 */
	if (c->extended_cpuid_level >= 0x80000001)
		cpuid(0x80000001, &tmp, &tmp,
		      &c->x86_capability[FEATURESET_e1c],
		      &c->x86_capability[FEATURESET_e1d]);

	if (c->extended_cpuid_level >= 0x80000004)
		get_model_name(c); /* Default name */
	if (c->extended_cpuid_level >= 0x80000007)
		c->x86_capability[FEATURESET_e7d] = cpuid_edx(0x80000007);
	if (c->extended_cpuid_level >= 0x80000008)
		c->x86_capability[FEATURESET_e8b] = cpuid_ebx(0x80000008);
	if (c->extended_cpuid_level >= 0x80000021)
		cpuid(0x80000021,
		      &c->x86_capability[FEATURESET_e21a], &tmp,
		      &c->x86_capability[FEATURESET_e21c], &tmp);

	/* Intel-defined flags: level 0x00000007 */
	if (c->cpuid_level >= 7) {
		uint32_t max_subleaf;

		cpuid_count(7, 0, &max_subleaf,
			    &c->x86_capability[FEATURESET_7b0],
			    &c->x86_capability[FEATURESET_7c0],
			    &c->x86_capability[FEATURESET_7d0]);
		if (max_subleaf >= 1)
			cpuid_count(7, 1,
				    &c->x86_capability[FEATURESET_7a1],
				    &c->x86_capability[FEATURESET_7b1],
				    &c->x86_capability[FEATURESET_7c1],
				    &c->x86_capability[FEATURESET_7d1]);
		if (max_subleaf >= 2)
			cpuid_count(7, 2,
				    &tmp, &tmp, &tmp,
				    &c->x86_capability[FEATURESET_7d2]);
	}

	if (c->cpuid_level >= 0xd)
		cpuid_count(0xd, 1,
			    &c->x86_capability[FEATURESET_Da1],
			    &tmp, &tmp, &tmp);

	if (test_bit(X86_FEATURE_ARCH_CAPS, c->x86_capability))
		rdmsr(MSR_ARCH_CAPABILITIES,
		      c->x86_capability[FEATURESET_m10Al],
		      c->x86_capability[FEATURESET_m10Ah]);
}

/*
 * This does the hard work of actually picking apart the CPU stuff...
 */
void identify_cpu(struct cpuinfo_x86 *c)
{
	int i;

	reset_cpuinfo(c, false);
	generic_identify(c);

#ifdef NOISY_CAPS
	printk(KERN_DEBUG "CPU: After vendor identify, caps:");
	for (i = 0; i < NCAPINTS; i++)
		printk(" %08x", c->x86_capability[i]);
	printk("\n");
#endif

	/*
	 * Vendor-specific initialization.  In this section we
	 * canonicalize the feature flags, meaning if there are
	 * features a certain CPU supports which CPUID doesn't
	 * tell us, CPUID claiming incorrect flags, or other bugs,
	 * we handle them here.
	 *
	 * At the end of this section, c->x86_capability better
	 * indicate the features this CPU genuinely supports!
	 */
	if (actual_cpu.c_init)
		alternative_vcall(actual_cpu.c_init, c);

	/*
	 * The vendor-specific functions might have changed features.  Now
	 * we do "generic changes."
	 */
	for (i = 0; i < FSCAPINTS; ++i)
		c->x86_capability[i] &= known_features[i];

	for (i = 0 ; i < NCAPINTS ; ++i) {
		c->x86_capability[i] |= forced_caps[i];
		c->x86_capability[i] &= ~cleared_caps[i];
	}

	/* If the model name is still unset, construct one from the raw IDs. */
	if ( !c->x86_model_id[0] ) {
		/* Last resort... */
		snprintf(c->x86_model_id, sizeof(c->x86_model_id),
			"%02x/%02x", c->x86_vendor, c->x86_model);
	}

	/* Now the feature flags better reflect actual CPU features! */

	xstate_init(c);

#ifdef NOISY_CAPS
	printk(KERN_DEBUG "CPU: After all inits, caps:");
	for (i = 0; i < NCAPINTS; i++)
		printk(" %08x", c->x86_capability[i]);
	printk("\n");
#endif

	/*
	 * If RDRAND is available, make an attempt to check that it actually
	 * (still) works.
	 */
	if (cpu_has(c, X86_FEATURE_RDRAND)) {
		unsigned int prev = 0;

		for (i = 0; i < 5; ++i)
		{
			unsigned int cur = arch_get_random();

			if (prev && cur != prev)
				break;
			prev = cur;
		}

		if (i >= 5)
			printk(XENLOG_WARNING "CPU%u: RDRAND appears to not work\n",
			       smp_processor_id());
	}

	if (system_state == SYS_STATE_resume)
		return;

	/*
	 * On SMP, boot_cpu_data holds the common feature set between
	 * all CPUs; so make sure that we indicate which features are
	 * common between the CPUs.  The first time this routine gets
	 * executed, c == &boot_cpu_data.
	 */
	if ( c != &boot_cpu_data ) {
		/* AND the already accumulated flags with these */
		for ( i = 0 ; i < NCAPINTS ; i++ )
			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];

		mcheck_init(c, false);
	} else {
		mcheck_init(c, true);

		mtrr_bp_init();
	}

	setup_doitm();
}

/* leaf 0xb SMT level */
#define SMT_LEVEL       0

/* leaf 0xb sub-leaf types */
#define INVALID_TYPE    0
#define SMT_TYPE        1
#define CORE_TYPE       2

#define LEAFB_SUBTYPE(ecx)          (((ecx) >> 8) & 0xff)
#define BITS_SHIFT_NEXT_LEVEL(eax)  ((eax) & 0x1f)
#define LEVEL_MAX_SIBLINGS(ebx)     ((ebx) & 0xffff)

/*
 * Check for extended topology enumeration cpuid leaf 0xb and if it
 * exists, use it for cpu topology detection.
 */
bool detect_extended_topology(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx, sub_index;
	unsigned int ht_mask_width, core_plus_mask_width;
	unsigned int core_select_mask, core_level_siblings;
	unsigned int initial_apicid;

	if ( c->cpuid_level < 0xb )
		return false;

	cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);

	/* Check if the cpuid leaf 0xb is actually implemented */
	if ( ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE) )
		return false;

	__set_bit(X86_FEATURE_XTOPOLOGY, c->x86_capability);

	initial_apicid = edx;

	/* Populate HT related information from sub-leaf level 0 */
	core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
	core_level_siblings = c->x86_num_siblings = 1u << ht_mask_width;

	sub_index = 1;
	do {
		cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);

		/* Check for the Core type in the implemented sub leaves */
		if ( LEAFB_SUBTYPE(ecx) == CORE_TYPE ) {
			core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
			core_level_siblings = 1u << core_plus_mask_width;
			break;
		}

		sub_index++;
	} while ( LEAFB_SUBTYPE(ecx) != INVALID_TYPE );

	core_select_mask = (~(~0u << core_plus_mask_width)) >> ht_mask_width;

	c->cpu_core_id = phys_pkg_id(initial_apicid, ht_mask_width)
		& core_select_mask;
	c->phys_proc_id = phys_pkg_id(initial_apicid, core_plus_mask_width);

	c->apicid = phys_pkg_id(initial_apicid, 0);
	c->x86_max_cores = (core_level_siblings / c->x86_num_siblings);

	if ( opt_cpu_info )
	{
		printk("CPU: Physical Processor ID: %d\n",
		       c->phys_proc_id);
		if ( c->x86_max_cores > 1 )
			printk("CPU: Processor Core ID: %d\n",
			       c->cpu_core_id);
	}

	return true;
}

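/*
 * Legacy hyperthreading detection for CPUs without leaf 0xb: derive the
 * sibling count from CPUID leaf 1 EBX[23:16] and split the APIC ID into
 * package and core IDs accordingly.
 */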
void detect_ht(struct cpuinfo_x86 *c)
{
	u32 	eax, ebx, ecx, edx;
	int 	index_msb, core_bits;

	if (!cpu_has(c, X86_FEATURE_HTT) ||
	    cpu_has(c, X86_FEATURE_CMP_LEGACY) ||
	    cpu_has(c, X86_FEATURE_XTOPOLOGY))
		return;

	cpuid(1, &eax, &ebx, &ecx, &edx);
	c->x86_num_siblings = (ebx & 0xff0000) >> 16;

	if (c->x86_num_siblings == 1) {
		printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
	} else if (c->x86_num_siblings > 1) {
		index_msb = get_count_order(c->x86_num_siblings);
		c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);

		if (opt_cpu_info)
			printk("CPU: Physical Processor ID: %d\n",
			       c->phys_proc_id);

		c->x86_num_siblings = c->x86_num_siblings / c->x86_max_cores;

		index_msb = get_count_order(c->x86_num_siblings);

		core_bits = get_count_order(c->x86_max_cores);

		c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
					       ((1 << core_bits) - 1);

		if (opt_cpu_info && c->x86_max_cores > 1)
			printk("CPU: Processor Core ID: %d\n",
			       c->cpu_core_id);
	}
}

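/*
 * Map an APIC ID to its socket (package) number, using leaf 0xb's
 * core-plus-SMT width when extended topology is available and falling
 * back to the leaf 1 sibling count otherwise.
 */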
unsigned int __init apicid_to_socket(unsigned int apicid)
{
	unsigned int dummy;

	if (boot_cpu_has(X86_FEATURE_XTOPOLOGY)) {
		unsigned int eax, ecx, sub_index = 1, core_plus_mask_width;

		cpuid_count(0xb, SMT_LEVEL, &eax, &dummy, &dummy, &dummy);
		core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
		do {
			cpuid_count(0xb, sub_index, &eax, &dummy, &ecx,
			            &dummy);

			if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
				core_plus_mask_width =
					BITS_SHIFT_NEXT_LEVEL(eax);
				break;
			}

			sub_index++;
		} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);

		return _phys_pkg_id(apicid, core_plus_mask_width);
	}

	if (boot_cpu_has(X86_FEATURE_HTT) &&
	    !boot_cpu_has(X86_FEATURE_CMP_LEGACY)) {
		unsigned int num_siblings = (cpuid_ebx(1) & 0xff0000) >> 16;

		if (num_siblings)
			return _phys_pkg_id(apicid,
			                    get_count_order(num_siblings));
	}

	return apicid;
}

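/* Print vendor, model name and stepping for a CPU when "cpuinfo" is given. */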
void print_cpu_info(unsigned int cpu)
{
	const struct cpuinfo_x86 *c = cpu_data + cpu;
	const char *vendor = NULL;

	if (!opt_cpu_info)
		return;

	printk("CPU%u: ", cpu);

	vendor = x86_cpuid_vendor_to_str(c->x86_vendor);
	if (strncmp(c->x86_model_id, vendor, strlen(vendor)))
		printk("%s ", vendor);

	if (!c->x86_model_id[0])
		printk("%d86", c->x86);
	else
		printk("%s", c->x86_model_id);

	printk(" stepping %02x\n", c->x86_mask);
}

static cpumask_t cpu_initialized;

static void skinit_enable_intr(void)
{
	uint64_t val;

	/*
	 * If the platform is performing a Secure Launch via SKINIT, the
	 * INIT_REDIRECTION flag will be active.
	 */
	if ( !cpu_has_skinit || rdmsr_safe(MSR_K8_VM_CR, val) ||
	     !(val & VM_CR_INIT_REDIRECTION) )
		return;

	ap_boot_method = AP_BOOT_SKINIT;

	/*
	 * We don't yet handle #SX.  Disable INIT_REDIRECTION first, before
	 * enabling GIF, so a pending INIT resets us, rather than causing a
	 * panic due to an unknown exception.
	 */
	wrmsrl(MSR_K8_VM_CR, val & ~VM_CR_INIT_REDIRECTION);
	asm volatile ( "stgi" ::: "memory" );
}

/*
 * cpu_init() initializes state that is per-CPU. Some data is already
 * initialized (naturally) in the bootstrap process, such as the GDT
 * and IDT. We reload them nevertheless; this function acts as a
 * 'CPU state barrier', and nothing should get across.
 */
void cpu_init(void)
{
	int cpu = smp_processor_id();

	if (cpumask_test_and_set_cpu(cpu, &cpu_initialized)) {
		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
		for (;;) local_irq_enable();
	}
	if (opt_cpu_info)
		printk("Initializing CPU#%d\n", cpu);

	/* Install correct page table. */
	write_ptbase(current);

	/* Ensure FPU gets initialised for each domain. */
	stts();

	/* Reset debug registers: */
	write_debugreg(0, 0);
	write_debugreg(1, 0);
	write_debugreg(2, 0);
	write_debugreg(3, 0);
	write_debugreg(6, X86_DR6_DEFAULT);
	write_debugreg(7, X86_DR7_DEFAULT);

	if (cpu_has_pku)
		wrpkru(0);

	/*
	 * If the platform is performing a Secure Launch via SKINIT, GIF is
	 * clear to prevent external interrupts interfering with Secure
	 * Startup.  Re-enable all interrupts now that we are suitably set up.
	 *
	 * Refer to AMD APM Vol2 15.27 "Secure Startup with SKINIT".
	 */
	skinit_enable_intr();

	/* Enable NMIs.  Our loader (e.g. Tboot) may have left them disabled. */
	enable_nmis();
}

void cpu_uninit(unsigned int cpu)
{
	cpumask_clear_cpu(cpu, &cpu_initialized);
}

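/*
 * Return the first entry in @table matching the boot CPU's vendor, family,
 * model, stepping and feature, or NULL if no entry matches.
 */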
const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id table[])
{
    const struct x86_cpu_id *m;
    const struct cpuinfo_x86 *c = &boot_cpu_data;

    /*
     * Although derived from Linux originally, Xen has no valid rows where
     * ->vendor is zero, so use this in place of checking all metadata.
     */
    for ( m = table; m->vendor; m++ )
    {
        if ( c->vendor != m->vendor )
            continue;
        if ( c->family != m->family )
            continue;
        if ( c->model != m->model )
            continue;
        if ( !((1U << c->stepping) & m->steppings) )
            continue;
        if ( !cpu_has(c, m->feature) )
            continue;

        return m;
    }

    return NULL;
}