#include <xen/init.h>
#include <xen/kernel.h>
#include <xen/string.h>
#include <xen/bitops.h>
#include <xen/smp.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/mpspec.h>
#include <asm/apic.h>
#include <asm/i387.h>
#include <mach_apic.h>
#include <asm/hvm/support.h>
#include <asm/setup.h>

#include "cpu.h"

#define select_idle_routine(x) ((void)0)

/*
 * Set caps in expected_levelling_cap, probe a specific masking MSR, and set
 * caps in levelling_caps if it is found, or clobber the MSR index if missing.
 * If present, the MSR's default (current) value is returned.
 */
static uint64_t __init _probe_mask_msr(unsigned int *msr, uint64_t caps)
{
	uint64_t val = 0;

	expected_levelling_cap |= caps;

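	/*
	 * The MSR is deemed present only if it can be read and its current
	 * value written back; otherwise the index is zapped so later code
	 * treats the mask as unavailable.
	 */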
	if (rdmsr_safe(*msr, val) || wrmsr_safe(*msr, val))
		*msr = 0;
	else
		levelling_caps |= caps;

	return val;
}

/* Indices of the masking MSRs, or 0 if unavailable. */
static unsigned int __read_mostly msr_basic, __read_mostly msr_ext,
	__read_mostly msr_xsave;

/*
 * Probe for the existence of the expected masking MSRs.  They might easily
 * not be available if Xen is running virtualised.
 */
static void __init probe_masking_msrs(void)
{
	const struct cpuinfo_x86 *c = &boot_cpu_data;
	unsigned int exp_msr_basic, exp_msr_ext, exp_msr_xsave;

	/* Only family 6 supports this feature. */
	if (c->x86 != 6)
		return;

	switch (c->x86_model) {
	case 0x17: /* Yorkfield, Wolfdale, Penryn, Harpertown(DP) */
	case 0x1d: /* Dunnington(MP) */
		msr_basic = MSR_INTEL_MASK_V1_CPUID1;
		break;

	case 0x1a: /* Bloomfield, Nehalem-EP(Gainestown) */
	case 0x1e: /* Clarksfield, Lynnfield, Jasper Forest */
	case 0x1f: /* Something Nehalem-based - perhaps Auburndale/Havendale? */
	case 0x25: /* Arrandale, Clarksdale */
	case 0x2c: /* Gulftown, Westmere-EP */
	case 0x2e: /* Nehalem-EX(Beckton) */
	case 0x2f: /* Westmere-EX */
		msr_basic = MSR_INTEL_MASK_V2_CPUID1;
		msr_ext   = MSR_INTEL_MASK_V2_CPUID80000001;
		break;

	case 0x2a: /* SandyBridge */
	case 0x2d: /* SandyBridge-E, SandyBridge-EN, SandyBridge-EP */
		msr_basic = MSR_INTEL_MASK_V3_CPUID1;
		msr_ext   = MSR_INTEL_MASK_V3_CPUID80000001;
		msr_xsave = MSR_INTEL_MASK_V3_CPUIDD_01;
		break;
	}

	exp_msr_basic = msr_basic;
	exp_msr_ext   = msr_ext;
	exp_msr_xsave = msr_xsave;

	if (msr_basic)
		cpuidmask_defaults._1cd = _probe_mask_msr(&msr_basic, LCAP_1cd);

	if (msr_ext)
		cpuidmask_defaults.e1cd = _probe_mask_msr(&msr_ext, LCAP_e1cd);

	if (msr_xsave)
		cpuidmask_defaults.Da1 = _probe_mask_msr(&msr_xsave, LCAP_Da1);

	/*
	 * Don't bother warning about a mismatch if virtualised.  These MSRs
	 * are not architectural and almost never virtualised.
	 */
	if ((expected_levelling_cap == levelling_caps) ||
	    cpu_has_hypervisor)
		return;

	printk(XENLOG_WARNING "Mismatch between expected (%#x) "
	       "and real (%#x) levelling caps: missing %#x\n",
	       expected_levelling_cap, levelling_caps,
	       (expected_levelling_cap ^ levelling_caps) & levelling_caps);
	printk(XENLOG_WARNING "Fam %#x, model %#x expected (%#x/%#x/%#x), "
	       "got (%#x/%#x/%#x)\n", c->x86, c->x86_model,
	       exp_msr_basic, exp_msr_ext, exp_msr_xsave,
	       msr_basic, msr_ext, msr_xsave);
	printk(XENLOG_WARNING
	       "If not running virtualised, please report a bug\n");
}

/*
 * Context switch CPUID masking state to the next domain.  Only called if
 * CPUID Faulting isn't available, but masking MSRs have been detected.  A
 * parameter of NULL is used to context switch to the default host state (by
 * the cpu bringup-code, crash path, etc).
 */
static void intel_ctxt_switch_masking(const struct vcpu *next)
{
	struct cpuidmasks *these_masks = &this_cpu(cpuidmasks);
	const struct domain *nextd = next ? next->domain : NULL;
	const struct cpuidmasks *masks =
		(nextd && is_pv_domain(nextd) && nextd->arch.pv_domain.cpuidmasks)
		? nextd->arch.pv_domain.cpuidmasks : &cpuidmask_defaults;

	if (msr_basic) {
		uint64_t val = masks->_1cd;

		/*
		 * OSXSAVE defaults to 1, which causes fast-forwarding of
		 * Xen's real setting.  Clobber it if disabled by the guest
		 * kernel.
		 */
		if (next && is_pv_vcpu(next) && !is_idle_vcpu(next) &&
		    !(next->arch.pv_vcpu.ctrlreg[4] & X86_CR4_OSXSAVE))
			val &= ~(uint64_t)cpufeat_mask(X86_FEATURE_OSXSAVE);

		if (unlikely(these_masks->_1cd != val)) {
			wrmsrl(msr_basic, val);
			these_masks->_1cd = val;
		}
	}

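	/*
	 * Lazily update the extended and xsave masks: only issue the (slow)
	 * WRMSR when the value cached for this CPU differs from the one the
	 * next domain wants, and only if the MSR exists.
	 */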
#define LAZY(msr, field)						\
	({								\
		if (unlikely(these_masks->field != masks->field) &&	\
		    (msr))						\
		{							\
			wrmsrl((msr), masks->field);			\
			these_masks->field = masks->field;		\
		}							\
	})

	LAZY(msr_ext,   e1cd);
	LAZY(msr_xsave, Da1);

#undef LAZY
}

/*
 * opt_cpuid_mask_ecx/edx: cpuid.1[ecx, edx] feature mask.
 * For example, an E8400 [Intel Core 2 Duo Processor series] reports
 * ecx = 0x0008E3FD, edx = 0xBFEBFBFF for CPUID.EAX = 1.  To 'rev down'
 * a newer CPU to an E8400, set these values via the corresponding Xen
 * boot parameters.
 */
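/*
 * As an illustration (assuming the usual Xen command line option names),
 * levelling the visible feature set down to an E8400 might look like:
 *
 *     cpuid_mask_ecx=0x0008E3FD cpuid_mask_edx=0xBFEBFBFF
 */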
static void __init noinline intel_init_levelling(void)
{
	if (probe_cpuid_faulting())
		return;

	probe_masking_msrs();

	if (msr_basic) {
		uint32_t ecx, edx, tmp;

		cpuid(0x00000001, &tmp, &tmp, &ecx, &edx);

		ecx &= opt_cpuid_mask_ecx;
		edx &= opt_cpuid_mask_edx;

		/* Fast-forward bits - Must be set. */
		if (ecx & cpufeat_mask(X86_FEATURE_XSAVE))
			ecx |= cpufeat_mask(X86_FEATURE_OSXSAVE);
		edx |= cpufeat_mask(X86_FEATURE_APIC);

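		/*
		 * The mask MSR carries the CPUID.1 EDX mask in its upper 32
		 * bits and the ECX mask in the lower 32 bits.
		 */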
		cpuidmask_defaults._1cd &= ((u64)edx << 32) | ecx;
	}

	if (msr_ext) {
		uint32_t ecx, edx, tmp;

		cpuid(0x80000001, &tmp, &tmp, &ecx, &edx);

		ecx &= opt_cpuid_mask_ext_ecx;
		edx &= opt_cpuid_mask_ext_edx;

		cpuidmask_defaults.e1cd &= ((u64)edx << 32) | ecx;
	}

	if (msr_xsave) {
		uint32_t eax, tmp;

		cpuid_count(0x0000000d, 1, &eax, &tmp, &tmp, &tmp);

		eax &= opt_cpuid_mask_xsave_eax;

		cpuidmask_defaults.Da1 &= (~0ULL << 32) | eax;
	}

	if (opt_cpu_info) {
		printk(XENLOG_INFO "Levelling caps: %#x\n", levelling_caps);

		if (!cpu_has_cpuid_faulting)
			printk(XENLOG_INFO
			       "MSR defaults: 1d 0x%08x, 1c 0x%08x, e1d 0x%08x, "
			       "e1c 0x%08x, Da1 0x%08x\n",
			       (uint32_t)(cpuidmask_defaults._1cd >> 32),
			       (uint32_t)cpuidmask_defaults._1cd,
			       (uint32_t)(cpuidmask_defaults.e1cd >> 32),
			       (uint32_t)cpuidmask_defaults.e1cd,
			       (uint32_t)cpuidmask_defaults.Da1);
	}

	if (levelling_caps)
		ctxt_switch_masking = intel_ctxt_switch_masking;
}

static void early_init_intel(struct cpuinfo_x86 *c)
{
	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
	if (c->x86 == 15 && c->x86_cache_alignment == 64)
		c->x86_cache_alignment = 128;

	/* Unmask CPUID levels and NX if masked: */
	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
		u64 misc_enable, disable;

		rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);

		disable = misc_enable & (MSR_IA32_MISC_ENABLE_LIMIT_CPUID |
					 MSR_IA32_MISC_ENABLE_XD_DISABLE);
		if (disable) {
			wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable);
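			/*
			 * Record the cleared bits so the boot trampoline can
			 * apply the same MISC_ENABLE adjustment when bringing
			 * other CPUs up.
			 */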
			bootsym(trampoline_misc_enable_off) |= disable;
		}

		if (disable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID)
			printk(KERN_INFO "revised cpuid level: %d\n",
			       cpuid_eax(0));
		if (disable & MSR_IA32_MISC_ENABLE_XD_DISABLE) {
			write_efer(read_efer() | EFER_NX);
			printk(KERN_INFO
			       "re-enabled NX (Execute Disable) protection\n");
		}
	}

	/* CPUID workaround for Intel 0F33/0F34 CPU */
	if (boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 3 &&
	    (boot_cpu_data.x86_mask == 3 || boot_cpu_data.x86_mask == 4))
		paddr_bits = 36;

	if (c == &boot_cpu_data)
		intel_init_levelling();

	ctxt_switch_levelling(NULL);
}

/*
 * P4 Xeon errata 037 workaround.
 * Hardware prefetcher may cause stale data to be loaded into the cache.
 *
 * Xeon 7400 erratum AAI65 (and later Xeons)
 * MONITOR/MWAIT may have excessive false wakeups
 */
static void Intel_errata_workarounds(struct cpuinfo_x86 *c)
{
	unsigned long lo, hi;

	if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
		rdmsr (MSR_IA32_MISC_ENABLE, lo, hi);
		if ((lo & (1<<9)) == 0) {
			printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
			printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
			lo |= (1<<9);	/* Disable hw prefetching */
			wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
		}
	}

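	/*
	 * Dunnington, Nehalem-EX and Westmere-EX (models 29/46/47) need the
	 * monitored cache line explicitly flushed with CLFLUSH to avoid the
	 * spurious MONITOR/MWAIT wakeups noted above (erratum AAI65 and
	 * equivalents).
	 */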
	if (c->x86 == 6 && cpu_has_clflush &&
	    (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
		__set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability);
}


/*
 * find out the number of processor cores on the die
 */
static int num_cpu_cores(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx;

	if (c->cpuid_level < 4)
		return 1;

	/* Intel has a non-standard dependency on %ecx for this CPUID level. */
	cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
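	/*
	 * EAX[4:0] is the cache type of leaf 4/subleaf 0 (0 means no such
	 * cache); EAX[31:26] is the maximum number of addressable core IDs
	 * in the physical package, minus one.
	 */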
	if (eax & 0x1f)
		return ((eax >> 26) + 1);
	else
		return 1;
}

static void init_intel(struct cpuinfo_x86 *c)
{
	unsigned int l2 = 0;

	/* Detect the extended topology information if available */
	detect_extended_topology(c);

	select_idle_routine(c);
	l2 = init_intel_cacheinfo(c);
	if (c->cpuid_level > 9) {
		unsigned eax = cpuid_eax(10);
		/* Check for version and the number of counters */
		if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
			__set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
	}

	if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) )
	{
		c->x86_max_cores = num_cpu_cores(c);
		detect_ht(c);
	}

	/* Work around errata */
	Intel_errata_workarounds(c);

	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
		(c->x86 == 0x6 && c->x86_model >= 0x0e))
		__set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
	if (cpu_has(c, X86_FEATURE_ITSC)) {
		__set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
		__set_bit(X86_FEATURE_NONSTOP_TSC, c->x86_capability);
		__set_bit(X86_FEATURE_TSC_RELIABLE, c->x86_capability);
	}
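	/* CPUID.0x6:EAX bit 2 advertises ARAT (APIC timer runs in deep C-states). */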
	if ( opt_arat &&
	     ( c->cpuid_level >= 0x00000006 ) &&
	     ( cpuid_eax(0x00000006) & (1u<<2) ) )
		__set_bit(X86_FEATURE_ARAT, c->x86_capability);
}

static const struct cpu_dev intel_cpu_dev = {
	.c_vendor	= "Intel",
	.c_ident 	= { "GenuineIntel" },
	.c_early_init	= early_init_intel,
	.c_init		= init_intel,
};

int __init intel_cpu_init(void)
{
	cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev;
	return 0;
}

// arch_initcall(intel_cpu_init);