/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
   because MTRRs can span up to 40 bits (36 bits on most modern x86) */
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/param.h>
#include <xen/stdbool.h>
#include <asm/flushtlb.h>
#include <asm/invpcid.h>
#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/system.h>
#include <asm/cpufeature.h>
#include "mtrr.h"

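/*
 * Layout of the fixed-range MTRRs: each MSR holds eight one-byte types, so
 * the number of MSRs in a block is (region size) / (granularity * 8), which
 * is what the shifts below compute.  For example, the 64K block covers
 * 0x00000-0x7ffff: 0x80000 >> (16 + 3) = 512K / (64K * 8) = 1 MSR; the 4K
 * block covers 0xC0000-0xFFFFF: 0x40000 >> (12 + 3) = 8 MSRs.
 */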
static const struct fixed_range_block {
    uint32_t base_msr;   /* first MSR of an MTRR block */
    unsigned int ranges; /* number of MTRRs in this block */
} fixed_range_blocks[] = {
    { MSR_MTRRfix64K_00000, (0x80000 - 0x00000) >> (16 + 3) },
    { MSR_MTRRfix16K_80000, (0xC0000 - 0x80000) >> (14 + 3) },
    { MSR_MTRRfix4K_C0000, (0x100000 - 0xC0000) >> (12 + 3) },
    {}
};

static unsigned long smp_changes_mask;
struct mtrr_state mtrr_state = {};

/* Get the MSR pair relating to a var range */
static void
get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
{
    rdmsrl(MSR_IA32_MTRR_PHYSBASE(index), vr->base);
    rdmsrl(MSR_IA32_MTRR_PHYSMASK(index), vr->mask);
}

static void
get_fixed_ranges(mtrr_type *frs)
{
    uint64_t *p = (uint64_t *)frs;
    const struct fixed_range_block *block;

    if (!mtrr_state.have_fixed)
        return;

    for (block = fixed_range_blocks; block->ranges; ++block) {
        unsigned int i;

        for (i = 0; i < block->ranges; ++i, ++p)
            rdmsrl(block->base_msr + i, *p);
    }
}

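/*
 * Report whether any two enabled variable MTRRs overlap.  Two ranges overlap
 * iff their bases agree on every address bit that both masks select, i.e.
 * (base1 & mask1 & mask2) == (base2 & mask1 & mask2).
 */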
bool is_var_mtrr_overlapped(const struct mtrr_state *m)
{
    unsigned int seg, i;
    unsigned int num_var_ranges = MASK_EXTR(m->mtrr_cap, MTRRcap_VCNT);

    for ( i = 0; i < num_var_ranges; i++ )
    {
        uint64_t base1 = m->var_ranges[i].base >> PAGE_SHIFT;
        uint64_t mask1 = m->var_ranges[i].mask >> PAGE_SHIFT;

        if ( !(m->var_ranges[i].mask & MTRR_PHYSMASK_VALID) )
            continue;

        for ( seg = i + 1; seg < num_var_ranges; seg++ )
        {
            uint64_t base2 = m->var_ranges[seg].base >> PAGE_SHIFT;
            uint64_t mask2 = m->var_ranges[seg].mask >> PAGE_SHIFT;

            if ( !(m->var_ranges[seg].mask & MTRR_PHYSMASK_VALID) )
                continue;

            if ( (base1 & mask1 & mask2) == (base2 & mask2 & mask1) )
            {
                /* MTRRs overlap. */
                return true;
            }
        }
    }

    return false;
}

void cf_check mtrr_save_fixed_ranges(void *info)
{
    get_fixed_ranges(mtrr_state.fixed_ranges);
}

/* Grab all of the MTRR state for this CPU into mtrr_state */
void __init get_mtrr_state(void)
{
    unsigned int i;
    struct mtrr_var_range *vrs;
    uint64_t msr_content;

    if (!mtrr_state.var_ranges) {
        mtrr_state.var_ranges = xmalloc_array(struct mtrr_var_range,
                                              num_var_ranges);
        if (!mtrr_state.var_ranges)
            return;
    }
    vrs = mtrr_state.var_ranges;

    rdmsrl(MSR_MTRRcap, msr_content);
    mtrr_state.have_fixed = (msr_content >> 8) & 1;

    for (i = 0; i < num_var_ranges; i++)
        get_mtrr_var_range(i, &vrs[i]);
    get_fixed_ranges(mtrr_state.fixed_ranges);

    rdmsrl(MSR_MTRRdefType, msr_content);
    mtrr_state.def_type = (msr_content & 0xff);
    mtrr_state.enabled = MASK_EXTR(msr_content, MTRRdefType_E);
    mtrr_state.fixed_enabled = MASK_EXTR(msr_content, MTRRdefType_FE);

    /* Store mtrr_cap for HVM MTRR virtualisation. */
    rdmsrl(MSR_MTRRcap, mtrr_state.mtrr_cap);
}

static bool __initdata mtrr_show;
boolean_param("mtrr.show", mtrr_show);

static const char *__init mtrr_attrib_to_str(mtrr_type x)
{
    static const char __initconst strings[MTRR_NUM_TYPES][16] =
    {
        [X86_MT_UC] = "uncachable",
        [X86_MT_WC] = "write-combining",
        [X86_MT_WT] = "write-through",
        [X86_MT_WP] = "write-protect",
        [X86_MT_WB] = "write-back",
    };

    return (x < ARRAY_SIZE(strings) && strings[x][0]) ? strings[x] : "?";
}

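/*
 * State used by print_fixed()/print_fixed_last() to coalesce adjacent fixed
 * ranges of the same memory type into a single line of output.
 */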
static unsigned int __initdata last_fixed_start;
static unsigned int __initdata last_fixed_end;
static mtrr_type __initdata last_fixed_type;

static void __init print_fixed_last(const char *level)
{
    if (!last_fixed_end)
        return;

    printk("%s %05x-%05x %s\n", level, last_fixed_start,
           last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type));

    last_fixed_end = 0;
}

static void __init update_fixed_last(unsigned int base, unsigned int end,
                                     mtrr_type type)
{
    last_fixed_start = base;
    last_fixed_end = end;
    last_fixed_type = type;
}

static void __init print_fixed(unsigned int base, unsigned int step,
                               const mtrr_type *types, const char *level)
{
    unsigned i;

    for (i = 0; i < 8; ++i, ++types, base += step) {
        if (last_fixed_end == 0) {
            update_fixed_last(base, base + step, *types);
            continue;
        }
        if (last_fixed_end == base && last_fixed_type == *types) {
            last_fixed_end = base + step;
            continue;
        }
        /* new segments: gap or different type */
        print_fixed_last(level);
        update_fixed_last(base, base + step, *types);
    }
}

static void __init print_mtrr_state(const char *level)
{
    unsigned int i;
    int width;

    printk("%sMTRR default type: %s\n", level,
           mtrr_attrib_to_str(mtrr_state.def_type));
    if (mtrr_state.have_fixed) {
        const mtrr_type *fr = mtrr_state.fixed_ranges;
        const struct fixed_range_block *block = fixed_range_blocks;
        unsigned int base = 0, step = 0x10000;

        printk("%sMTRR fixed ranges %sabled:\n", level,
               mtrr_state.fixed_enabled ? "en" : "dis");
        for (; block->ranges; ++block, step >>= 2) {
            for (i = 0; i < block->ranges; ++i, fr += 8) {
                print_fixed(base, step, fr, level);
                base += 8 * step;
            }
        }
        print_fixed_last(level);
    }
    printk("%sMTRR variable ranges %sabled:\n", level,
           mtrr_state.enabled ? "en" : "dis");
    width = (paddr_bits - PAGE_SHIFT + 3) / 4;

    for (i = 0; i < num_var_ranges; ++i) {
        if (mtrr_state.var_ranges[i].mask & MTRR_PHYSMASK_VALID)
            printk("%s %u base %0*"PRIx64"000 mask %0*"PRIx64"000 %s\n",
                   level, i,
                   width, mtrr_state.var_ranges[i].base >> 12,
                   width, mtrr_state.var_ranges[i].mask >> 12,
                   mtrr_attrib_to_str(mtrr_state.var_ranges[i].base &
                                      MTRR_PHYSBASE_TYPE_MASK));
        else
            printk("%s %u disabled\n", level, i);
    }

    if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
         boot_cpu_data.x86 >= 0xf) ||
        boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
        uint64_t syscfg, tom2;

        rdmsrl(MSR_K8_SYSCFG, syscfg);
        if (syscfg & SYSCFG_MTRR_TOM2_EN) {
            rdmsrl(MSR_K8_TOP_MEM2, tom2);
            printk("%sTOM2: %012"PRIx64"%s\n", level, tom2,
                   syscfg & (1 << 22) ? " (WB)" : "");
        }
    }
}

/* Some BIOSes are broken and don't set all MTRRs the same! */
void __init mtrr_state_warn(void)
{
    unsigned long mask = smp_changes_mask;

    if (mtrr_show)
        print_mtrr_state(mask ? KERN_WARNING : "");
    if (!mask)
        return;
    if (mask & MTRR_CHANGE_MASK_FIXED)
        printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n");
    if (mask & MTRR_CHANGE_MASK_VARIABLE)
        printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n");
    if (mask & MTRR_CHANGE_MASK_DEFTYPE)
        printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n");
    printk(KERN_INFO "mtrr: probably your BIOS does not set up all CPUs.\n");
    printk(KERN_INFO "mtrr: corrected configuration.\n");
    if (!mtrr_show)
        print_mtrr_state(KERN_INFO);
}

/* Doesn't attempt to pass an error out to MTRR users because it's quite
   complicated in some cases and probably not worth it: the best error
   handling here is simply to ignore the failure. */
static void mtrr_wrmsr(unsigned int msr, uint64_t msr_content)
{
    if (wrmsr_safe(msr, msr_content) < 0)
        printk(KERN_ERR
               "MTRR: CPU %u: Writing MSR %x to %"PRIx64" failed\n",
               smp_processor_id(), msr, msr_content);
    /* Cache overlap status for efficient HVM MTRR virtualisation. */
    mtrr_state.overlapped = is_var_mtrr_overlapped(&mtrr_state);
}

/**
 * Checks and updates a fixed-range MTRR if it differs from the value it
 * should have. If K8 extensions are wanted, update the K8 SYSCFG MSR also.
 * See AMD publication no. 24593, chapter 7.8.1, page 233 for more information.
 * \param msr MSR address of the MTRR which should be checked and updated
 * \param changed pointer which indicates whether the MTRR needed to be changed
 * \param msrwords pointer to the MSR values which the MSR should have
 */
static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
{
    uint64_t msr_content, val;

    rdmsrl(msr, msr_content);
    val = ((uint64_t)msrwords[1] << 32) | msrwords[0];

    if (msr_content != val) {
        mtrr_wrmsr(msr, val);
        *changed = true;
    }
}

int mtrr_get_free_region(
    unsigned long base, unsigned long size, int replace_reg)
/* [SUMMARY] Get a free MTRR.
   <base> The starting (base) address of the region.
   <size> The size (in bytes) of the region.
   [RETURNS] The index of the region on success, else -ENOSPC if no free
   register is available.
*/
{
    int i, max;
    mtrr_type ltype;
    unsigned long lbase, lsize;

    max = num_var_ranges;
    if (replace_reg >= 0 && replace_reg < max)
        return replace_reg;
    for (i = 0; i < max; ++i) {
        mtrr_get(i, &lbase, &lsize, &ltype);
        if (lsize == 0)
            return i;
    }
    return -ENOSPC;
}

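/*
 * Decode a variable MTRR into a page-granular (base, size) pair plus its
 * memory type.  The mask register encodes the size: for a power-of-two
 * region of S bytes, PHYSMASK covers the address bits above log2(S), so
 * negating the page-shifted mask (after size_or_mask, set up elsewhere, has
 * filled in the bits above the supported physical address width) yields S in
 * pages.  As a worked example, a 256MiB region leaves the low 32 bits of the
 * filled-in mask as 0xffff0000, and -(uint32_t)0xffff0000 = 0x10000 pages =
 * 256MiB.
 */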
void mtrr_get(
    unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type *type)
{
    uint64_t _mask, _base;

    rdmsrl(MSR_IA32_MTRR_PHYSMASK(reg), _mask);
    if (!(_mask & MTRR_PHYSMASK_VALID)) {
        /* Invalid (i.e. free) range */
        *base = 0;
        *size = 0;
        *type = 0;
        return;
    }

    rdmsrl(MSR_IA32_MTRR_PHYSBASE(reg), _base);

    /* Work out the shifted address mask. */
    _mask = size_or_mask | (_mask >> PAGE_SHIFT);

    /* This works correctly if size is a power of two, i.e. a
       contiguous range. */
    *size = -(uint32_t)_mask;
    *base = _base >> PAGE_SHIFT;
    *type = _base & 0xff;
}

/**
 * Checks and updates the fixed-range MTRRs if they differ from the saved set
 * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges()
 */
static bool set_fixed_ranges(mtrr_type *frs)
{
    unsigned long long *saved = (unsigned long long *)frs;
    bool changed = false;
    int block = -1, range;

    while (fixed_range_blocks[++block].ranges)
        for (range = 0; range < fixed_range_blocks[block].ranges; range++)
            set_fixed_range(fixed_range_blocks[block].base_msr + range,
                            &changed, (unsigned int *)saved++);

    return changed;
}

/* Set the MSR pair relating to a var range. Returns true if
   changes are made */
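/*
 * Only the architecturally meaningful bits are compared when deciding
 * whether a write is needed: 0xfffff0ff keeps PHYSBASE's type field and low
 * address bits (bits 8-11 are reserved), 0xfffff800 keeps PHYSMASK's valid
 * bit and low address bits (bits 0-10 are reserved), and the high words are
 * clipped to the supported physical address width via size_and_mask.
 */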
static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
{
    uint32_t lo, hi, base_lo, base_hi, mask_lo, mask_hi;
    uint64_t msr_content;
    bool changed = false;

    rdmsrl(MSR_IA32_MTRR_PHYSBASE(index), msr_content);
    lo = (uint32_t)msr_content;
    hi = (uint32_t)(msr_content >> 32);
    base_lo = (uint32_t)vr->base;
    base_hi = (uint32_t)(vr->base >> 32);

    lo &= 0xfffff0ffUL;
    base_lo &= 0xfffff0ffUL;
    hi &= size_and_mask >> (32 - PAGE_SHIFT);
    base_hi &= size_and_mask >> (32 - PAGE_SHIFT);

    if ((base_lo != lo) || (base_hi != hi)) {
        mtrr_wrmsr(MSR_IA32_MTRR_PHYSBASE(index), vr->base);
        changed = true;
    }

    rdmsrl(MSR_IA32_MTRR_PHYSMASK(index), msr_content);
    lo = (uint32_t)msr_content;
    hi = (uint32_t)(msr_content >> 32);
    mask_lo = (uint32_t)vr->mask;
    mask_hi = (uint32_t)(vr->mask >> 32);

    lo &= 0xfffff800UL;
    mask_lo &= 0xfffff800UL;
    hi &= size_and_mask >> (32 - PAGE_SHIFT);
    mask_hi &= size_and_mask >> (32 - PAGE_SHIFT);

    if ((mask_lo != lo) || (mask_hi != hi)) {
        mtrr_wrmsr(MSR_IA32_MTRR_PHYSMASK(index), vr->mask);
        changed = true;
    }
    return changed;
}

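/*
 * Cached copy of MSR_MTRRdefType, saved by prepare_set() and written back by
 * post_set().  The mask 0xcff used below covers the default type field
 * (bits 0-7) together with the FE (bit 10) and E (bit 11) enable bits.
 */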
static uint64_t deftype;

static unsigned long set_mtrr_state(void)
/* [SUMMARY] Set the MTRR state for this CPU.
   [NOTE] The CPU must already be in a safe state for MTRR changes.
   [RETURNS] 0 if no changes were made, else a mask indicating what was changed.
*/
{
    unsigned int i;
    unsigned long change_mask = 0;

    for (i = 0; i < num_var_ranges; i++)
        if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i]))
            change_mask |= MTRR_CHANGE_MASK_VARIABLE;

    if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges))
        change_mask |= MTRR_CHANGE_MASK_FIXED;

    /* post_set() writes the saved value of MTRRdefType back, so to set it
       we fiddle with the saved value. */
    if ((deftype & 0xff) != mtrr_state.def_type
        || MASK_EXTR(deftype, MTRRdefType_E) != mtrr_state.enabled
        || MASK_EXTR(deftype, MTRRdefType_FE) != mtrr_state.fixed_enabled) {
        deftype = (deftype & ~0xcff) | mtrr_state.def_type |
                  MASK_INSR(mtrr_state.enabled, MTRRdefType_E) |
                  MASK_INSR(mtrr_state.fixed_enabled, MTRRdefType_FE);
        change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
    }

    return change_mask;
}


static DEFINE_SPINLOCK(set_atomicity_lock);

/*
 * Since we are disabling the cache, don't allow any interrupts: they would
 * run extremely slowly and would only increase the pain.  The caller must
 * ensure that local interrupts are disabled and are re-enabled after
 * post_set() has been called.
 */

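/*
 * prepare_set()/post_set() broadly follow the MTRR update sequence described
 * in the Intel SDM: enter no-fill cache mode (CR0.CD=1), flush the caches
 * unless self-snoop makes that unnecessary, flush the TLBs, disable the
 * MTRRs while they are modified, then restore MTRRdefType, re-enable the
 * caches and flush the TLBs again.
 */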
static bool prepare_set(void)
{
    unsigned long cr4;

    /* Note that this is not ideal, since the cache is only flushed/disabled
       for this CPU while the MTRRs are changed, but changing this requires
       more invasive changes to the way the kernel boots */

    spin_lock(&set_atomicity_lock);

    /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
    write_cr0(read_cr0() | X86_CR0_CD);

    /*
     * Cache flushing is the most time-consuming step when programming
     * the MTRRs. Fortunately, as per the Intel Software Development
     * Manual, we can skip it if the processor supports cache self-
     * snooping.
     */
    alternative("wbinvd", "", X86_FEATURE_XEN_SELFSNOOP);

    cr4 = read_cr4();
    if (cr4 & X86_CR4_PGE)
        write_cr4(cr4 & ~X86_CR4_PGE);
    else if (use_invpcid)
        invpcid_flush_all();
    else
        write_cr3(read_cr3());

    /* Save MTRR state */
    rdmsrl(MSR_MTRRdefType, deftype);

    /* Disable MTRRs, and set the default type to uncached */
    mtrr_wrmsr(MSR_MTRRdefType, deftype & ~0xcff);

    /* Again, only flush caches if we have to. */
    alternative("wbinvd", "", X86_FEATURE_XEN_SELFSNOOP);

    return cr4 & X86_CR4_PGE;
}

static void post_set(bool pge)
{
    /* Intel (P6) standard MTRRs */
    mtrr_wrmsr(MSR_MTRRdefType, deftype);

    /* Enable caches */
    write_cr0(read_cr0() & ~X86_CR0_CD);

    /* Reenable CR4.PGE (also flushes the TLB) */
    if (pge)
        write_cr4(read_cr4() | X86_CR4_PGE);
    else if (use_invpcid)
        invpcid_flush_all();
    else
        write_cr3(read_cr3());

    spin_unlock(&set_atomicity_lock);
}

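/*
 * Program every MTRR on the local CPU from the saved mtrr_state, recording
 * which classes of register (fixed, variable, default type) had to be
 * changed in smp_changes_mask so that mtrr_state_warn() can report an
 * inconsistent BIOS setup across CPUs.
 */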
void mtrr_set_all(void)
{
    unsigned long mask, count;
    unsigned long flags;
    bool pge;

    local_irq_save(flags);
    pge = prepare_set();

    /* Actually set the state */
    mask = set_mtrr_state();

    post_set(pge);
    local_irq_restore(flags);

    /* Use the atomic bitops to update the global mask */
    for (count = 0; count < sizeof mask * 8; ++count) {
        if (mask & 0x01)
            set_bit(count, &smp_changes_mask);
        mask >>= 1;
    }
}

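/*
 * <base> and <size> below are in pages.  The registers are programmed as
 * PHYSBASE = (base << PAGE_SHIFT) | type and
 * PHYSMASK = (-size << PAGE_SHIFT) | valid, with the high words clipped to
 * the supported physical address width.  As a worked example, a 256MiB
 * (0x10000-page) write-back region at 1GiB ends up with low words
 * PHYSBASE = 0x40000006 and PHYSMASK = 0xf0000800.
 */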
void mtrr_set(
    unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
/* [SUMMARY] Set variable MTRR register on the local CPU.
   <reg> The register to set.
   <base> The base address of the region.
   <size> The size of the region. If this is 0 the region is disabled.
   <type> The type of the region.
   [RETURNS] Nothing.
*/
{
    unsigned long flags;
    struct mtrr_var_range *vr;
    bool pge;

    vr = &mtrr_state.var_ranges[reg];

    local_irq_save(flags);
    pge = prepare_set();

    if (size == 0) {
        /* The valid bit is kept in the mask register, so we simply clear
           the whole mask register to disable a range. */
        memset(vr, 0, sizeof(*vr));
        mtrr_wrmsr(MSR_IA32_MTRR_PHYSMASK(reg), 0);
    } else {
        uint32_t base_lo, base_hi, mask_lo, mask_hi;

        base_lo = base << PAGE_SHIFT | type;
        base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
        mask_lo = (-size << PAGE_SHIFT) | MTRR_PHYSMASK_VALID;
        mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
        vr->base = ((uint64_t)base_hi << 32) | base_lo;
        vr->mask = ((uint64_t)mask_hi << 32) | mask_lo;

        mtrr_wrmsr(MSR_IA32_MTRR_PHYSBASE(reg), vr->base);
        mtrr_wrmsr(MSR_IA32_MTRR_PHYSMASK(reg), vr->mask);
    }

    post_set(pge);
    local_irq_restore(flags);
}

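/*
 * Reject a (base, size) pair whose base is not aligned on a size boundary.
 * The loop shifts base and last = base + size - 1 right in lockstep while
 * base ends in a 0 bit and last ends in a 1 bit; if what remains still
 * differs, the region is misaligned and the request is refused.
 */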
int mtrr_validate_add_page(
    unsigned long base, unsigned long size, unsigned int type)
{
    unsigned long lbase, last;

    /* Check upper bits of base and last are equal and lower bits are 0
       for base and 1 for last */
    last = base + size - 1;
    for (lbase = base; !(lbase & 1) && (last & 1);
         lbase = lbase >> 1, last = last >> 1)
        ;
    if (lbase != last) {
        printk(KERN_WARNING "mtrr: base(%#lx000) is not aligned on a size(%#lx000) boundary\n",
               base, size);
        return -EINVAL;
    }
    return 0;
}


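/*
 * MTRRcap bit 10 (WC) indicates that the write-combining memory type is
 * supported.
 */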
bool mtrr_have_wrcomb(void)
{
    unsigned long config;

    rdmsrl(MSR_MTRRcap, config);
    return (config & (1ULL << 10));
}