/*  Generic MTRR (Memory Type Range Register) driver.

    Copyright (C) 1997-2000  Richard Gooch
    Copyright (c) 2002	     Patrick Mochel

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; If not, see <http://www.gnu.org/licenses/>.

    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
    The postal address is:
      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.

    Source: "Pentium Pro Family Developer's Manual, Volume 3:
    Operating System Writer's Guide" (Intel document number 242692),
    section 11.11.7

    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
    on 6-7 March 2002.
    Source: Intel Architecture Software Developers Manual, Volume 3:
    System Programming Guide; Section 9.11. (1997 edition - PPro).
*/

#include <xen/init.h>
#include <xen/lib.h>
#include <xen/smp.h>
#include <xen/spinlock.h>
#include <asm/atomic.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include "mtrr.h"

/* No blocking mutexes in Xen. Spin instead. */
#define DEFINE_MUTEX(_m) DEFINE_SPINLOCK(_m)
#define mutex_lock(_m) spin_lock(_m)
#define mutex_unlock(_m) spin_unlock(_m)
#define dump_stack() ((void)0)
#define	get_cpu()	smp_processor_id()
#define put_cpu()	do {} while(0)

u32 __read_mostly num_var_ranges = 0;

unsigned int *__read_mostly usage_table;
static DEFINE_MUTEX(mtrr_mutex);

u64 __read_mostly size_or_mask;
u64 __read_mostly size_and_mask;

const struct mtrr_ops *__read_mostly mtrr_if = NULL;

static void set_mtrr(unsigned int reg, unsigned long base,
		     unsigned long size, mtrr_type type);

static const char *const mtrr_strings[MTRR_NUM_TYPES] =
{
    "uncachable",               /* 0 */
    "write-combining",          /* 1 */
    "?",                        /* 2 */
    "?",                        /* 3 */
    "write-through",            /* 4 */
    "write-protect",            /* 5 */
    "write-back",               /* 6 */
};

static const char *mtrr_attrib_to_str(int x)
{
	return (x <= 6) ? mtrr_strings[x] : "?";
}

/*  Returns non-zero if we have the write-combining memory type  */
static int have_wrcomb(void)
{
	return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0);
}

/*  This function determines the number of variable MTRRs and stores it in
    num_var_ranges.  On Intel-compatible CPUs it comes from MTRRcap[7:0]
    (VCNT); other vendors use a fixed count.  */
static void __init set_num_var_ranges(void)
{
	unsigned long config = 0;

	if (use_intel()) {
		rdmsrl(MSR_MTRRcap, config);
	} else if (is_cpu(AMD))
		config = 2;
	else if (is_cpu(CENTAUR))
		config = 8;
	num_var_ranges = config & 0xff;
}

static void __init init_table(void)
{
	int i, max;

	max = num_var_ranges;
	if ((usage_table = xmalloc_array(unsigned int, max)) == NULL) {
		printk(KERN_ERR "mtrr: could not allocate\n");
		return;
	}
	for (i = 0; i < max; i++)
		usage_table[i] = 1;
}

struct set_mtrr_data {
	atomic_t	count;
	atomic_t	gate;
	unsigned long	smp_base;
	unsigned long	smp_size;
	unsigned int	smp_reg;
	mtrr_type	smp_type;
};

/* As per the IA32 SDM vol-3, section 10.11.8 "MTRR Considerations in MP
 * Systems", MTRR updates must be synchronized across all the processors.
 * This flag avoids multiple CPU synchronizations while booting each CPU.
 * At boot and resume time, this flag is turned on in mtrr_aps_sync_begin().
 * Using this flag, the MTRR initialization (and the all-CPU sync-up) in
 * mtrr_ap_init() is avoided while booting each CPU.
 * After all the CPUs have come up, mtrr_aps_sync_end() synchronizes all
 * the CPUs and updates the MTRRs on all of them. Then this flag is turned
 * off.
 */
int hold_mtrr_updates_on_aps;

static void ipi_handler(void *info)
/*  [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
    [RETURNS] Nothing.
*/
{
	struct set_mtrr_data *data = info;
	unsigned long flags;

	local_irq_save(flags);

	atomic_dec(&data->count);
	while(!atomic_read(&data->gate))
		cpu_relax();

	/*  The master has cleared me to execute  */
	if (data->smp_reg == ~0U) /* update all mtrr registers */
		/* At cpu hot-add time this will reinitialize the mtrr
		 * registers on the existing cpus. It is ok.  */
		mtrr_if->set_all();
	else /* single mtrr register update */
		mtrr_if->set(data->smp_reg, data->smp_base,
			     data->smp_size, data->smp_type);

	atomic_dec(&data->count);
	while(atomic_read(&data->gate))
		cpu_relax();

	atomic_dec(&data->count);
	local_irq_restore(flags);
}
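
/*
 * For reference: the three atomic_dec(&data->count) calls above pair with the
 * three "wait for data.count to reach zero" loops in set_mtrr() below.  Each
 * AP decrements once after disabling interrupts, once after applying the MTRR
 * update, and once after seeing the gate cleared again, so the initiating CPU
 * knows when every phase has completed and 'data' (which lives on its stack)
 * can safely go out of scope.
 */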

static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
	return type1 == MTRR_TYPE_UNCACHABLE ||
	       type2 == MTRR_TYPE_UNCACHABLE ||
	       (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
	       (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
}
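
/*
 * Rationale (following the Intel SDM rules for overlapping variable ranges):
 * if either range is UC, the effective type of the overlap is UC regardless
 * of the other type, and a WT/WB overlap resolves to WT; other overlapping
 * combinations are undefined.  So, for example, mtrr_add_page() below will
 * reject a WRCOMB request that partially overlaps an existing WRBACK range,
 * but will allow a WRTHROUGH request overlapping a WRBACK range.
 */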

/**
 * set_mtrr - update mtrrs on all processors
 * @reg:	mtrr in question
 * @base:	mtrr base
 * @size:	mtrr size
 * @type:	mtrr type
 *
 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
 *
 * 1. Send IPI to do the following:
 * 2. Disable Interrupts
 * 3. Wait for all procs to do so
 * 4. Enter no-fill cache mode
 * 5. Flush caches
 * 6. Clear PGE bit
 * 7. Flush all TLBs
 * 8. Disable all range registers
 * 9. Update the MTRRs
 * 10. Enable all range registers
 * 11. Flush all TLBs and caches again
 * 12. Enter normal cache mode and reenable caching
 * 13. Set PGE
 * 14. Wait for buddies to catch up
 * 15. Enable interrupts.
 *
 * What does that mean for us? Well, first we set data.count to the number
 * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait
 * until it hits 0 and proceed. We set the data.gate flag and reset data.count.
 * Meanwhile, they are waiting for that flag to be set. Once it's set, each
 * CPU goes through the transition of updating MTRRs. The CPU vendors may each
 * do it differently, so we call the mtrr_if->set() callback and let them take
 * care of it. When they're done, they again decrement data->count and wait
 * for data.gate to be reset.
 * When we finish, we wait for data.count to hit 0 and toggle the data.gate
 * flag. Everyone then enables interrupts and we all continue on.
 *
 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
 * becomes nops.
 */
static void set_mtrr(unsigned int reg, unsigned long base,
		     unsigned long size, mtrr_type type)
{
	cpumask_t allbutself;
	unsigned int nr_cpus;
	struct set_mtrr_data data;
	unsigned long flags;

	cpumask_andnot(&allbutself, &cpu_online_map,
                      cpumask_of(smp_processor_id()));
	nr_cpus = cpumask_weight(&allbutself);

	data.smp_reg = reg;
	data.smp_base = base;
	data.smp_size = size;
	data.smp_type = type;
	atomic_set(&data.count, nr_cpus);
	atomic_set(&data.gate, 0);

	/* Start the ball rolling on other CPUs */
	on_selected_cpus(&allbutself, ipi_handler, &data, 0);

	local_irq_save(flags);

	while (atomic_read(&data.count))
		cpu_relax();

	/* ok, reset count and toggle gate */
	atomic_set(&data.count, nr_cpus);
	smp_wmb();
	atomic_set(&data.gate, 1);

	/* do our MTRR business */

	/* HACK!
	 * We use this same function to initialize the mtrrs on boot.
	 * The state of the boot cpu's mtrrs has been saved, and we want
	 * to replicate that across all the APs.
	 * If we're doing that, @reg is set to something special...
	 */
	if (reg == ~0U)  /* update all mtrr registers */
		/* at boot or resume time, this will reinitialize the mtrrs on
		 * the bp. It is ok. */
		mtrr_if->set_all();
	else /* update the single mtrr register */
		mtrr_if->set(reg, base, size, type);

	/* wait for the others */
	while (atomic_read(&data.count))
		cpu_relax();

	atomic_set(&data.count, nr_cpus);
	smp_wmb();
	atomic_set(&data.gate, 0);

	/*
	 * Wait here for everyone to have seen the gate change
	 * so we're the last ones to touch 'data'
	 */
	while (atomic_read(&data.count))
		cpu_relax();

	local_irq_restore(flags);
}

/**
 *	mtrr_add_page - Add a memory type region
 *	@base: Physical base address of region in pages (in units of 4 kB!)
 *	@size: Physical size of region in pages (4 kB)
 *	@type: Type of MTRR desired
 *	@increment: If this is true do usage counting on the region
 *
 *	Memory type region registers control the caching on newer Intel and
 *	non Intel processors. This function allows drivers to request an
 *	MTRR is added. The details and hardware specifics of each processor's
 *	implementation are hidden from the caller, but nevertheless the
 *	caller should expect to need to provide a power of two size on an
 *	equivalent power of two boundary.
 *
 *	If the region cannot be added either because all regions are in use
 *	or the CPU cannot support it a negative value is returned. On success
 *	the register number for this entry is returned, but should be treated
 *	as a cookie only.
 *
 *	On a multiprocessor machine the changes are made to all processors.
 *	This is required on x86 by the Intel processors.
 *
 *	The available types are
 *
 *	%MTRR_TYPE_UNCACHABLE	-	No caching
 *
 *	%MTRR_TYPE_WRBACK	-	Write data back in bursts whenever
 *
 *	%MTRR_TYPE_WRCOMB	-	Write data back soon but allow bursts
 *
 *	%MTRR_TYPE_WRTHROUGH	-	Cache reads but not writes
 *
 *	BUGS: Needs a quiet flag for the cases where drivers do not mind
 *	failures and do not wish system log messages to be sent.
 */

int mtrr_add_page(unsigned long base, unsigned long size,
		  unsigned int type, char increment)
{
	int i, replace, error;
	mtrr_type ltype;
	unsigned long lbase, lsize;

	if (!mtrr_if)
		return -ENXIO;

	if ((error = mtrr_if->validate_add_page(base, size, type)))
		return error;

	if (type >= MTRR_NUM_TYPES) {
		printk(KERN_WARNING "mtrr: type: %u invalid\n", type);
		return -EINVAL;
	}

	/*  If the type is WC, check that this processor supports it  */
	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
		printk(KERN_WARNING
		       "mtrr: your processor doesn't support write-combining\n");
		return -EOPNOTSUPP;
	}

	if (!size) {
		printk(KERN_WARNING "mtrr: zero sized request\n");
		return -EINVAL;
	}

	if ((base | (base + size - 1)) >> (paddr_bits - PAGE_SHIFT)) {
		printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n");
		return -EINVAL;
	}

	error = -EINVAL;
	replace = -1;

	/*  Search for existing MTRR  */
	mutex_lock(&mtrr_mutex);
	for (i = 0; i < num_var_ranges; ++i) {
		mtrr_if->get(i, &lbase, &lsize, &ltype);
		if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase)
			continue;
		/*  At this point we know there is some kind of overlap/enclosure  */
		if (base < lbase || base + size - 1 > lbase + lsize - 1) {
			if (base <= lbase && base + size - 1 >= lbase + lsize - 1) {
				/*  New region encloses an existing region  */
				if (type == ltype) {
					replace = replace == -1 ? i : -2;
					continue;
				}
				else if (types_compatible(type, ltype))
					continue;
			}
			printk(KERN_WARNING
			       "mtrr: %#lx000,%#lx000 overlaps existing"
			       " %#lx000,%#lx000\n", base, size, lbase,
			       lsize);
			goto out;
		}
		/*  New region is enclosed by an existing region  */
		if (ltype != type) {
			if (types_compatible(type, ltype))
				continue;
			printk(KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
			       base, size, mtrr_attrib_to_str(ltype),
			       mtrr_attrib_to_str(type));
			goto out;
		}
		if (increment)
			++usage_table[i];
		error = i;
		goto out;
	}
	/*  Search for an empty MTRR  */
	i = mtrr_if->get_free_region(base, size, replace);
	if (i >= 0) {
		set_mtrr(i, base, size, type);
		if (likely(replace < 0))
			usage_table[i] = 1;
		else {
			usage_table[i] = usage_table[replace] + !!increment;
			if (unlikely(replace != i)) {
				set_mtrr(replace, 0, 0, 0);
				usage_table[replace] = 0;
			}
		}
	} else
		printk(KERN_INFO "mtrr: no more MTRRs available\n");
	error = i;
 out:
	mutex_unlock(&mtrr_mutex);
	return error;
}

static int mtrr_check(unsigned long base, unsigned long size)
{
	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
		printk(KERN_WARNING
			"mtrr: size and base must be multiples of 4 kiB\n");
		printk(KERN_DEBUG
			"mtrr: size: %#lx  base: %#lx\n", size, base);
		dump_stack();
		return -1;
	}
	return 0;
}

/**
 *	mtrr_add - Add a memory type region
 *	@base: Physical base address of region
 *	@size: Physical size of region
 *	@type: Type of MTRR desired
 *	@increment: If this is true do usage counting on the region
 *
 *	Memory type region registers control the caching on newer Intel and
 *	non Intel processors. This function allows drivers to request an
 *	MTRR is added. The details and hardware specifics of each processor's
 *	implementation are hidden from the caller, but nevertheless the
 *	caller should expect to need to provide a power of two size on an
 *	equivalent power of two boundary.
 *
 *	If the region cannot be added either because all regions are in use
 *	or the CPU cannot support it a negative value is returned. On success
 *	the register number for this entry is returned, but should be treated
 *	as a cookie only.
 *
 *	On a multiprocessor machine the changes are made to all processors.
 *	This is required on x86 by the Intel processors.
 *
 *	The available types are
 *
 *	%MTRR_TYPE_UNCACHABLE	-	No caching
 *
 *	%MTRR_TYPE_WRBACK	-	Write data back in bursts whenever
 *
 *	%MTRR_TYPE_WRCOMB	-	Write data back soon but allow bursts
 *
 *	%MTRR_TYPE_WRTHROUGH	-	Cache reads but not writes
 *
 *	BUGS: Needs a quiet flag for the cases where drivers do not mind
 *	failures and do not wish system log messages to be sent.
 */

int __init
mtrr_add(unsigned long base, unsigned long size, unsigned int type,
	 char increment)
{
	if (mtrr_check(base, size))
		return -EINVAL;
	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
			     increment);
}
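
/*
 * Illustrative usage only (not part of this driver): a caller wanting a
 * write-combining mapping for a hypothetical device aperture could do
 * something like the sketch below, keeping the returned register index as a
 * cookie for the matching delete.  The addresses are made up for the example.
 *
 *	int reg = mtrr_add(0xf0000000UL, 0x01000000UL, MTRR_TYPE_WRCOMB, 1);
 *	if (reg >= 0) {
 *		... use the region ...
 *		mtrr_del(reg, 0xf0000000UL, 0x01000000UL);
 *	}
 *
 * Note that in this file both mtrr_add() and mtrr_del() are __init, so such
 * pairing only makes sense during boot-time setup.
 */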

/**
 *	mtrr_del_page - delete a memory type region
 *	@reg: Register returned by mtrr_add
 *	@base: Physical base address
 *	@size: Size of region
 *
 *	If register is supplied then base and size are ignored. This is
 *	how drivers should call it.
 *
 *	Releases an MTRR region. If the usage count drops to zero the
 *	register is freed and the region returns to default state.
 *	On success the register is returned, on failure a negative error
 *	code.
 */

int mtrr_del_page(int reg, unsigned long base, unsigned long size)
{
	int i, max;
	mtrr_type ltype;
	unsigned long lbase, lsize;
	int error = -EINVAL;

	if (!mtrr_if)
		return -ENXIO;

	max = num_var_ranges;
	mutex_lock(&mtrr_mutex);
	if (reg < 0) {
		/*  Search for existing MTRR  */
		for (i = 0; i < max; ++i) {
			mtrr_if->get(i, &lbase, &lsize, &ltype);
			if (lbase == base && lsize == size) {
				reg = i;
				break;
			}
		}
		if (reg < 0) {
			printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n",
			       base, size);
			goto out;
		}
	}
	if (reg >= max) {
		printk(KERN_WARNING "mtrr: register: %d too big\n", reg);
		goto out;
	}
	mtrr_if->get(reg, &lbase, &lsize, &ltype);
	if (lsize < 1) {
		printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg);
		goto out;
	}
	if (usage_table[reg] < 1) {
		printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
		goto out;
	}
	if (--usage_table[reg] < 1)
		set_mtrr(reg, 0, 0, 0);
	error = reg;
 out:
	mutex_unlock(&mtrr_mutex);
	return error;
}
/**
 *	mtrr_del - delete a memory type region
 *	@reg: Register returned by mtrr_add
 *	@base: Physical base address
 *	@size: Size of region
 *
 *	If register is supplied then base and size are ignored. This is
 *	how drivers should call it.
 *
 *	Releases an MTRR region. If the usage count drops to zero the
 *	register is freed and the region returns to default state.
 *	On success the register is returned, on failure a negative error
 *	code.
 */

int __init
mtrr_del(int reg, unsigned long base, unsigned long size)
{
	if (mtrr_check(base, size))
		return -EINVAL;
	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
}

/* The suspend/resume methods are only for CPUs without MTRRs. CPUs using the
 * generic MTRR driver don't require this.
 */
struct mtrr_value {
	mtrr_type	ltype;
	unsigned long	lbase;
	unsigned long	lsize;
};

/**
 * mtrr_bp_init - initialize mtrrs on the boot CPU
 *
 * This needs to be called early; before any of the other CPUs are
 * initialized (i.e. before smp_init()).
 *
 */
void __init mtrr_bp_init(void)
{
	if (cpu_has_mtrr) {
		mtrr_if = &generic_mtrr_ops;
		size_or_mask = ~((1ULL << (paddr_bits - PAGE_SHIFT)) - 1);
		size_and_mask = ~size_or_mask & 0xfffff00000ULL;
	}

	if (mtrr_if) {
		set_num_var_ranges();
		init_table();
		if (use_intel())
			get_mtrr_state();
	}
}

void mtrr_ap_init(void)
{
	if (!mtrr_if || !use_intel() || hold_mtrr_updates_on_aps)
		return;
	/*
	 * Ideally we should hold mtrr_mutex here to prevent MTRR entries from
	 * changing, but this routine is called at CPU boot time and holding
	 * the lock there breaks it. This routine is called in two cases:
	 * 1. very early in software resume, when there are absolutely no MTRR
	 *    entry changes;
	 * 2. at CPU hot-add time. We let mtrr_add/del_page hold the CPU
	 *    hotplug lock to prevent MTRR entry changes.
	 */
	set_mtrr(~0U, 0, 0, 0);
}

/**
 * Save current fixed-range MTRR state of the BSP
 */
void mtrr_save_state(void)
{
	int cpu = get_cpu();

	if (cpu == 0)
		mtrr_save_fixed_ranges(NULL);
	else
		on_selected_cpus(cpumask_of(0), mtrr_save_fixed_ranges, NULL, 1);
	put_cpu();
}

void mtrr_aps_sync_begin(void)
{
	if (!use_intel())
		return;
	hold_mtrr_updates_on_aps = 1;
}

void mtrr_aps_sync_end(void)
{
	if (!use_intel())
		return;
	set_mtrr(~0U, 0, 0, 0);
	hold_mtrr_updates_on_aps = 0;
}

void mtrr_bp_restore(void)
{
	if (!use_intel())
		return;
	mtrr_if->set_all();
}

static int __init mtrr_init_finialize(void)
{
	if (!mtrr_if)
		return 0;
	if (use_intel())
		mtrr_state_warn();
	return 0;
}
__initcall(mtrr_init_finialize);