/*
 * mtrr.c: MTRR/PAT virtualization
 *
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/domain_page.h>
#include <asm/e820.h>
#include <asm/iocap.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/mtrr.h>
#include <asm/hvm/support.h>
#include <asm/hvm/cacheattr.h>
#include <public/hvm/e820.h>

/* Get page attribute fields (PAn) from PAT MSR. */
#define pat_cr_2_paf(pat_cr,n)  ((((uint64_t)pat_cr) >> ((n)<<3)) & 0xff)
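/* E.g. pat_cr_2_paf(pat, 2) yields bits 23:16 of the MSR, i.e. the PA2 type. */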

/* PAT entry to PTE flags (PAT, PCD, PWT bits). */
static const uint8_t pat_entry_2_pte_flags[8] = {
    0,           _PAGE_PWT,
    _PAGE_PCD,   _PAGE_PCD | _PAGE_PWT,
    _PAGE_PAT,   _PAGE_PAT | _PAGE_PWT,
    _PAGE_PAT | _PAGE_PCD, _PAGE_PAT | _PAGE_PCD | _PAGE_PWT };

/* Effective mm type lookup table, according to MTRR and PAT. */
static const uint8_t mm_type_tbl[MTRR_NUM_TYPES][PAT_TYPE_NUMS] = {
#define RS MEMORY_NUM_TYPES
#define UC MTRR_TYPE_UNCACHABLE
#define WB MTRR_TYPE_WRBACK
#define WC MTRR_TYPE_WRCOMB
#define WP MTRR_TYPE_WRPROT
#define WT MTRR_TYPE_WRTHROUGH

/*          PAT(UC, WC, RS, RS, WT, WP, WB, UC-) */
/* MTRR(UC) */ {UC, WC, RS, RS, UC, UC, UC, UC},
/* MTRR(WC) */ {UC, WC, RS, RS, UC, UC, WC, WC},
/* MTRR(RS) */ {RS, RS, RS, RS, RS, RS, RS, RS},
/* MTRR(RS) */ {RS, RS, RS, RS, RS, RS, RS, RS},
/* MTRR(WT) */ {UC, WC, RS, RS, WT, WP, WT, UC},
/* MTRR(WP) */ {UC, WC, RS, RS, WT, WP, WP, WC},
/* MTRR(WB) */ {UC, WC, RS, RS, WT, WP, WB, UC}

#undef UC
#undef WC
#undef WT
#undef WP
#undef WB
#undef RS
};

/*
 * Reverse lookup table, to find a pat type according to MTRR and effective
 * memory type. This table is dynamically generated.
 */
static uint8_t __read_mostly mtrr_epat_tbl[MTRR_NUM_TYPES][MEMORY_NUM_TYPES] =
    { [0 ... MTRR_NUM_TYPES-1] =
        { [0 ... MEMORY_NUM_TYPES-1] = INVALID_MEM_TYPE }
    };

/* Lookup table for PAT entry of a given PAT value in host PAT. */
static uint8_t __read_mostly pat_entry_tbl[PAT_TYPE_NUMS] =
    { [0 ... PAT_TYPE_NUMS-1] = INVALID_MEM_TYPE };

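/*
 * Check whether any two valid variable ranges overlap: ranges i and j
 * overlap exactly when their bases agree under the AND of both masks.
 */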
bool_t is_var_mtrr_overlapped(const struct mtrr_state *m)
{
    unsigned int seg, i;
    unsigned int num_var_ranges = (uint8_t)m->mtrr_cap;

    for ( i = 0; i < num_var_ranges; i++ )
    {
        uint64_t base1 = m->var_ranges[i].base >> PAGE_SHIFT;
        uint64_t mask1 = m->var_ranges[i].mask >> PAGE_SHIFT;

        if ( !(m->var_ranges[i].mask & MTRR_PHYSMASK_VALID) )
            continue;

        for ( seg = i + 1; seg < num_var_ranges; seg++ )
        {
            uint64_t base2 = m->var_ranges[seg].base >> PAGE_SHIFT;
            uint64_t mask2 = m->var_ranges[seg].mask >> PAGE_SHIFT;

            if ( !(m->var_ranges[seg].mask & MTRR_PHYSMASK_VALID) )
                continue;

            if ( (base1 & mask1 & mask2) == (base2 & mask2 & mask1) )
            {
                /* MTRR is overlapped. */
                return 1;
            }
        }
    }
    return 0;
}

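/*
 * Build the derived lookup tables at boot:
 *  - mtrr_epat_tbl inverts mm_type_tbl, mapping (MTRR type, effective type)
 *    back to a PAT type;
 *  - pat_entry_tbl records, for each PAT type, the first host PAT entry
 *    holding that type.
 */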
static int __init hvm_mtrr_pat_init(void)
{
    unsigned int i, j;

    for ( i = 0; i < MTRR_NUM_TYPES; i++ )
    {
        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
        {
            unsigned int tmp = mm_type_tbl[i][j];

            if ( tmp < MEMORY_NUM_TYPES )
                mtrr_epat_tbl[i][tmp] = j;
        }
    }

    for ( i = 0; i < PAT_TYPE_NUMS; i++ )
    {
        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
        {
            if ( pat_cr_2_paf(host_pat, j) == i )
            {
                pat_entry_tbl[i] = j;
                break;
            }
        }
    }

    return 0;
}
__initcall(hvm_mtrr_pat_init);

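/* Convert a PAT type into the PTE bits (PAT/PCD/PWT) selecting it in host PAT. */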
uint8_t pat_type_2_pte_flags(uint8_t pat_type)
{
    unsigned int pat_entry = pat_entry_tbl[pat_type];

    /*
     * INVALID_MEM_TYPE means that no entry in the host PAT holds the given
     * pat_type.  This cannot happen if the host PAT covers all PAT types.
     */
    if ( unlikely(pat_entry == INVALID_MEM_TYPE) )
        pat_entry = pat_entry_tbl[PAT_TYPE_UNCACHABLE];

    return pat_entry_2_pte_flags[pat_entry];
}

int hvm_vcpu_cacheattr_init(struct vcpu *v)
{
    struct mtrr_state *m = &v->arch.hvm_vcpu.mtrr;

    memset(m, 0, sizeof(*m));

    m->var_ranges = xzalloc_array(struct mtrr_var_range, MTRR_VCNT);
    if ( m->var_ranges == NULL )
        return -ENOMEM;

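    /*
     * MTRRcap layout: WC supported (bit 10), fixed ranges supported (bit 8),
     * VCNT variable ranges in bits 7:0.
     */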
    m->mtrr_cap = (1u << 10) | (1u << 8) | MTRR_VCNT;

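    /* Initialise the guest PAT to the architectural power-on default. */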
    v->arch.hvm_vcpu.pat_cr =
        ((uint64_t)PAT_TYPE_WRBACK) |               /* PAT0: WB */
        ((uint64_t)PAT_TYPE_WRTHROUGH << 8) |       /* PAT1: WT */
        ((uint64_t)PAT_TYPE_UC_MINUS << 16) |       /* PAT2: UC- */
        ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |     /* PAT3: UC */
        ((uint64_t)PAT_TYPE_WRBACK << 32) |         /* PAT4: WB */
        ((uint64_t)PAT_TYPE_WRTHROUGH << 40) |      /* PAT5: WT */
        ((uint64_t)PAT_TYPE_UC_MINUS << 48) |       /* PAT6: UC- */
        ((uint64_t)PAT_TYPE_UNCACHABLE << 56);      /* PAT7: UC */

    return 0;
}

void hvm_vcpu_cacheattr_destroy(struct vcpu *v)
{
    xfree(v->arch.hvm_vcpu.mtrr.var_ranges);
}

/*
 * Get MTRR memory type for physical address pa.
 *
 * May return a negative value when order > 0, indicating to the caller
 * that the respective mapping needs splitting.
 */
static int get_mtrr_type(const struct mtrr_state *m,
                         paddr_t pa, unsigned int order)
{
    uint8_t overlap_mtrr = 0;
    uint8_t overlap_mtrr_pos = 0;
    uint64_t mask = -(uint64_t)PAGE_SIZE << order;
    unsigned int seg, num_var_ranges = m->mtrr_cap & 0xff;

    if ( unlikely(!(m->enabled & 0x2)) )
        return MTRR_TYPE_UNCACHABLE;

    pa &= mask;
    if ( (pa < 0x100000) && (m->enabled & 1) )
    {
        /* Fixed range MTRR takes effect. */
        uint32_t addr = (uint32_t)pa, index;

        if ( addr < 0x80000 )
        {
            /* 0x00000 ... 0x7FFFF in 64k steps */
            if ( order > 4 )
                return -1;
            seg = (addr >> 16);
            return m->fixed_ranges[seg];
        }
        else if ( addr < 0xc0000 )
        {
            /* 0x80000 ... 0xBFFFF in 16k steps */
            if ( order > 2 )
                return -1;
            seg = (addr - 0x80000) >> 14;
            index = (seg >> 3) + 1;
            seg &= 7;            /* select 0-7 segments */
            return m->fixed_ranges[index*8 + seg];
        }
        else
        {
            /* 0xC0000 ... 0xFFFFF in 4k steps */
            if ( order )
                return -1;
            seg = (addr - 0xc0000) >> 12;
            index = (seg >> 3) + 3;
            seg &= 7;            /* select 0-7 segments */
            return m->fixed_ranges[index*8 + seg];
        }
    }

    /* Match with variable MTRRs. */
    for ( seg = 0; seg < num_var_ranges; seg++ )
    {
        uint64_t phys_base = m->var_ranges[seg].base;
        uint64_t phys_mask = m->var_ranges[seg].mask;

        if ( phys_mask & MTRR_PHYSMASK_VALID )
        {
            phys_mask &= mask;
            if ( (pa & phys_mask) == (phys_base & phys_mask) )
            {
                if ( unlikely(m->overlapped) || order )
                {
                    overlap_mtrr |= 1 << (phys_base & MTRR_PHYSBASE_TYPE_MASK);
                    overlap_mtrr_pos = phys_base & MTRR_PHYSBASE_TYPE_MASK;
                }
                else
                {
                    /* If no overlap, return the found one */
                    return (phys_base & MTRR_PHYSBASE_TYPE_MASK);
                }
            }
        }
    }

    /* Not found? */
    if ( unlikely(overlap_mtrr == 0) )
        return m->def_type;

    /* One match, or multiple identical ones? */
    if ( likely(overlap_mtrr == (1 << overlap_mtrr_pos)) )
        return overlap_mtrr_pos;

    if ( order )
        return -1;

    /* Two or more matches, one being UC? */
    if ( overlap_mtrr & (1 << MTRR_TYPE_UNCACHABLE) )
        return MTRR_TYPE_UNCACHABLE;

    /* Two or more matches, all of them WT and WB? */
    if ( overlap_mtrr ==
         ((1 << MTRR_TYPE_WRTHROUGH) | (1 << MTRR_TYPE_WRBACK)) )
        return MTRR_TYPE_WRTHROUGH;

    /* Behaviour is undefined, but return the last overlapped type. */
    return overlap_mtrr_pos;
}

/*
 * Return the memory type encoded by PAT for the given PTE flags.
 * NOTE: valid only when paging is enabled.
 * Only 4K page PTEs are handled here.
 */
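/* E.g. PCD|PWT with PAT clear selects entry 3; PAT|PCD|PWT selects entry 7. */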
static uint8_t page_pat_type(uint64_t pat_cr, uint32_t pte_flags)
{
    int32_t pat_entry;

    /* PCD/PWT -> bit 1/0 of PAT entry */
    pat_entry = ( pte_flags >> 3 ) & 0x3;
    /* PAT bits as bit 2 of PAT entry */
    if ( pte_flags & _PAGE_PAT )
        pat_entry |= 4;

    return (uint8_t)pat_cr_2_paf(pat_cr, pat_entry);
}

/*
 * Effective memory type for leaf page.
 */
static uint8_t effective_mm_type(struct mtrr_state *m,
                                 uint64_t pat,
                                 paddr_t gpa,
                                 uint32_t pte_flags,
                                 uint8_t gmtrr_mtype)
{
    uint8_t mtrr_mtype, pat_value, effective;

    /*
     * If get_pat_flags() gives a dedicated MTRR type, just use it.
     */
    if ( gmtrr_mtype == NO_HARDCODE_MEM_TYPE )
        mtrr_mtype = get_mtrr_type(m, gpa, 0);
    else
        mtrr_mtype = gmtrr_mtype;

    pat_value = page_pat_type(pat, pte_flags);

    effective = mm_type_tbl[mtrr_mtype][pat_value];

    return effective;
}

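/*
 * Compute the shadow PTE cache attribute bits such that host MTRR (for
 * spaddr) combined with host PAT reproduces the guest's effective memory
 * type for gpaddr.
 */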
uint32_t get_pat_flags(struct vcpu *v,
                       uint32_t gl1e_flags,
                       paddr_t gpaddr,
                       paddr_t spaddr,
                       uint8_t gmtrr_mtype)
{
    uint8_t guest_eff_mm_type;
    uint8_t shadow_mtrr_type;
    uint8_t pat_entry_value;
    uint64_t pat = v->arch.hvm_vcpu.pat_cr;
    struct mtrr_state *g = &v->arch.hvm_vcpu.mtrr;

    /*
     * 1. Get the effective memory type of the guest physical address,
     *    using the guest's MTRR and PAT state.
     */
    guest_eff_mm_type = effective_mm_type(g, pat, gpaddr,
                                          gl1e_flags, gmtrr_mtype);
    /* 2. Get the memory type of the host physical address, from host MTRRs. */
    shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr, 0);

    /*
     * 3. Find the PAT entry matching the host MTRR memory type and the
     *    guest effective memory type.
     */
    pat_entry_value = mtrr_epat_tbl[shadow_mtrr_type][guest_eff_mm_type];
    /*
     * If a conflict occurs (e.g. the host MTRR type is UC while the guest
     * memory type is WB), fall back to UC as the effective memory type:
     * returning PAT_TYPE_UNCACHABLE here always yields UC.
     */
    if ( pat_entry_value == INVALID_MEM_TYPE )
    {
        struct domain *d = v->domain;
        p2m_type_t p2mt;

        get_gfn_query_unlocked(d, paddr_to_pfn(gpaddr), &p2mt);
        if ( p2m_is_ram(p2mt) )
            gdprintk(XENLOG_WARNING,
                     "Conflict occurs for a given guest l1e flags:%x "
                     "at %"PRIx64" (the effective mm type:%d), "
                     "because the host mtrr type is:%d\n",
                     gl1e_flags, (uint64_t)gpaddr, guest_eff_mm_type,
                     shadow_mtrr_type);
        pat_entry_value = PAT_TYPE_UNCACHABLE;
    }
    /* 4. Get the PTE flags. */
    return pat_type_2_pte_flags(pat_entry_value);
}

static inline bool_t valid_mtrr_type(uint8_t type)
{
    switch ( type )
    {
    case MTRR_TYPE_UNCACHABLE:
    case MTRR_TYPE_WRBACK:
    case MTRR_TYPE_WRCOMB:
    case MTRR_TYPE_WRPROT:
    case MTRR_TYPE_WRTHROUGH:
        return 1;
    }
    return 0;
}

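/* IA32_MTRR_DEF_TYPE: default type in bits 7:0, FE in bit 10, E in bit 11. */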
bool_t mtrr_def_type_msr_set(struct domain *d, struct mtrr_state *m,
                             uint64_t msr_content)
{
    uint8_t def_type = msr_content & 0xff;
    uint8_t enabled = (msr_content >> 10) & 0x3;

    if ( unlikely(!valid_mtrr_type(def_type)) )
    {
        HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid MTRR def type:%x\n", def_type);
        return 0;
    }

    if ( unlikely(msr_content && (msr_content & ~0xcffUL)) )
    {
        HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
                    msr_content);
        return 0;
    }

    if ( m->enabled != enabled || m->def_type != def_type )
    {
        m->enabled = enabled;
        m->def_type = def_type;
        memory_type_changed(d);
    }

    return 1;
}

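/* 'row' selects one of the fixed-range MSRs; each MSR packs eight 8-bit types. */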
bool_t mtrr_fix_range_msr_set(struct domain *d, struct mtrr_state *m,
                              uint32_t row, uint64_t msr_content)
{
    uint64_t *fixed_range_base = (uint64_t *)m->fixed_ranges;

    if ( fixed_range_base[row] != msr_content )
    {
        uint8_t *range = (uint8_t*)&msr_content;
        unsigned int i;

        for ( i = 0; i < 8; i++ )
            if ( unlikely(!valid_mtrr_type(range[i])) )
                return 0;

        fixed_range_base[row] = msr_content;
        memory_type_changed(d);
    }

    return 1;
}

bool_t mtrr_var_range_msr_set(
    struct domain *d, struct mtrr_state *m, uint32_t msr, uint64_t msr_content)
{
    uint32_t index, phys_addr;
    uint64_t msr_mask;
    uint64_t *var_range_base = (uint64_t*)m->var_ranges;

    index = msr - MSR_IA32_MTRR_PHYSBASE(0);
    if ( var_range_base[index] == msr_content )
        return 1;

    if ( unlikely(!valid_mtrr_type((uint8_t)msr_content)) )
        return 0;

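    /*
     * Reject writes setting reserved bits: anything above maxphysaddr, plus
     * bits 10:0 of PHYSMASK (bit 11 is the valid bit) and bits 11:8 of
     * PHYSBASE (bits 7:0 hold the type).
     */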
    if ( d == current->domain )
        phys_addr = d->arch.cpuid->extd.maxphysaddr;
    else
        phys_addr = paddr_bits;
    msr_mask = ~((((uint64_t)1) << phys_addr) - 1);
    msr_mask |= (index & 1) ? 0x7ffUL : 0xf00UL;
    if ( unlikely(msr_content & msr_mask) )
    {
        HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
                    msr_content);
        return 0;
    }

    var_range_base[index] = msr_content;

    m->overlapped = is_var_mtrr_overlapped(m);

    memory_type_changed(d);

    return 1;
}

bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs)
{
    struct mtrr_state *md = &vd->arch.hvm_vcpu.mtrr;
    struct mtrr_state *ms = &vs->arch.hvm_vcpu.mtrr;
    int32_t res;
    uint8_t num_var_ranges = (uint8_t)md->mtrr_cap;

    /* Test fixed ranges. */
    res = memcmp(md->fixed_ranges, ms->fixed_ranges,
                 NUM_FIXED_RANGES*sizeof(mtrr_type));
    if ( res )
        return 1;

    /* Test var ranges. */
    res = memcmp(md->var_ranges, ms->var_ranges,
                 num_var_ranges*sizeof(struct mtrr_var_range));
    if ( res )
        return 1;

    /* Test default type MSR: the states differ if either field differs. */
    if ( (md->def_type != ms->def_type) ||
         (md->enabled != ms->enabled) )
        return 1;

    /* Test PAT. */
    if ( vd->arch.hvm_vcpu.pat_cr != vs->arch.hvm_vcpu.pat_cr )
        return 1;

    return 0;
}

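/*
 * A guest frame range whose cache attribute has been pinned
 * (see hvm_set_mem_pinned_cacheattr()).
 */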
struct hvm_mem_pinned_cacheattr_range {
    struct list_head list;
    uint64_t start, end;
    uint32_t type;
    struct rcu_head rcu;
};

static DEFINE_RCU_READ_LOCK(pinned_cacheattr_rcu_lock);

void hvm_init_cacheattr_region_list(struct domain *d)
{
    INIT_LIST_HEAD(&d->arch.hvm_domain.pinned_cacheattr_ranges);
}

void hvm_destroy_cacheattr_region_list(struct domain *d)
{
    struct list_head *head = &d->arch.hvm_domain.pinned_cacheattr_ranges;
    struct hvm_mem_pinned_cacheattr_range *range;

    while ( !list_empty(head) )
    {
        range = list_entry(head->next,
                           struct hvm_mem_pinned_cacheattr_range,
                           list);
        list_del(&range->list);
        xfree(range);
    }
}

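/*
 * Look up the pinned cache attribute covering the 2^order-page region at gfn.
 * Returns the PAT type if one range fully covers the region, -EADDRNOTAVAIL
 * if a range only partially overlaps it (the caller needs to split the
 * mapping), or -ENXIO if no range matches.
 */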
int hvm_get_mem_pinned_cacheattr(struct domain *d, gfn_t gfn,
                                 unsigned int order)
{
    struct hvm_mem_pinned_cacheattr_range *range;
    uint64_t mask = ~(uint64_t)0 << order;
    int rc = -ENXIO;

    ASSERT(is_hvm_domain(d));

    rcu_read_lock(&pinned_cacheattr_rcu_lock);
    list_for_each_entry_rcu ( range,
                              &d->arch.hvm_domain.pinned_cacheattr_ranges,
                              list )
    {
        if ( ((gfn_x(gfn) & mask) >= range->start) &&
             ((gfn_x(gfn) | ~mask) <= range->end) )
        {
            rc = range->type;
            break;
        }
        if ( ((gfn_x(gfn) & mask) <= range->end) &&
             ((gfn_x(gfn) | ~mask) >= range->start) )
        {
            rc = -EADDRNOTAVAIL;
            break;
        }
    }
    rcu_read_unlock(&pinned_cacheattr_rcu_lock);

    return rc;
}

static void free_pinned_cacheattr_entry(struct rcu_head *rcu)
{
    xfree(container_of(rcu, struct hvm_mem_pinned_cacheattr_range, rcu));
}

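/*
 * Pin [gfn_start, gfn_end] to the given PAT type, update an existing
 * identical range in place, or remove it when type is
 * XEN_DOMCTL_DELETE_MEM_CACHEATTR.
 */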
int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
                                 uint64_t gfn_end, uint32_t type)
{
    struct hvm_mem_pinned_cacheattr_range *range;
    int rc = 1;

    if ( !is_hvm_domain(d) )
        return -EOPNOTSUPP;

    if ( gfn_end < gfn_start || (gfn_start | gfn_end) >> paddr_bits )
        return -EINVAL;

    switch ( type )
    {
    case XEN_DOMCTL_DELETE_MEM_CACHEATTR:
        /* Remove the requested range. */
        rcu_read_lock(&pinned_cacheattr_rcu_lock);
        list_for_each_entry_rcu ( range,
                                  &d->arch.hvm_domain.pinned_cacheattr_ranges,
                                  list )
            if ( range->start == gfn_start && range->end == gfn_end )
            {
                rcu_read_unlock(&pinned_cacheattr_rcu_lock);
                list_del_rcu(&range->list);
                type = range->type;
                call_rcu(&range->rcu, free_pinned_cacheattr_entry);
                p2m_memory_type_changed(d);
                switch ( type )
                {
                case PAT_TYPE_UC_MINUS:
                    /*
                     * For EPT we can also avoid the flush in this case;
                     * see epte_get_entry_emt().
                     */
                    if ( hap_enabled(d) && cpu_has_vmx )
                case PAT_TYPE_UNCACHABLE:
                        break;
                    /* fall through */
                default:
                    flush_all(FLUSH_CACHE);
                    break;
                }
                return 0;
            }
        rcu_read_unlock(&pinned_cacheattr_rcu_lock);
        return -ENOENT;

    case PAT_TYPE_UC_MINUS:
    case PAT_TYPE_UNCACHABLE:
    case PAT_TYPE_WRBACK:
    case PAT_TYPE_WRCOMB:
    case PAT_TYPE_WRPROT:
    case PAT_TYPE_WRTHROUGH:
        break;

    default:
        return -EINVAL;
    }

    rcu_read_lock(&pinned_cacheattr_rcu_lock);
    list_for_each_entry_rcu ( range,
                              &d->arch.hvm_domain.pinned_cacheattr_ranges,
                              list )
    {
        if ( range->start == gfn_start && range->end == gfn_end )
        {
            range->type = type;
            rc = 0;
            break;
        }
        if ( range->start <= gfn_end && gfn_start <= range->end )
        {
            rc = -EBUSY;
            break;
        }
    }
    rcu_read_unlock(&pinned_cacheattr_rcu_lock);
    if ( rc <= 0 )
        return rc;

    range = xzalloc(struct hvm_mem_pinned_cacheattr_range);
    if ( range == NULL )
        return -ENOMEM;

    range->start = gfn_start;
    range->end = gfn_end;
    range->type = type;

    list_add_rcu(&range->list, &d->arch.hvm_domain.pinned_cacheattr_ranges);
    p2m_memory_type_changed(d);
    if ( type != PAT_TYPE_WRBACK )
        flush_all(FLUSH_CACHE);

    return 0;
}

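/* Save each vCPU's MTRR and PAT state into the HVM save record. */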
static int hvm_save_mtrr_msr(struct domain *d, hvm_domain_context_t *h)
{
    int i;
    struct vcpu *v;
    struct hvm_hw_mtrr hw_mtrr;
    struct mtrr_state *mtrr_state;
    /* save mtrr & pat */
    for_each_vcpu(d, v)
    {
        mtrr_state = &v->arch.hvm_vcpu.mtrr;

        hvm_get_guest_pat(v, &hw_mtrr.msr_pat_cr);

        hw_mtrr.msr_mtrr_def_type = mtrr_state->def_type
                                    | (mtrr_state->enabled << 10);
        hw_mtrr.msr_mtrr_cap = mtrr_state->mtrr_cap;

        for ( i = 0; i < MTRR_VCNT; i++ )
        {
            /* save physbase */
            hw_mtrr.msr_mtrr_var[i*2] =
                ((uint64_t*)mtrr_state->var_ranges)[i*2];
            /* save physmask */
            hw_mtrr.msr_mtrr_var[i*2+1] =
                ((uint64_t*)mtrr_state->var_ranges)[i*2+1];
        }

        for ( i = 0; i < NUM_FIXED_MSR; i++ )
            hw_mtrr.msr_mtrr_fixed[i] =
                ((uint64_t*)mtrr_state->fixed_ranges)[i];

        if ( hvm_save_entry(MTRR, v->vcpu_id, h, &hw_mtrr) != 0 )
            return 1;
    }
    return 0;
}

static int hvm_load_mtrr_msr(struct domain *d, hvm_domain_context_t *h)
{
    int vcpuid, i;
    struct vcpu *v;
    struct mtrr_state *mtrr_state;
    struct hvm_hw_mtrr hw_mtrr;

    vcpuid = hvm_load_instance(h);
    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
    {
        dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n",
                d->domain_id, vcpuid);
        return -EINVAL;
    }

    if ( hvm_load_entry(MTRR, h, &hw_mtrr) != 0 )
        return -EINVAL;

    mtrr_state = &v->arch.hvm_vcpu.mtrr;

    hvm_set_guest_pat(v, hw_mtrr.msr_pat_cr);

    mtrr_state->mtrr_cap = hw_mtrr.msr_mtrr_cap;

    for ( i = 0; i < NUM_FIXED_MSR; i++ )
        mtrr_fix_range_msr_set(d, mtrr_state, i, hw_mtrr.msr_mtrr_fixed[i]);

    for ( i = 0; i < MTRR_VCNT; i++ )
    {
        mtrr_var_range_msr_set(d, mtrr_state,
                               MSR_IA32_MTRR_PHYSBASE(i),
                               hw_mtrr.msr_mtrr_var[i * 2]);
        mtrr_var_range_msr_set(d, mtrr_state,
                               MSR_IA32_MTRR_PHYSMASK(i),
                               hw_mtrr.msr_mtrr_var[i * 2 + 1]);
    }

    mtrr_def_type_msr_set(d, mtrr_state, hw_mtrr.msr_mtrr_def_type);

    return 0;
}

HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, hvm_load_mtrr_msr,
                          1, HVMSR_PER_VCPU);

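/*
 * MTRR/PAT changes only need propagating (p2m recalculation plus cache
 * flush) when the domain has devices assigned through the IOMMU and its
 * vCPUs have been allocated.
 */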
void memory_type_changed(struct domain *d)
{
    if ( need_iommu(d) && d->vcpu && d->vcpu[0] )
    {
        p2m_memory_type_changed(d);
        flush_all(FLUSH_CACHE);
    }
}

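/*
 * Compute the EPT memory type for a gfn->mfn mapping of the given order,
 * combining pinned ranges, guest MTRR state and host MTRRs.  *ipat is set
 * when the guest PAT is to be ignored.  A negative return value tells the
 * caller the mapping needs splitting.
 */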
int epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
                       unsigned int order, uint8_t *ipat, bool_t direct_mmio)
{
    int gmtrr_mtype, hmtrr_mtype;
    struct vcpu *v = current;

    *ipat = 0;

    if ( v->domain != d )
        v = d->vcpu ? d->vcpu[0] : NULL;

    /* Mask, not add, for order so it works with INVALID_MFN on unmapping */
    if ( rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
                                 mfn_x(mfn) | ((1UL << order) - 1)) )
    {
        if ( !order || rangeset_contains_range(mmio_ro_ranges, mfn_x(mfn),
                                               mfn_x(mfn) | ((1UL << order) - 1)) )
        {
            *ipat = 1;
            return MTRR_TYPE_UNCACHABLE;
        }
        /* Force invalid memory type so resolve_misconfig() will split it */
        return -1;
    }

    if ( direct_mmio )
    {
        if ( (mfn_x(mfn) ^ d->arch.hvm_domain.vmx.apic_access_mfn) >> order )
            return MTRR_TYPE_UNCACHABLE;
        if ( order )
            return -1;
        *ipat = 1;
        return MTRR_TYPE_WRBACK;
    }

    if ( !mfn_valid(mfn) )
    {
        *ipat = 1;
        return MTRR_TYPE_UNCACHABLE;
    }

    if ( !need_iommu(d) && !cache_flush_permitted(d) )
    {
        *ipat = 1;
        return MTRR_TYPE_WRBACK;
    }

    gmtrr_mtype = hvm_get_mem_pinned_cacheattr(d, _gfn(gfn), order);
    if ( gmtrr_mtype >= 0 )
    {
        *ipat = 1;
        return gmtrr_mtype != PAT_TYPE_UC_MINUS ? gmtrr_mtype
                                                : MTRR_TYPE_UNCACHABLE;
    }
    if ( gmtrr_mtype == -EADDRNOTAVAIL )
        return -1;

    gmtrr_mtype = is_hvm_domain(d) && v ?
                  get_mtrr_type(&v->arch.hvm_vcpu.mtrr,
                                gfn << PAGE_SHIFT, order) :
                  MTRR_TYPE_WRBACK;
    hmtrr_mtype = get_mtrr_type(&mtrr_state, mfn_x(mfn) << PAGE_SHIFT, order);
    if ( gmtrr_mtype < 0 || hmtrr_mtype < 0 )
        return -1;

    /* If both types match we're fine. */
    if ( likely(gmtrr_mtype == hmtrr_mtype) )
        return hmtrr_mtype;

    /* If either type is UC, we have to go with that one. */
    if ( gmtrr_mtype == MTRR_TYPE_UNCACHABLE ||
         hmtrr_mtype == MTRR_TYPE_UNCACHABLE )
        return MTRR_TYPE_UNCACHABLE;

    /* If either type is WB, we have to go with the other one. */
    if ( gmtrr_mtype == MTRR_TYPE_WRBACK )
        return hmtrr_mtype;
    if ( hmtrr_mtype == MTRR_TYPE_WRBACK )
        return gmtrr_mtype;

    /*
     * At this point we have disagreeing WC, WT, or WP types. The only
     * combination that can be cleanly resolved is WT:WP. The ones involving
     * WC need to be converted to UC, both due to the memory ordering
     * differences and because WC disallows reads to be cached (WT and WP
     * permit this), while WT and WP require writes to go straight to memory
     * (WC can buffer them).
     */
    if ( (gmtrr_mtype == MTRR_TYPE_WRTHROUGH &&
          hmtrr_mtype == MTRR_TYPE_WRPROT) ||
         (gmtrr_mtype == MTRR_TYPE_WRPROT &&
          hmtrr_mtype == MTRR_TYPE_WRTHROUGH) )
        return MTRR_TYPE_WRPROT;

    return MTRR_TYPE_UNCACHABLE;
}