/*
 * mtrr.c: MTRR/PAT virtualization
 *
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/domain_page.h>
#include <asm/e820.h>
#include <asm/iocap.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/mtrr.h>
#include <asm/hvm/support.h>
#include <asm/hvm/cacheattr.h>
#include <public/hvm/e820.h>

/* Get page attribute fields (PAn) from PAT MSR. */
#define pat_cr_2_paf(pat_cr,n)  ((((uint64_t)pat_cr) >> ((n)<<3)) & 0xff)
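/*
 * E.g. with the power-on PAT layout 0x0007040600070406 (the same layout the
 * guest defaults to in hvm_vcpu_cacheattr_init() below), pat_cr_2_paf(pat, 2)
 * extracts 0x07, i.e. UC-.
 */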

/* PAT entry to PTE flags (PAT, PCD, PWT bits). */
static const uint8_t pat_entry_2_pte_flags[8] = {
    0,           _PAGE_PWT,
    _PAGE_PCD,   _PAGE_PCD | _PAGE_PWT,
    _PAGE_PAT,   _PAGE_PAT | _PAGE_PWT,
    _PAGE_PAT | _PAGE_PCD, _PAGE_PAT | _PAGE_PCD | _PAGE_PWT };

/* Effective mm type lookup table, according to MTRR and PAT. */
static const uint8_t mm_type_tbl[MTRR_NUM_TYPES][PAT_TYPE_NUMS] = {
#define RS MEMORY_NUM_TYPES
#define UC MTRR_TYPE_UNCACHABLE
#define WB MTRR_TYPE_WRBACK
#define WC MTRR_TYPE_WRCOMB
#define WP MTRR_TYPE_WRPROT
#define WT MTRR_TYPE_WRTHROUGH

/*          PAT(UC, WC, RS, RS, WT, WP, WB, UC-) */
/* MTRR(UC) */ {UC, WC, RS, RS, UC, UC, UC, UC},
/* MTRR(WC) */ {UC, WC, RS, RS, UC, UC, WC, WC},
/* MTRR(RS) */ {RS, RS, RS, RS, RS, RS, RS, RS},
/* MTRR(RS) */ {RS, RS, RS, RS, RS, RS, RS, RS},
/* MTRR(WT) */ {UC, WC, RS, RS, WT, WP, WT, UC},
/* MTRR(WP) */ {UC, WC, RS, RS, WT, WP, WP, WC},
/* MTRR(WB) */ {UC, WC, RS, RS, WT, WP, WB, UC}

#undef UC
#undef WC
#undef WT
#undef WP
#undef WB
#undef RS
};
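/*
 * RS ("reserved") marks the combinations involving PAT values 2/3 or MTRR
 * types 2/3, which have no architecturally defined meaning;
 * hvm_mtrr_pat_init() skips them when generating the reverse table below.
 */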

/*
 * Reverse lookup table, to find a pat type according to MTRR and effective
 * memory type. This table is dynamically generated.
 */
static uint8_t __read_mostly mtrr_epat_tbl[MTRR_NUM_TYPES][MEMORY_NUM_TYPES] =
    { [0 ... MTRR_NUM_TYPES-1] =
        { [0 ... MEMORY_NUM_TYPES-1] = INVALID_MEM_TYPE }
    };

/* Lookup table for PAT entry of a given PAT value in host PAT. */
static uint8_t __read_mostly pat_entry_tbl[PAT_TYPE_NUMS] =
    { [0 ... PAT_TYPE_NUMS-1] = INVALID_MEM_TYPE };

bool_t is_var_mtrr_overlapped(const struct mtrr_state *m)
{
    unsigned int seg, i;
    unsigned int num_var_ranges = (uint8_t)m->mtrr_cap;

    for ( i = 0; i < num_var_ranges; i++ )
    {
        uint64_t base1 = m->var_ranges[i].base >> PAGE_SHIFT;
        uint64_t mask1 = m->var_ranges[i].mask >> PAGE_SHIFT;

        if ( !(m->var_ranges[i].mask & MTRR_PHYSMASK_VALID) )
            continue;

        for ( seg = i + 1; seg < num_var_ranges; seg ++ )
        {
            uint64_t base2 = m->var_ranges[seg].base >> PAGE_SHIFT;
            uint64_t mask2 = m->var_ranges[seg].mask >> PAGE_SHIFT;

            if ( !(m->var_ranges[seg].mask & MTRR_PHYSMASK_VALID) )
                continue;

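            /*
             * Two valid variable ranges overlap iff their bases agree on
             * every address bit that both masks cover.
             */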
            if ( (base1 & mask1 & mask2) == (base2 & mask2 & mask1) )
            {
                /* MTRRs overlap. */
                return 1;
            }
        }
    }
    return 0;
}

static int __init hvm_mtrr_pat_init(void)
{
    unsigned int i, j;

    for ( i = 0; i < MTRR_NUM_TYPES; i++ )
    {
        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
        {
            unsigned int tmp = mm_type_tbl[i][j];

            if ( tmp < MEMORY_NUM_TYPES )
                mtrr_epat_tbl[i][tmp] = j;
        }
    }

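    /*
     * For each memory type, record the first host PAT entry programmed with
     * that type; pat_type_2_pte_flags() relies on this to pick PTE bits.
     */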
    for ( i = 0; i < PAT_TYPE_NUMS; i++ )
    {
        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
        {
            if ( pat_cr_2_paf(host_pat, j) == i )
            {
                pat_entry_tbl[i] = j;
                break;
            }
        }
    }

    return 0;
}
__initcall(hvm_mtrr_pat_init);

uint8_t pat_type_2_pte_flags(uint8_t pat_type)
{
    unsigned int pat_entry = pat_entry_tbl[pat_type];

    /*
     * INVALID_MEM_TYPE means no PAT entry of the requested type was found in
     * the host PAT. This cannot happen if the host PAT covers all PAT types.
     */
    if ( unlikely(pat_entry == INVALID_MEM_TYPE) )
        pat_entry = pat_entry_tbl[PAT_TYPE_UNCACHABLE];

    return pat_entry_2_pte_flags[pat_entry];
}

int hvm_vcpu_cacheattr_init(struct vcpu *v)
{
    struct mtrr_state *m = &v->arch.hvm_vcpu.mtrr;

    memset(m, 0, sizeof(*m));

    m->var_ranges = xzalloc_array(struct mtrr_var_range, MTRR_VCNT);
    if ( m->var_ranges == NULL )
        return -ENOMEM;

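    /* MTRRcap: WC supported (bit 10), fixed ranges supported (bit 8), VCNT. */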
    m->mtrr_cap = (1u << 10) | (1u << 8) | MTRR_VCNT;

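    /* Architectural power-on default value of IA32_PAT. */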
    v->arch.hvm_vcpu.pat_cr =
        ((uint64_t)PAT_TYPE_WRBACK) |               /* PAT0: WB */
        ((uint64_t)PAT_TYPE_WRTHROUGH << 8) |       /* PAT1: WT */
        ((uint64_t)PAT_TYPE_UC_MINUS << 16) |       /* PAT2: UC- */
        ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |     /* PAT3: UC */
        ((uint64_t)PAT_TYPE_WRBACK << 32) |         /* PAT4: WB */
        ((uint64_t)PAT_TYPE_WRTHROUGH << 40) |      /* PAT5: WT */
        ((uint64_t)PAT_TYPE_UC_MINUS << 48) |       /* PAT6: UC- */
        ((uint64_t)PAT_TYPE_UNCACHABLE << 56);      /* PAT7: UC */

    return 0;
}

void hvm_vcpu_cacheattr_destroy(struct vcpu *v)
{
    xfree(v->arch.hvm_vcpu.mtrr.var_ranges);
}

/*
 * Get MTRR memory type for physical address pa.
 *
 * May return a negative value when order > 0, indicating to the caller
 * that the respective mapping needs splitting.
 */
static int get_mtrr_type(const struct mtrr_state *m,
                         paddr_t pa, unsigned int order)
{
   uint8_t     overlap_mtrr = 0;
   uint8_t     overlap_mtrr_pos = 0;
   uint64_t    mask = -(uint64_t)PAGE_SIZE << order;
   unsigned int seg, num_var_ranges = m->mtrr_cap & 0xff;

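   /* MTRRs disabled altogether (MTRRdefType.E clear): everything is UC. */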
   if ( unlikely(!(m->enabled & 0x2)) )
       return MTRR_TYPE_UNCACHABLE;

   pa &= mask;
   if ( (pa < 0x100000) && (m->enabled & 1) )
   {
       /* Fixed range MTRR takes effect. */
       uint32_t addr = (uint32_t)pa, index;

       if ( addr < 0x80000 )
       {
           /* 0x00000 ... 0x7FFFF in 64k steps */
           if ( order > 4 )
               return -1;
           seg = (addr >> 16);
           return m->fixed_ranges[seg];
       }
       else if ( addr < 0xc0000 )
       {
           /* 0x80000 ... 0xBFFFF in 16k steps */
           if ( order > 2 )
               return -1;
           seg = (addr - 0x80000) >> 14;
           index = (seg >> 3) + 1;
           seg &= 7;            /* select 0-7 segments */
           return m->fixed_ranges[index*8 + seg];
       }
       else
       {
           /* 0xC0000 ... 0xFFFFF in 4k steps */
           if ( order )
               return -1;
           seg = (addr - 0xc0000) >> 12;
           index = (seg >> 3) + 3;
           seg &= 7;            /* select 0-7 segments */
           return m->fixed_ranges[index*8 + seg];
       }
   }

   /* Match with variable MTRRs. */
   for ( seg = 0; seg < num_var_ranges; seg++ )
   {
       uint64_t phys_base = m->var_ranges[seg].base;
       uint64_t phys_mask = m->var_ranges[seg].mask;

       if ( phys_mask & MTRR_PHYSMASK_VALID )
       {
           phys_mask &= mask;
           if ( (pa & phys_mask) == (phys_base & phys_mask) )
           {
               if ( unlikely(m->overlapped) || order )
               {
                   overlap_mtrr |= 1 << (phys_base & MTRR_PHYSBASE_TYPE_MASK);
                   overlap_mtrr_pos = phys_base & MTRR_PHYSBASE_TYPE_MASK;
               }
               else
               {
                   /* If no overlap, return the found one */
                   return (phys_base & MTRR_PHYSBASE_TYPE_MASK);
               }
           }
       }
   }

   /* Not found? */
   if ( unlikely(overlap_mtrr == 0) )
       return m->def_type;

   /* One match, or multiple identical ones? */
   if ( likely(overlap_mtrr == (1 << overlap_mtrr_pos)) )
       return overlap_mtrr_pos;

   if ( order )
       return -1;

   /* Two or more matches, one being UC? */
   if ( overlap_mtrr & (1 << MTRR_TYPE_UNCACHABLE) )
       return MTRR_TYPE_UNCACHABLE;

   /* Two or more matches, all of them WT and WB? */
   if ( overlap_mtrr ==
        ((1 << MTRR_TYPE_WRTHROUGH) | (1 << MTRR_TYPE_WRBACK)) )
       return MTRR_TYPE_WRTHROUGH;

   /* Behaviour is undefined, but return the last overlapped type. */
   return overlap_mtrr_pos;
}

/*
 * Return the memory type from PAT.
 * NOTE: valid only when paging is enabled.
 *       Only 4K page PTEs are supported now.
 */
static uint8_t page_pat_type(uint64_t pat_cr, uint32_t pte_flags)
{
    int32_t pat_entry;

    /* PCD/PWT -> bits 1/0 of the PAT entry index. */
    pat_entry = ( pte_flags >> 3 ) & 0x3;
    /* PAT -> bit 2 of the PAT entry index. */
    if ( pte_flags & _PAGE_PAT )
        pat_entry |= 4;

    return (uint8_t)pat_cr_2_paf(pat_cr, pat_entry);
}

/*
 * Effective memory type for leaf page.
 */
static uint8_t effective_mm_type(struct mtrr_state *m,
                                 uint64_t pat,
                                 paddr_t gpa,
                                 uint32_t pte_flags,
                                 uint8_t gmtrr_mtype)
{
    uint8_t mtrr_mtype, pat_value, effective;

    /*
     * If get_pat_flags() gives a dedicated MTRR type, just use it.
     */
    if ( gmtrr_mtype == NO_HARDCODE_MEM_TYPE )
        mtrr_mtype = get_mtrr_type(m, gpa, 0);
    else
        mtrr_mtype = gmtrr_mtype;

    pat_value = page_pat_type(pat, pte_flags);

    effective = mm_type_tbl[mtrr_mtype][pat_value];

    return effective;
}

uint32_t get_pat_flags(struct vcpu *v,
                       uint32_t gl1e_flags,
                       paddr_t gpaddr,
                       paddr_t spaddr,
                       uint8_t gmtrr_mtype)
{
    uint8_t guest_eff_mm_type;
    uint8_t shadow_mtrr_type;
    uint8_t pat_entry_value;
    uint64_t pat = v->arch.hvm_vcpu.pat_cr;
    struct mtrr_state *g = &v->arch.hvm_vcpu.mtrr;

    /*
     * 1. Get the effective memory type of the guest physical address, from
     *    the combination of guest MTRR and guest PAT.
     */
    guest_eff_mm_type = effective_mm_type(g, pat, gpaddr,
                                          gl1e_flags, gmtrr_mtype);
    /* 2. Get the memory type of the host physical address, from host MTRR. */
    shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr, 0);

    /*
     * 3. Find the PAT memory type matching the host MTRR memory type and the
     *    guest effective memory type.
     */
    pat_entry_value = mtrr_epat_tbl[shadow_mtrr_type][guest_eff_mm_type];
    /*
     * If a conflict occurs (e.g. the host MTRR type is UC while the guest
     * memory type is WB), fall back to UC: returning PAT_TYPE_UNCACHABLE
     * always yields an effective memory type of UC.
     */
    if ( pat_entry_value == INVALID_MEM_TYPE )
    {
        struct domain *d = v->domain;
        p2m_type_t p2mt;
        get_gfn_query_unlocked(d, paddr_to_pfn(gpaddr), &p2mt);
        if (p2m_is_ram(p2mt))
            gdprintk(XENLOG_WARNING,
                    "Conflict occurs for a given guest l1e flags:%x "
                    "at %"PRIx64" (the effective mm type:%d), "
                    "because the host mtrr type is:%d\n",
                    gl1e_flags, (uint64_t)gpaddr, guest_eff_mm_type,
                    shadow_mtrr_type);
        pat_entry_value = PAT_TYPE_UNCACHABLE;
    }
    /* 4. Get the PTE flags. */
    return pat_type_2_pte_flags(pat_entry_value);
}

static inline bool_t valid_mtrr_type(uint8_t type)
{
    switch ( type )
    {
    case MTRR_TYPE_UNCACHABLE:
    case MTRR_TYPE_WRBACK:
    case MTRR_TYPE_WRCOMB:
    case MTRR_TYPE_WRPROT:
    case MTRR_TYPE_WRTHROUGH:
        return 1;
    }
    return 0;
}

bool_t mtrr_def_type_msr_set(struct domain *d, struct mtrr_state *m,
                             uint64_t msr_content)
{
    uint8_t def_type = msr_content & 0xff;
    uint8_t enabled = (msr_content >> 10) & 0x3;
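    /*
     * Bit 0 of 'enabled' is the fixed-range enable (FE), bit 1 the overall
     * MTRR enable (E), i.e. bits 10 and 11 of IA32_MTRR_DEF_TYPE.
     */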
395 
396     if ( unlikely(!valid_mtrr_type(def_type)) )
397     {
398          HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid MTRR def type:%x\n", def_type);
399          return 0;
400     }
401 
402     if ( unlikely(msr_content && (msr_content & ~0xcffUL)) )
403     {
404          HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
405                      msr_content);
406          return 0;
407     }
408 
409     if ( m->enabled != enabled || m->def_type != def_type )
410     {
411         m->enabled = enabled;
412         m->def_type = def_type;
413         memory_type_changed(d);
414     }
415 
416     return 1;
417 }
418 
mtrr_fix_range_msr_set(struct domain * d,struct mtrr_state * m,uint32_t row,uint64_t msr_content)419 bool_t mtrr_fix_range_msr_set(struct domain *d, struct mtrr_state *m,
420                               uint32_t row, uint64_t msr_content)
421 {
422     uint64_t *fixed_range_base = (uint64_t *)m->fixed_ranges;
423 
424     if ( fixed_range_base[row] != msr_content )
425     {
426         uint8_t *range = (uint8_t*)&msr_content;
427         unsigned int i;
428 
429         for ( i = 0; i < 8; i++ )
430             if ( unlikely(!valid_mtrr_type(range[i])) )
431                 return 0;
432 
433         fixed_range_base[row] = msr_content;
434         memory_type_changed(d);
435     }
436 
437     return 1;
438 }
439 
mtrr_var_range_msr_set(struct domain * d,struct mtrr_state * m,uint32_t msr,uint64_t msr_content)440 bool_t mtrr_var_range_msr_set(
441     struct domain *d, struct mtrr_state *m, uint32_t msr, uint64_t msr_content)
442 {
443     uint32_t index, phys_addr;
444     uint64_t msr_mask;
445     uint64_t *var_range_base = (uint64_t*)m->var_ranges;
446 
447     index = msr - MSR_IA32_MTRR_PHYSBASE(0);
448     if ( var_range_base[index] == msr_content )
449         return 1;
450 
451     if ( unlikely(!valid_mtrr_type((uint8_t)msr_content)) )
452         return 0;
453 
454     if ( d == current->domain )
455         phys_addr = d->arch.cpuid->extd.maxphysaddr;
456     else
457         phys_addr = paddr_bits;
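    /*
     * Bits above the physical address width are reserved.  In addition,
     * PHYSMASK MSRs (odd index) reserve bits 10:0, while PHYSBASE MSRs
     * (even index) reserve bits 11:8 above the type field.
     */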
    msr_mask = ~((((uint64_t)1) << phys_addr) - 1);
    msr_mask |= (index & 1) ? 0x7ffUL : 0xf00UL;
    if ( unlikely(msr_content & msr_mask) )
    {
        HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
                    msr_content);
        return 0;
    }

    var_range_base[index] = msr_content;

    m->overlapped = is_var_mtrr_overlapped(m);

    memory_type_changed(d);

    return 1;
}

bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs)
{
    struct mtrr_state *md = &vd->arch.hvm_vcpu.mtrr;
    struct mtrr_state *ms = &vs->arch.hvm_vcpu.mtrr;
    int32_t res;
    uint8_t num_var_ranges = (uint8_t)md->mtrr_cap;

    /* Test fixed ranges. */
    res = memcmp(md->fixed_ranges, ms->fixed_ranges,
            NUM_FIXED_RANGES*sizeof(mtrr_type));
    if ( res )
        return 1;

    /* Test var ranges. */
    res = memcmp(md->var_ranges, ms->var_ranges,
            num_var_ranges*sizeof(struct mtrr_var_range));
    if ( res )
        return 1;

    /* Test default type MSR. */
    if ( (md->def_type != ms->def_type)
            && (md->enabled != ms->enabled) )
        return 1;

    /* Test PAT. */
    if ( vd->arch.hvm_vcpu.pat_cr != vs->arch.hvm_vcpu.pat_cr )
        return 1;

    return 0;
}

struct hvm_mem_pinned_cacheattr_range {
    struct list_head list;
    uint64_t start, end;
    uint32_t type;
    struct rcu_head rcu;
};

static DEFINE_RCU_READ_LOCK(pinned_cacheattr_rcu_lock);

void hvm_init_cacheattr_region_list(struct domain *d)
{
    INIT_LIST_HEAD(&d->arch.hvm_domain.pinned_cacheattr_ranges);
}

void hvm_destroy_cacheattr_region_list(struct domain *d)
{
    struct list_head *head = &d->arch.hvm_domain.pinned_cacheattr_ranges;
    struct hvm_mem_pinned_cacheattr_range *range;

    while ( !list_empty(head) )
    {
        range = list_entry(head->next,
                           struct hvm_mem_pinned_cacheattr_range,
                           list);
        list_del(&range->list);
        xfree(range);
    }
}

int hvm_get_mem_pinned_cacheattr(struct domain *d, gfn_t gfn,
                                 unsigned int order)
{
    struct hvm_mem_pinned_cacheattr_range *range;
    uint64_t mask = ~(uint64_t)0 << order;
    int rc = -ENXIO;

    ASSERT(is_hvm_domain(d));

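    /*
     * Return the pinned type if the 2^order range lies entirely within a
     * single pinned range, -EADDRNOTAVAIL if it merely overlaps one (the
     * caller then needs to split the mapping), or -ENXIO if it hits none.
     */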
    rcu_read_lock(&pinned_cacheattr_rcu_lock);
    list_for_each_entry_rcu ( range,
                              &d->arch.hvm_domain.pinned_cacheattr_ranges,
                              list )
    {
        if ( ((gfn_x(gfn) & mask) >= range->start) &&
             ((gfn_x(gfn) | ~mask) <= range->end) )
        {
            rc = range->type;
            break;
        }
        if ( ((gfn_x(gfn) & mask) <= range->end) &&
             ((gfn_x(gfn) | ~mask) >= range->start) )
        {
            rc = -EADDRNOTAVAIL;
            break;
        }
    }
    rcu_read_unlock(&pinned_cacheattr_rcu_lock);

    return rc;
}

static void free_pinned_cacheattr_entry(struct rcu_head *rcu)
{
    xfree(container_of(rcu, struct hvm_mem_pinned_cacheattr_range, rcu));
}

int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
                                 uint64_t gfn_end, uint32_t type)
{
    struct hvm_mem_pinned_cacheattr_range *range;
    int rc = 1;

    if ( !is_hvm_domain(d) )
        return -EOPNOTSUPP;

    if ( gfn_end < gfn_start || (gfn_start | gfn_end) >> paddr_bits )
        return -EINVAL;

    switch ( type )
    {
    case XEN_DOMCTL_DELETE_MEM_CACHEATTR:
        /* Remove the requested range. */
        rcu_read_lock(&pinned_cacheattr_rcu_lock);
        list_for_each_entry_rcu ( range,
                                  &d->arch.hvm_domain.pinned_cacheattr_ranges,
                                  list )
            if ( range->start == gfn_start && range->end == gfn_end )
            {
                rcu_read_unlock(&pinned_cacheattr_rcu_lock);
                list_del_rcu(&range->list);
                type = range->type;
                call_rcu(&range->rcu, free_pinned_cacheattr_entry);
                p2m_memory_type_changed(d);
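                /*
                 * Note the interleaved case label below: dropping a UC range
                 * never needs the cache flush, while UC- skips it only when
                 * the if() right before the label is satisfied.
                 */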
                switch ( type )
                {
                case PAT_TYPE_UC_MINUS:
                    /*
                     * For EPT we can also avoid the flush in this case;
                     * see epte_get_entry_emt().
                     */
                    if ( hap_enabled(d) && cpu_has_vmx )
                case PAT_TYPE_UNCACHABLE:
                        break;
                    /* fall through */
                default:
                    flush_all(FLUSH_CACHE);
                    break;
                }
                return 0;
            }
        rcu_read_unlock(&pinned_cacheattr_rcu_lock);
        return -ENOENT;

    case PAT_TYPE_UC_MINUS:
    case PAT_TYPE_UNCACHABLE:
    case PAT_TYPE_WRBACK:
    case PAT_TYPE_WRCOMB:
    case PAT_TYPE_WRPROT:
    case PAT_TYPE_WRTHROUGH:
        break;

    default:
        return -EINVAL;
    }

    rcu_read_lock(&pinned_cacheattr_rcu_lock);
    list_for_each_entry_rcu ( range,
                              &d->arch.hvm_domain.pinned_cacheattr_ranges,
                              list )
    {
        if ( range->start == gfn_start && range->end == gfn_end )
        {
            range->type = type;
            rc = 0;
            break;
        }
        if ( range->start <= gfn_end && gfn_start <= range->end )
        {
            rc = -EBUSY;
            break;
        }
    }
    rcu_read_unlock(&pinned_cacheattr_rcu_lock);
    if ( rc <= 0 )
        return rc;

    range = xzalloc(struct hvm_mem_pinned_cacheattr_range);
    if ( range == NULL )
        return -ENOMEM;

    range->start = gfn_start;
    range->end = gfn_end;
    range->type = type;

    list_add_rcu(&range->list, &d->arch.hvm_domain.pinned_cacheattr_ranges);
    p2m_memory_type_changed(d);
    if ( type != PAT_TYPE_WRBACK )
        flush_all(FLUSH_CACHE);

    return 0;
}

static int hvm_save_mtrr_msr(struct domain *d, hvm_domain_context_t *h)
{
    int i;
    struct vcpu *v;
    struct hvm_hw_mtrr hw_mtrr;
    struct mtrr_state *mtrr_state;

    /* Save MTRR & PAT state for each vcpu. */
    for_each_vcpu(d, v)
    {
        mtrr_state = &v->arch.hvm_vcpu.mtrr;

        hvm_get_guest_pat(v, &hw_mtrr.msr_pat_cr);

        hw_mtrr.msr_mtrr_def_type = mtrr_state->def_type
                                | (mtrr_state->enabled << 10);
        hw_mtrr.msr_mtrr_cap = mtrr_state->mtrr_cap;

        for ( i = 0; i < MTRR_VCNT; i++ )
        {
            /* Save PHYSBASE. */
            hw_mtrr.msr_mtrr_var[i*2] =
                ((uint64_t*)mtrr_state->var_ranges)[i*2];
            /* Save PHYSMASK. */
            hw_mtrr.msr_mtrr_var[i*2+1] =
                ((uint64_t*)mtrr_state->var_ranges)[i*2+1];
        }

        for ( i = 0; i < NUM_FIXED_MSR; i++ )
            hw_mtrr.msr_mtrr_fixed[i] =
                ((uint64_t*)mtrr_state->fixed_ranges)[i];

        if ( hvm_save_entry(MTRR, v->vcpu_id, h, &hw_mtrr) != 0 )
            return 1;
    }
    return 0;
}

static int hvm_load_mtrr_msr(struct domain *d, hvm_domain_context_t *h)
{
    int vcpuid, i;
    struct vcpu *v;
    struct mtrr_state *mtrr_state;
    struct hvm_hw_mtrr hw_mtrr;

    vcpuid = hvm_load_instance(h);
    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
    {
        dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n",
                d->domain_id, vcpuid);
        return -EINVAL;
    }

    if ( hvm_load_entry(MTRR, h, &hw_mtrr) != 0 )
        return -EINVAL;

    mtrr_state = &v->arch.hvm_vcpu.mtrr;

    hvm_set_guest_pat(v, hw_mtrr.msr_pat_cr);

    mtrr_state->mtrr_cap = hw_mtrr.msr_mtrr_cap;

    for ( i = 0; i < NUM_FIXED_MSR; i++ )
        mtrr_fix_range_msr_set(d, mtrr_state, i, hw_mtrr.msr_mtrr_fixed[i]);

    for ( i = 0; i < MTRR_VCNT; i++ )
    {
        mtrr_var_range_msr_set(d, mtrr_state,
                               MSR_IA32_MTRR_PHYSBASE(i),
                               hw_mtrr.msr_mtrr_var[i * 2]);
        mtrr_var_range_msr_set(d, mtrr_state,
                               MSR_IA32_MTRR_PHYSMASK(i),
                               hw_mtrr.msr_mtrr_var[i * 2 + 1]);
    }

    mtrr_def_type_msr_set(d, mtrr_state, hw_mtrr.msr_mtrr_def_type);

    return 0;
}

HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, hvm_load_mtrr_msr,
                          1, HVMSR_PER_VCPU);

void memory_type_changed(struct domain *d)
{
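    /*
     * Changes only need propagating for domains with devices mapped through
     * the IOMMU; the vcpu check guards against calls made during early
     * domain construction.
     */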
    if ( need_iommu(d) && d->vcpu && d->vcpu[0] )
    {
        p2m_memory_type_changed(d);
        flush_all(FLUSH_CACHE);
    }
}

int epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
                       unsigned int order, uint8_t *ipat, bool_t direct_mmio)
{
    int gmtrr_mtype, hmtrr_mtype;
    struct vcpu *v = current;

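    /*
     * Setting *ipat tells the caller to set the EPT "ignore PAT" bit, making
     * the returned type take effect regardless of the guest's PAT.
     */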
    *ipat = 0;

    if ( v->domain != d )
        v = d->vcpu ? d->vcpu[0] : NULL;

    /* Mask, not add, for order so it works with INVALID_MFN on unmapping */
    if ( rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
                                 mfn_x(mfn) | ((1UL << order) - 1)) )
    {
        if ( !order || rangeset_contains_range(mmio_ro_ranges, mfn_x(mfn),
                                               mfn_x(mfn) | ((1UL << order) - 1)) )
        {
            *ipat = 1;
            return MTRR_TYPE_UNCACHABLE;
        }
        /* Force invalid memory type so resolve_misconfig() will split it */
        return -1;
    }

    if ( direct_mmio )
    {
        if ( (mfn_x(mfn) ^ d->arch.hvm_domain.vmx.apic_access_mfn) >> order )
            return MTRR_TYPE_UNCACHABLE;
        if ( order )
            return -1;
        *ipat = 1;
        return MTRR_TYPE_WRBACK;
    }

    if ( !mfn_valid(mfn) )
    {
        *ipat = 1;
        return MTRR_TYPE_UNCACHABLE;
    }

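    /*
     * Without direct device access or cache-control permission the domain
     * cannot cause aliasing problems, so plain WB is safe and fastest.
     */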
    if ( !need_iommu(d) && !cache_flush_permitted(d) )
    {
        *ipat = 1;
        return MTRR_TYPE_WRBACK;
    }

    gmtrr_mtype = hvm_get_mem_pinned_cacheattr(d, _gfn(gfn), order);
    if ( gmtrr_mtype >= 0 )
    {
        *ipat = 1;
        return gmtrr_mtype != PAT_TYPE_UC_MINUS ? gmtrr_mtype
                                                : MTRR_TYPE_UNCACHABLE;
    }
    if ( gmtrr_mtype == -EADDRNOTAVAIL )
        return -1;

    gmtrr_mtype = is_hvm_domain(d) && v ?
                  get_mtrr_type(&v->arch.hvm_vcpu.mtrr,
                                gfn << PAGE_SHIFT, order) :
                  MTRR_TYPE_WRBACK;
    hmtrr_mtype = get_mtrr_type(&mtrr_state, mfn_x(mfn) << PAGE_SHIFT, order);
    if ( gmtrr_mtype < 0 || hmtrr_mtype < 0 )
        return -1;

    /* If both types match we're fine. */
    if ( likely(gmtrr_mtype == hmtrr_mtype) )
        return hmtrr_mtype;

    /* If either type is UC, we have to go with that one. */
    if ( gmtrr_mtype == MTRR_TYPE_UNCACHABLE ||
         hmtrr_mtype == MTRR_TYPE_UNCACHABLE )
        return MTRR_TYPE_UNCACHABLE;

    /* If either type is WB, we have to go with the other one. */
    if ( gmtrr_mtype == MTRR_TYPE_WRBACK )
        return hmtrr_mtype;
    if ( hmtrr_mtype == MTRR_TYPE_WRBACK )
        return gmtrr_mtype;

    /*
     * At this point we have disagreeing WC, WT, or WP types. The only
     * combination that can be cleanly resolved is WT:WP. The ones involving
     * WC need to be converted to UC, both due to the memory ordering
     * differences and because WC disallows reads to be cached (WT and WP
     * permit this), while WT and WP require writes to go straight to memory
     * (WC can buffer them).
     */
    if ( (gmtrr_mtype == MTRR_TYPE_WRTHROUGH &&
          hmtrr_mtype == MTRR_TYPE_WRPROT) ||
         (gmtrr_mtype == MTRR_TYPE_WRPROT &&
          hmtrr_mtype == MTRR_TYPE_WRTHROUGH) )
        return MTRR_TYPE_WRPROT;

    return MTRR_TYPE_UNCACHABLE;
}