1 /******************************************************************************
2  * arch/x86/mm/p2m-pt.c
3  *
4  * Implementation of p2m datastructures as pagetables, for use by
5  * NPT and shadow-pagetable code
6  *
7  * Parts of this code are Copyright (c) 2009-2011 by Citrix Systems, Inc.
8  * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
9  * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
10  * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
11  * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
12  *
13  * This program is free software; you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation; either version 2 of the License, or
16  * (at your option) any later version.
17  *
18  * This program is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with this program; If not, see <http://www.gnu.org/licenses/>.
25  */
26 
27 #include <xen/iommu.h>
28 #include <xen/vm_event.h>
29 #include <xen/event.h>
30 #include <xen/trace.h>
31 #include <public/vm_event.h>
32 #include <asm/domain.h>
33 #include <asm/page.h>
34 #include <asm/paging.h>
35 #include <asm/p2m.h>
36 #include <asm/mem_sharing.h>
37 #include <asm/hvm/nestedhvm.h>
38 #include <asm/hvm/svm/amd-iommu-proto.h>
39 
40 #include "mm-locks.h"
41 
42 /* Override macros from asm/page.h to make them work with mfn_t */
43 #undef mfn_to_page
44 #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
45 #undef page_to_mfn
46 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
47 
48 /*
49  * We may store INVALID_MFN in PTEs.  We need to clip this to avoid trampling
50  * over higher-order bits (NX, p2m type, IOMMU flags).  There seems to be no
51  * need to unclip on the read path, as callers in such cases are concerned
52  * only with the p2m type.
53  */
54 #define p2m_l1e_from_pfn(pfn, flags)    \
55     l1e_from_pfn((pfn) & (PADDR_MASK >> PAGE_SHIFT), (flags))
56 #define p2m_l2e_from_pfn(pfn, flags)    \
57     l2e_from_pfn((pfn) & ((PADDR_MASK & ~(_PAGE_PSE_PAT | 0UL)) \
58                           >> PAGE_SHIFT), (flags) | _PAGE_PSE)
59 #define p2m_l3e_from_pfn(pfn, flags)    \
60     l3e_from_pfn((pfn) & ((PADDR_MASK & ~(_PAGE_PSE_PAT | 0UL)) \
61                           >> PAGE_SHIFT), (flags) | _PAGE_PSE)
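/*
 * Illustrative note: INVALID_MFN is all ones, so without the clipping above a
 * PTE written for it would have every bit set.  With the usual 52-bit
 * PADDR_MASK the masking keeps only the address bits (PTE bits 12-51), leaving
 * NX, the p2m type and the IOMMU bits entirely under control of the flags
 * argument.
 */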
62 
63 /* PTE flags for the various types of p2m entry */
64 #define P2M_BASE_FLAGS \
65         (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
66 
67 #define RECALC_FLAGS (_PAGE_USER|_PAGE_ACCESSED)
68 #define set_recalc(level, ent) level##e_remove_flags(ent, RECALC_FLAGS)
69 #define clear_recalc(level, ent) level##e_add_flags(ent, RECALC_FLAGS)
70 #define _needs_recalc(flags) (!((flags) & _PAGE_USER))
71 #define needs_recalc(level, ent) _needs_recalc(level##e_get_flags(ent))
72 #define valid_recalc(level, ent) (!(level##e_get_flags(ent) & _PAGE_ACCESSED))
73 
74 static unsigned long p2m_type_to_flags(const struct p2m_domain *p2m,
75                                        p2m_type_t t,
76                                        mfn_t mfn,
77                                        unsigned int level)
78 {
79     unsigned long flags;
80     /*
81      * AMD IOMMU: When the p2m table is shared with the IOMMU, bits 9-11 are
82      * used by the IOMMU hardware to encode the next IO page level, and bits
83      * 59-62 carry IOMMU flags.  Neither range can be used to store p2m types.
84      */
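    /*
     * Illustrative note (assuming Xen's usual 24-bit PTE flag encoding, where
     * flag bits 12-23 map to PTE bits 52-63): the masked type below occupies
     * flag bits 12-18, i.e. PTE bits 52-58 once installed.  For example a
     * type value of 0x5 is stored as 0x5000 here.
     */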
85     flags = (unsigned long)(t & 0x7f) << 12;
86 
87     switch(t)
88     {
89     case p2m_invalid:
90     case p2m_mmio_dm:
91     case p2m_populate_on_demand:
92     case p2m_ram_paging_out:
93     case p2m_ram_paged:
94     case p2m_ram_paging_in:
95     default:
96         return flags | _PAGE_NX_BIT;
97     case p2m_grant_map_ro:
98         return flags | P2M_BASE_FLAGS | _PAGE_NX_BIT;
99     case p2m_ioreq_server:
100         flags |= P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
101         if ( p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE )
102             return flags & ~_PAGE_RW;
103         return flags;
104     case p2m_ram_ro:
105     case p2m_ram_logdirty:
106     case p2m_ram_shared:
107         return flags | P2M_BASE_FLAGS;
108     case p2m_ram_rw:
109         return flags | P2M_BASE_FLAGS | _PAGE_RW;
110     case p2m_grant_map_rw:
111     case p2m_map_foreign:
112         return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
113     case p2m_mmio_direct:
114         if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
115             flags |= _PAGE_RW;
116         else
117         {
118             flags |= _PAGE_PWT;
119             ASSERT(!level);
120         }
121         return flags | P2M_BASE_FLAGS | _PAGE_PCD;
122     }
123 }
124 
125 
126 // Find the next level's P2M entry, checking for out-of-range gfns.
127 // Returns NULL on error.
128 //
129 static l1_pgentry_t *
130 p2m_find_entry(void *table, unsigned long *gfn_remainder,
131                    unsigned long gfn, uint32_t shift, uint32_t max)
132 {
133     u32 index;
134 
135     index = *gfn_remainder >> shift;
136     if ( index >= max )
137     {
138         P2M_DEBUG("gfn=%#lx out of range "
139                   "(gfn_remainder=%#lx shift=%d index=%#x max=%#x)\n",
140                   gfn, *gfn_remainder, shift, index, max);
141         return NULL;
142     }
143     *gfn_remainder &= (1 << shift) - 1;
144     return (l1_pgentry_t *)table + index;
145 }
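/*
 * Illustrative note: the helper both selects the slot and narrows the
 * remainder, e.g. with shift == 2 * PAGETABLE_ORDER it returns the L3 slot
 * for the GFN and leaves only the (page-granular) offset within that 1GB
 * region in *gfn_remainder for the next, lower-level lookup.
 */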
146 
147 /* Free intermediate tables from a p2m sub-tree */
148 static void
149 p2m_free_entry(struct p2m_domain *p2m, l1_pgentry_t *p2m_entry, int page_order)
150 {
151     /* End if the entry is a leaf entry. */
152     if ( page_order == PAGE_ORDER_4K
153          || !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT)
154          || (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
155         return;
156 
157     if ( page_order > PAGE_ORDER_2M )
158     {
159         l1_pgentry_t *l3_table = map_domain_page(l1e_get_mfn(*p2m_entry));
160 
161         for ( int i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
162             p2m_free_entry(p2m, l3_table + i, page_order - 9);
163         unmap_domain_page(l3_table);
164     }
165 
166     p2m_free_ptp(p2m, l1e_get_page(*p2m_entry));
167 }
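/*
 * Illustrative note: freeing is depth-first.  For a 1GB-order entry the page
 * table one level down is mapped and each of its 512 slots is freed
 * recursively (page_order - 9 drops one level per recursion), before the
 * table page referenced by the entry itself is handed back via p2m_free_ptp().
 */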
168 
169 // Walk one level of the P2M table, allocating a new table if required.
170 // Returns -errno on error.
171 //
172 
173 /* AMD IOMMU: Convert next-level bits and r/w bits into 24-bit p2m flags. */
174 #define iommu_nlevel_to_flags(nl, f) ((((nl) & 0x7) << 9 )|(((f) & 0x3) << 21))
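/*
 * Illustrative note: the "next level" lands in flag bits 9-11 and the r/w
 * permissions in flag bits 21-22 (PTE bits 61-62 once installed).  For
 * example, assuming the usual IOMMUF_readable/IOMMUF_writable values of 1
 * and 2, iommu_nlevel_to_flags(1, IOMMUF_readable | IOMMUF_writable) yields
 * 0x600200.
 */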
175 
176 static void p2m_add_iommu_flags(l1_pgentry_t *p2m_entry,
177                                 unsigned int nlevel, unsigned int flags)
178 {
179     if ( iommu_hap_pt_share )
180         l1e_add_flags(*p2m_entry, iommu_nlevel_to_flags(nlevel, flags));
181 }
182 
183 /* Returns: 0 for success, -errno for failure */
184 static int
185 p2m_next_level(struct p2m_domain *p2m, void **table,
186                unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
187                u32 max, unsigned int level, bool_t unmap)
188 {
189     l1_pgentry_t *p2m_entry, new_entry;
190     void *next;
191     unsigned int flags;
192 
193     if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
194                                       shift, max)) )
195         return -ENOENT;
196 
197     flags = l1e_get_flags(*p2m_entry);
198 
199     /* PoD/paging: Not present doesn't imply empty. */
200     if ( !flags )
201     {
202         mfn_t mfn = p2m_alloc_ptp(p2m, level);
203 
204         if ( mfn_eq(mfn, INVALID_MFN) )
205             return -ENOMEM;
206 
207         new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW);
208 
209         p2m_add_iommu_flags(&new_entry, level, IOMMUF_readable|IOMMUF_writable);
210         p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level + 1);
211     }
212     else if ( flags & _PAGE_PSE )
213     {
214         /* Split superpages into smaller ones. */
215         unsigned long pfn = l1e_get_pfn(*p2m_entry);
216         mfn_t mfn;
217         l1_pgentry_t *l1_entry;
218         unsigned int i;
219 
220         switch ( level )
221         {
222         case 2:
223             break;
224 
225         case 1:
226             /*
227              * New splintered mappings inherit the flags of the old superpage,
228              * with a little reorganisation for the _PAGE_PSE_PAT bit.
229              */
230             if ( pfn & 1 )           /* ==> _PAGE_PSE_PAT was set */
231                 pfn -= 1;            /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
232             else
233                 flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
234             break;
235 
236         default:
237             ASSERT_UNREACHABLE();
238             return -EINVAL;
239         }
240 
241         mfn = p2m_alloc_ptp(p2m, level);
242         if ( mfn_eq(mfn, INVALID_MFN) )
243             return -ENOMEM;
244 
245         l1_entry = map_domain_page(mfn);
246 
247         /* Inherit original IOMMU permissions, but update Next Level. */
248         if ( iommu_hap_pt_share )
249         {
250             flags &= ~iommu_nlevel_to_flags(~0, 0);
251             flags |= iommu_nlevel_to_flags(level - 1, 0);
252         }
253 
254         for ( i = 0; i < (1u << PAGETABLE_ORDER); i++ )
255         {
256             new_entry = l1e_from_pfn(pfn | (i << ((level - 1) * PAGETABLE_ORDER)),
257                                      flags);
258             p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, level);
259         }
260 
261         unmap_domain_page(l1_entry);
262 
263         new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW);
264         p2m_add_iommu_flags(&new_entry, level, IOMMUF_readable|IOMMUF_writable);
265         p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level + 1);
266     }
267     else
268         ASSERT(flags & _PAGE_PRESENT);
269 
270     next = map_domain_page(l1e_get_mfn(*p2m_entry));
271     if ( unmap )
272         unmap_domain_page(*table);
273     *table = next;
274 
275     return 0;
276 }
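/*
 * Summary: p2m_next_level() handles three situations at each step of the
 * walk - a completely empty slot gets a freshly allocated table, a superpage
 * (PSE) entry gets shattered into 512 smaller entries inheriting its flags,
 * and an ordinary present intermediate entry is simply descended into.  In
 * all cases *table ends up pointing at the (mapped) next-level table.
 */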
277 
278 /*
279  * Mark all valid present entries at the targeted level, for the passed-in GFN
280  * range, as needing P2M type re-calculation (by clearing the U flag).  The
281  * range is guaranteed not to cross a page (table) boundary at that level.
282  */
283 static int p2m_pt_set_recalc_range(struct p2m_domain *p2m,
284                                    unsigned int level,
285                                    unsigned long first_gfn,
286                                    unsigned long last_gfn)
287 {
288     void *table;
289     unsigned long gfn_remainder = first_gfn, remainder;
290     unsigned int i;
291     l1_pgentry_t *pent, *plast;
292     int err = 0;
293 
294     table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
295     for ( i = 4; i-- > level; )
296     {
297         remainder = gfn_remainder;
298         pent = p2m_find_entry(table, &remainder, first_gfn,
299                               i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
300         if ( !pent )
301         {
302             err = -EINVAL;
303             goto out;
304         }
305 
306         if ( !(l1e_get_flags(*pent) & _PAGE_PRESENT) )
307             goto out;
308 
309         err = p2m_next_level(p2m, &table, &gfn_remainder, first_gfn,
310                              i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER,
311                              i, 1);
312         if ( err )
313             goto out;
314     }
315 
316     remainder = gfn_remainder + (last_gfn - first_gfn);
317     pent = p2m_find_entry(table, &gfn_remainder, first_gfn,
318                           i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
319     plast = p2m_find_entry(table, &remainder, last_gfn,
320                            i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
321     if ( pent && plast )
322         for ( ; pent <= plast; ++pent )
323         {
324             l1_pgentry_t e = *pent;
325 
326             if ( (l1e_get_flags(e) & _PAGE_PRESENT) && !needs_recalc(l1, e) )
327             {
328                 set_recalc(l1, e);
329                 p2m->write_p2m_entry(p2m, first_gfn, pent, e, level);
330             }
331             first_gfn += 1UL << (i * PAGETABLE_ORDER);
332         }
333     else
334         err = -EIO;
335 
336  out:
337     unmap_domain_page(table);
338 
339     return err;
340 }
341 
342 /*
343  * Handle possibly necessary P2M type re-calculation (U flag clear for a
344  * present entry) for the entries in the page table hierarchy for the given
345  * GFN. Propagate the re-calculation flag down to the next page table level
346  * for entries not involved in the translation of the given GFN.
347  */
348 static int do_recalc(struct p2m_domain *p2m, unsigned long gfn)
349 {
350     void *table;
351     unsigned long gfn_remainder = gfn;
352     unsigned int level = 4;
353     l1_pgentry_t *pent;
354     int err = 0;
355 
356     table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
357     while ( --level )
358     {
359         unsigned long remainder = gfn_remainder;
360 
361         pent = p2m_find_entry(table, &remainder, gfn,
362                               level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
363         if ( !pent || !(l1e_get_flags(*pent) & _PAGE_PRESENT) )
364             goto out;
365 
366         if ( l1e_get_flags(*pent) & _PAGE_PSE )
367         {
368             unsigned long mask = ~0UL << (level * PAGETABLE_ORDER);
369 
370             ASSERT(p2m_flags_to_type(l1e_get_flags(*pent)) != p2m_ioreq_server);
371             if ( !needs_recalc(l1, *pent) ||
372                  !p2m_is_changeable(p2m_flags_to_type(l1e_get_flags(*pent))) ||
373                  p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask) >= 0 )
374                 break;
375         }
376 
377         err = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
378                              level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER,
379                              level, 0);
380         if ( err )
381             goto out;
382 
383         if ( needs_recalc(l1, *pent) )
384         {
385             l1_pgentry_t e = *pent, *ptab = table;
386             unsigned int i;
387 
388             if ( !valid_recalc(l1, e) )
389                 P2M_DEBUG("bogus recalc state at d%d:%lx:%u\n",
390                           p2m->domain->domain_id, gfn, level);
391             remainder = gfn_remainder;
392             for ( i = 0; i < (1 << PAGETABLE_ORDER); ++i )
393             {
394                 l1_pgentry_t ent = ptab[i];
395 
396                 if ( (l1e_get_flags(ent) & _PAGE_PRESENT) &&
397                      !needs_recalc(l1, ent) )
398                 {
399                     set_recalc(l1, ent);
400                     p2m->write_p2m_entry(p2m, gfn - remainder, &ptab[i],
401                                          ent, level);
402                 }
403                 remainder -= 1UL << ((level - 1) * PAGETABLE_ORDER);
404             }
405             smp_wmb();
406             clear_recalc(l1, e);
407             p2m->write_p2m_entry(p2m, gfn, pent, e, level + 1);
408         }
409         unmap_domain_page((void *)((unsigned long)pent & PAGE_MASK));
410     }
411 
412     pent = p2m_find_entry(table, &gfn_remainder, gfn,
413                           level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
414     if ( pent && (l1e_get_flags(*pent) & _PAGE_PRESENT) &&
415          needs_recalc(l1, *pent) )
416     {
417         l1_pgentry_t e = *pent;
418         p2m_type_t ot, nt;
419         unsigned long mask = ~0UL << (level * PAGETABLE_ORDER);
420 
421         if ( !valid_recalc(l1, e) )
422             P2M_DEBUG("bogus recalc leaf at d%d:%lx:%u\n",
423                       p2m->domain->domain_id, gfn, level);
424         ot = p2m_flags_to_type(l1e_get_flags(e));
425         nt = p2m_recalc_type_range(true, ot, p2m, gfn & mask, gfn | ~mask);
426         if ( nt != ot )
427         {
428             unsigned long mfn = l1e_get_pfn(e);
429             unsigned long flags = p2m_type_to_flags(p2m, nt,
430                                                     _mfn(mfn), level);
431 
432             if ( level )
433             {
434                 if ( flags & _PAGE_PAT )
435                 {
436                      BUILD_BUG_ON(_PAGE_PAT != _PAGE_PSE);
437                      mfn |= _PAGE_PSE_PAT >> PAGE_SHIFT;
438                 }
439                 else
440                      mfn &= ~((unsigned long)_PAGE_PSE_PAT >> PAGE_SHIFT);
441                 flags |= _PAGE_PSE;
442             }
443 
444             if ( ot == p2m_ioreq_server )
445             {
446                 ASSERT(p2m->ioreq.entry_count > 0);
447                 ASSERT(level == 0);
448                 p2m->ioreq.entry_count--;
449             }
450 
451             e = l1e_from_pfn(mfn, flags);
452             p2m_add_iommu_flags(&e, level,
453                                 (nt == p2m_ram_rw)
454                                 ? IOMMUF_readable|IOMMUF_writable : 0);
455             ASSERT(!needs_recalc(l1, e));
456         }
457         else
458             clear_recalc(l1, e);
459         p2m->write_p2m_entry(p2m, gfn, pent, e, level + 1);
460     }
461 
462  out:
463     unmap_domain_page(table);
464 
465     return err;
466 }
467 
468 int p2m_pt_handle_deferred_changes(uint64_t gpa)
469 {
470     struct p2m_domain *p2m = p2m_get_hostp2m(current->domain);
471     int rc;
472 
473     p2m_lock(p2m);
474     rc = do_recalc(p2m, PFN_DOWN(gpa));
475     p2m_unlock(p2m);
476 
477     return rc;
478 }
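/*
 * Note (hedged): this is typically reached from the hardware nested-paging
 * fault path - e.g. an NPT fault taken after a lazy, deferred type change -
 * so that the pending re-calculation for the faulting page is applied before
 * the access is retried; the heavy lifting is all in do_recalc() above.
 */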
479 
480 /* Returns: 0 for success, -errno for failure */
481 static int
482 p2m_pt_set_entry(struct p2m_domain *p2m, gfn_t gfn_, mfn_t mfn,
483                  unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma,
484                  int sve)
485 {
486     /* XXX -- this might be able to be faster iff current->domain == d */
487     void *table;
488     unsigned long gfn = gfn_x(gfn_);
489     unsigned long i, gfn_remainder = gfn;
490     l1_pgentry_t *p2m_entry, entry_content;
491     /* Intermediate table to free if we're replacing it with a superpage. */
492     l1_pgentry_t intermediate_entry = l1e_empty();
493     l2_pgentry_t l2e_content;
494     l3_pgentry_t l3e_content;
495     int rc;
496     unsigned int iommu_pte_flags = p2m_get_iommu_flags(p2mt, mfn);
497     /*
498      * old_mfn and iommu_old_flags control possible flush/update needs on the
499      * IOMMU: We need to flush when MFN or flags (i.e. permissions) change.
500      * iommu_old_flags being initialized to zero covers the case of the entry
501      * getting replaced being a non-present (leaf or intermediate) one. For
502      * present leaf entries the real value will get calculated below, while
503      * for present intermediate entries ~0 (guaranteed != iommu_pte_flags)
504      * will be used (to cover all cases of what the leaf entries underneath
505      * the intermediate one might be).
506      */
507     unsigned int flags, iommu_old_flags = 0;
508     unsigned long old_mfn = mfn_x(INVALID_MFN);
509 
510     ASSERT(sve != 0);
511 
512     if ( tb_init_done )
513     {
514         struct {
515             u64 gfn, mfn;
516             int p2mt;
517             int d:16,order:16;
518         } t;
519 
520         t.gfn = gfn;
521         t.mfn = mfn_x(mfn);
522         t.p2mt = p2mt;
523         t.d = p2m->domain->domain_id;
524         t.order = page_order;
525 
526         __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t);
527     }
528 
529     if ( unlikely(p2m_is_foreign(p2mt)) )
530     {
531         /* hvm fixme: foreign types are only supported on ept at present */
532         gdprintk(XENLOG_WARNING, "Unimplemented foreign p2m type.\n");
533         return -EINVAL;
534     }
535 
536     /* Carry out any possibly pending earlier changes first. */
537     rc = do_recalc(p2m, gfn);
538     if ( rc < 0 )
539         return rc;
540 
541     table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
542     rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
543                         L4_PAGETABLE_SHIFT - PAGE_SHIFT,
544                         L4_PAGETABLE_ENTRIES, 3, 1);
545     if ( rc )
546         goto out;
547 
548     /*
549      * Try to install a 1GB (L3 superpage) mapping if that order was requested.
550      */
551     if ( page_order == PAGE_ORDER_1G )
552     {
553         p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
554                                    L3_PAGETABLE_SHIFT - PAGE_SHIFT,
555                                    L3_PAGETABLE_ENTRIES);
556         ASSERT(p2m_entry);
557         flags = l1e_get_flags(*p2m_entry);
558         if ( flags & _PAGE_PRESENT )
559         {
560             if ( flags & _PAGE_PSE )
561             {
562                 old_mfn = l1e_get_pfn(*p2m_entry);
563                 iommu_old_flags =
564                     p2m_get_iommu_flags(p2m_flags_to_type(flags),
565                                         _mfn(old_mfn));
566             }
567             else
568             {
569                 iommu_old_flags = ~0;
570                 intermediate_entry = *p2m_entry;
571             }
572         }
573 
574         ASSERT(p2m_flags_to_type(flags) != p2m_ioreq_server);
575         ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
576         l3e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
577             ? p2m_l3e_from_pfn(mfn_x(mfn),
578                                p2m_type_to_flags(p2m, p2mt, mfn, 2))
579             : l3e_empty();
580         entry_content.l1 = l3e_content.l3;
581 
582         if ( entry_content.l1 != 0 )
583             p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
584 
585         p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 3);
586         /* NB: paging_write_p2m_entry() handles tlb flushes properly */
587     }
588     else
589     {
590         rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
591                             L3_PAGETABLE_SHIFT - PAGE_SHIFT,
592                             L3_PAGETABLE_ENTRIES, 2, 1);
593         if ( rc )
594             goto out;
595     }
596 
597     if ( page_order == PAGE_ORDER_4K )
598     {
599         p2m_type_t p2mt_old;
600 
601         rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
602                             L2_PAGETABLE_SHIFT - PAGE_SHIFT,
603                             L2_PAGETABLE_ENTRIES, 1, 1);
604         if ( rc )
605             goto out;
606 
607         p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
608                                    0, L1_PAGETABLE_ENTRIES);
609         ASSERT(p2m_entry);
610         old_mfn = l1e_get_pfn(*p2m_entry);
611         iommu_old_flags =
612             p2m_get_iommu_flags(p2m_flags_to_type(l1e_get_flags(*p2m_entry)),
613                                 _mfn(old_mfn));
614 
615         if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
616             entry_content = p2m_l1e_from_pfn(mfn_x(mfn),
617                                          p2m_type_to_flags(p2m, p2mt, mfn, 0));
618         else
619             entry_content = l1e_empty();
620 
621         if ( entry_content.l1 != 0 )
622             p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
623 
624         p2mt_old = p2m_flags_to_type(l1e_get_flags(*p2m_entry));
625 
626         /*
627          * p2m_ioreq_server is only used for 4K pages, so
628          * the count is only done for level 1 entries.
629          */
630         if ( p2mt == p2m_ioreq_server )
631             p2m->ioreq.entry_count++;
632 
633         if ( p2mt_old == p2m_ioreq_server )
634         {
635             ASSERT(p2m->ioreq.entry_count > 0);
636             p2m->ioreq.entry_count--;
637         }
638 
639         /* level 1 entry */
640         p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 1);
641         /* NB: paging_write_p2m_entry() handles tlb flushes properly */
642     }
643     else if ( page_order == PAGE_ORDER_2M )
644     {
645         p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
646                                    L2_PAGETABLE_SHIFT - PAGE_SHIFT,
647                                    L2_PAGETABLE_ENTRIES);
648         ASSERT(p2m_entry);
649         flags = l1e_get_flags(*p2m_entry);
650         if ( flags & _PAGE_PRESENT )
651         {
652             if ( flags & _PAGE_PSE )
653             {
654                 old_mfn = l1e_get_pfn(*p2m_entry);
655                 iommu_old_flags =
656                     p2m_get_iommu_flags(p2m_flags_to_type(flags),
657                                         _mfn(old_mfn));
658             }
659             else
660             {
661                 iommu_old_flags = ~0;
662                 intermediate_entry = *p2m_entry;
663             }
664         }
665 
666         ASSERT(p2m_flags_to_type(flags) != p2m_ioreq_server);
667         ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
668         l2e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
669             ? p2m_l2e_from_pfn(mfn_x(mfn),
670                                p2m_type_to_flags(p2m, p2mt, mfn, 1))
671             : l2e_empty();
672         entry_content.l1 = l2e_content.l2;
673 
674         if ( entry_content.l1 != 0 )
675             p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
676 
677         p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 2);
678         /* NB: paging_write_p2m_entry() handles tlb flushes properly */
679     }
680 
681     /* Track the highest gfn for which we have ever had a valid mapping */
682     if ( p2mt != p2m_invalid
683          && (gfn + (1UL << page_order) - 1 > p2m->max_mapped_pfn) )
684         p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;
685 
686     if ( iommu_enabled && need_iommu(p2m->domain) &&
687          (iommu_old_flags != iommu_pte_flags || old_mfn != mfn_x(mfn)) )
688     {
689         ASSERT(rc == 0);
690 
691         if ( iommu_use_hap_pt(p2m->domain) )
692         {
693             if ( iommu_old_flags )
694                 amd_iommu_flush_pages(p2m->domain, gfn, page_order);
695         }
696         else if ( iommu_pte_flags )
697             for ( i = 0; i < (1UL << page_order); i++ )
698             {
699                 rc = iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i,
700                                     iommu_pte_flags);
701                 if ( unlikely(rc) )
702                 {
703                     while ( i-- )
704                         /* If statement to satisfy __must_check. */
705                         if ( iommu_unmap_page(p2m->domain, gfn + i) )
706                             continue;
707 
708                     break;
709                 }
710             }
711         else
712             for ( i = 0; i < (1UL << page_order); i++ )
713             {
714                 int ret = iommu_unmap_page(p2m->domain, gfn + i);
715 
716                 if ( !rc )
717                     rc = ret;
718             }
719     }
720 
721     /*
722      * Free old intermediate tables if necessary.  This has to be the
723      * last thing we do, after removal from the IOMMU tables, so as to
724      * avoid a potential use-after-free.
725      */
726     if ( l1e_get_flags(intermediate_entry) & _PAGE_PRESENT )
727         p2m_free_entry(p2m, &intermediate_entry, page_order);
728 
729  out:
730     unmap_domain_page(table);
731     return rc;
732 }
733 
734 static mfn_t
735 p2m_pt_get_entry(struct p2m_domain *p2m, gfn_t gfn_,
736                  p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
737                  unsigned int *page_order, bool_t *sve)
738 {
739     mfn_t mfn;
740     unsigned long gfn = gfn_x(gfn_);
741     paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
742     l2_pgentry_t *l2e;
743     l1_pgentry_t *l1e;
744     unsigned int flags;
745     p2m_type_t l1t;
746     bool_t recalc;
747 
748     ASSERT(paging_mode_translate(p2m->domain));
749 
750     if ( sve )
751         *sve = 1;
752 
753     /* XXX This is for compatibility with the old model, where anything not
754      * XXX marked as RAM was considered to be emulated MMIO space.
755      * XXX Once we start explicitly registering MMIO regions in the p2m
756      * XXX we will return p2m_invalid for unmapped gfns */
757     *t = p2m_mmio_dm;
758     /* Not implemented except with EPT */
759     *a = p2m_access_rwx;
760 
761     if ( gfn > p2m->max_mapped_pfn )
762     {
763         /* This pfn is higher than the highest the p2m map currently holds */
764         if ( page_order )
765         {
766             for ( *page_order = 3 * PAGETABLE_ORDER; *page_order;
767                   *page_order -= PAGETABLE_ORDER )
768                 if ( (gfn & ~((1UL << *page_order) - 1)) >
769                      p2m->max_mapped_pfn )
770                     break;
771         }
772         return INVALID_MFN;
773     }
774 
775     mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
776 
777     {
778         l4_pgentry_t *l4e = map_domain_page(mfn);
779         l4e += l4_table_offset(addr);
780         if ( page_order )
781             *page_order = 3 * PAGETABLE_ORDER;
782         if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
783         {
784             unmap_domain_page(l4e);
785             return INVALID_MFN;
786         }
787         mfn = l4e_get_mfn(*l4e);
788         recalc = needs_recalc(l4, *l4e);
789         unmap_domain_page(l4e);
790     }
791     {
792         l3_pgentry_t *l3e = map_domain_page(mfn);
793         l3e += l3_table_offset(addr);
794         if ( page_order )
795             *page_order = 2 * PAGETABLE_ORDER;
796 
797 pod_retry_l3:
798         flags = l3e_get_flags(*l3e);
799         if ( !(flags & _PAGE_PRESENT) )
800         {
801             if ( p2m_flags_to_type(flags) == p2m_populate_on_demand )
802             {
803                 if ( q & P2M_ALLOC )
804                 {
805                     if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_1G) )
806                         goto pod_retry_l3;
807                     gdprintk(XENLOG_ERR, "%s: Allocate 1GB failed!\n", __func__);
808                 }
809                 else
810                     *t = p2m_populate_on_demand;
811             }
812             unmap_domain_page(l3e);
813             return INVALID_MFN;
814         }
815         if ( flags & _PAGE_PSE )
816         {
817             mfn = _mfn(l3e_get_pfn(*l3e) +
818                        l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
819                        l1_table_offset(addr));
820             *t = p2m_recalc_type(recalc || _needs_recalc(flags),
821                                  p2m_flags_to_type(flags), p2m, gfn);
822             unmap_domain_page(l3e);
823 
824             ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
825             return (p2m_is_valid(*t)) ? mfn : INVALID_MFN;
826         }
827 
828         mfn = l3e_get_mfn(*l3e);
829         if ( _needs_recalc(flags) )
830             recalc = 1;
831         unmap_domain_page(l3e);
832     }
833 
834     l2e = map_domain_page(mfn);
835     l2e += l2_table_offset(addr);
836     if ( page_order )
837         *page_order = PAGETABLE_ORDER;
838 
839 pod_retry_l2:
840     flags = l2e_get_flags(*l2e);
841     if ( !(flags & _PAGE_PRESENT) )
842     {
843         /* PoD: Try to populate a 2-meg chunk */
844         if ( p2m_flags_to_type(flags) == p2m_populate_on_demand )
845         {
846             if ( q & P2M_ALLOC ) {
847                 if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_2M) )
848                     goto pod_retry_l2;
849             } else
850                 *t = p2m_populate_on_demand;
851         }
852 
853         unmap_domain_page(l2e);
854         return INVALID_MFN;
855     }
856     if ( flags & _PAGE_PSE )
857     {
858         mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
859         *t = p2m_recalc_type(recalc || _needs_recalc(flags),
860                              p2m_flags_to_type(flags), p2m, gfn);
861         unmap_domain_page(l2e);
862 
863         ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
864         return (p2m_is_valid(*t)) ? mfn : INVALID_MFN;
865     }
866 
867     mfn = l2e_get_mfn(*l2e);
868     if ( needs_recalc(l2, *l2e) )
869         recalc = 1;
870     unmap_domain_page(l2e);
871 
872     l1e = map_domain_page(mfn);
873     l1e += l1_table_offset(addr);
874     if ( page_order )
875         *page_order = 0;
876 
877 pod_retry_l1:
878     flags = l1e_get_flags(*l1e);
879     l1t = p2m_flags_to_type(flags);
880     if ( !(flags & _PAGE_PRESENT) && !p2m_is_paging(l1t) )
881     {
882         /* PoD: Try to populate */
883         if ( l1t == p2m_populate_on_demand )
884         {
885             if ( q & P2M_ALLOC ) {
886                 if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_4K) )
887                     goto pod_retry_l1;
888             } else
889                 *t = p2m_populate_on_demand;
890         }
891 
892         unmap_domain_page(l1e);
893         return INVALID_MFN;
894     }
895     mfn = l1e_get_mfn(*l1e);
896     *t = p2m_recalc_type(recalc || _needs_recalc(flags), l1t, p2m, gfn);
897     unmap_domain_page(l1e);
898 
899     ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t) || p2m_is_paging(*t));
900     return (p2m_is_valid(*t) || p2m_is_grant(*t)) ? mfn : INVALID_MFN;
901 }
902 
903 static void p2m_pt_change_entry_type_global(struct p2m_domain *p2m,
904                                             p2m_type_t ot, p2m_type_t nt)
905 {
906     l1_pgentry_t *tab;
907     unsigned long gfn = 0;
908     unsigned int i, changed;
909 
910     if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) == 0 )
911         return;
912 
913     ASSERT(hap_enabled(p2m->domain));
914 
915     tab = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
916     for ( changed = i = 0; i < (1 << PAGETABLE_ORDER); ++i )
917     {
918         l1_pgentry_t e = tab[i];
919 
920         if ( (l1e_get_flags(e) & _PAGE_PRESENT) &&
921              !needs_recalc(l1, e) )
922         {
923             set_recalc(l1, e);
924             p2m->write_p2m_entry(p2m, gfn, &tab[i], e, 4);
925             ++changed;
926         }
927         gfn += 1UL << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
928     }
929     unmap_domain_page(tab);
930 
931     if ( changed )
932          flush_tlb_mask(p2m->domain->domain_dirty_cpumask);
933 }
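/*
 * Note: only the 512 top-level (L4) entries are marked here; the flag is
 * then propagated down on demand by do_recalc(), so the cost of a global
 * type change is deferred to the point where individual entries are next
 * looked at, rather than paid as a full p2m walk up front.
 */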
934 
935 static int p2m_pt_change_entry_type_range(struct p2m_domain *p2m,
936                                           p2m_type_t ot, p2m_type_t nt,
937                                           unsigned long first_gfn,
938                                           unsigned long last_gfn)
939 {
940     unsigned long mask = (1 << PAGETABLE_ORDER) - 1;
941     unsigned int i;
942     int err = 0;
943 
944     ASSERT(hap_enabled(p2m->domain));
945 
946     for ( i = 1; i <= 4; )
947     {
948         if ( first_gfn & mask )
949         {
950             unsigned long end_gfn = min(first_gfn | mask, last_gfn);
951 
952             err = p2m_pt_set_recalc_range(p2m, i, first_gfn, end_gfn);
953             if ( err || end_gfn >= last_gfn )
954                 break;
955             first_gfn = end_gfn + 1;
956         }
957         else if ( (last_gfn & mask) != mask )
958         {
959             unsigned long start_gfn = max(first_gfn, last_gfn & ~mask);
960 
961             err = p2m_pt_set_recalc_range(p2m, i, start_gfn, last_gfn);
962             if ( err || start_gfn <= first_gfn )
963                 break;
964             last_gfn = start_gfn - 1;
965         }
966         else
967         {
968             ++i;
969             mask |= mask << PAGETABLE_ORDER;
970         }
971     }
972 
973     return err;
974 }
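/*
 * Illustrative note: the loop peels unaligned head and tail fragments off
 * the range at the current level, then re-tries the remaining, fully aligned
 * middle one level (and one PAGETABLE_ORDER of mask) higher, so large ranges
 * are marked with as few, as high-level, entries as possible.
 */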
975 
976 #if P2M_AUDIT
977 long p2m_pt_audit_p2m(struct p2m_domain *p2m)
978 {
979     unsigned long entry_count = 0, pmbad = 0;
980     unsigned long mfn, gfn, m2pfn;
981 
982     ASSERT(p2m_locked_by_me(p2m));
983     ASSERT(pod_locked_by_me(p2m));
984 
985     /* Audit part one: walk the domain's p2m table, checking the entries. */
986     if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
987     {
988         l2_pgentry_t *l2e;
989         l1_pgentry_t *l1e;
990         int i1, i2;
991 
992         l4_pgentry_t *l4e;
993         l3_pgentry_t *l3e;
994         int i4, i3;
995         l4e = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
996 
997         gfn = 0;
998         for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
999         {
1000             if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
1001             {
1002                 gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
1003                 continue;
1004             }
1005             l3e = map_l3t_from_l4e(l4e[i4]);
1006             for ( i3 = 0;
1007                   i3 < L3_PAGETABLE_ENTRIES;
1008                   i3++ )
1009             {
1010                 if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
1011                 {
1012                     gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
1013                     continue;
1014                 }
1015 
1016                 /* check for 1GB super page */
1017                 if ( l3e_get_flags(l3e[i3]) & _PAGE_PSE )
1018                 {
1019                     mfn = l3e_get_pfn(l3e[i3]);
1020                     ASSERT(mfn_valid(_mfn(mfn)));
1021                     /* we have to cover 512x512 4K pages */
1022                     for ( i2 = 0;
1023                           i2 < (L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES);
1024                           i2++)
1025                     {
1026                         m2pfn = get_gpfn_from_mfn(mfn+i2);
1027                         if ( m2pfn != (gfn + i2) )
1028                         {
1029                             pmbad++;
1030                             P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1031                                        " -> gfn %#lx\n", gfn+i2, mfn+i2,
1032                                        m2pfn);
1033                             BUG();
1034                         }
1035                         gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
1036                         continue;
1037                     }
1038                 }
1039 
1040                 l2e = map_l2t_from_l3e(l3e[i3]);
1041                 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
1042                 {
1043                     if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
1044                     {
1045                         if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
1046                              && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
1047                                   == p2m_populate_on_demand ) )
1048                             entry_count+=SUPERPAGE_PAGES;
1049                         gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
1050                         continue;
1051                     }
1052 
1053                     /* check for super page */
1054                     if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
1055                     {
1056                         mfn = l2e_get_pfn(l2e[i2]);
1057                         ASSERT(mfn_valid(_mfn(mfn)));
1058                         for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
1059                         {
1060                             m2pfn = get_gpfn_from_mfn(mfn+i1);
1061                             /* Allow shared M2Ps */
1062                             if ( (m2pfn != (gfn + i1)) &&
1063                                  (m2pfn != SHARED_M2P_ENTRY) )
1064                             {
1065                                 pmbad++;
1066                                 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1067                                            " -> gfn %#lx\n", gfn+i1, mfn+i1,
1068                                            m2pfn);
1069                                 BUG();
1070                             }
1071                         }
1072                         gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
1073                         continue;
1074                     }
1075 
1076                     l1e = map_l1t_from_l2e(l2e[i2]);
1077 
1078                     for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
1079                     {
1080                         p2m_type_t type;
1081 
1082                         type = p2m_flags_to_type(l1e_get_flags(l1e[i1]));
1083                         if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
1084                         {
1085                             if ( type == p2m_populate_on_demand )
1086                                 entry_count++;
1087                             continue;
1088                         }
1089                         mfn = l1e_get_pfn(l1e[i1]);
1090                         ASSERT(mfn_valid(_mfn(mfn)));
1091                         m2pfn = get_gpfn_from_mfn(mfn);
1092                         if ( m2pfn != gfn &&
1093                              type != p2m_mmio_direct &&
1094                              !p2m_is_grant(type) &&
1095                              !p2m_is_shared(type) )
1096                         {
1097                             pmbad++;
1098                             printk("mismatch: gfn %#lx -> mfn %#lx"
1099                                    " -> gfn %#lx\n", gfn, mfn, m2pfn);
1100                             P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1101                                        " -> gfn %#lx\n", gfn, mfn, m2pfn);
1102                             BUG();
1103                         }
1104                     }
1105                     unmap_domain_page(l1e);
1106                 }
1107                 unmap_domain_page(l2e);
1108             }
1109             unmap_domain_page(l3e);
1110         }
1111 
1112         unmap_domain_page(l4e);
1113     }
1114 
1115     if ( entry_count != p2m->pod.entry_count )
1116     {
1117         printk("%s: refcounted entry count %ld, audit count %lu!\n",
1118                __func__,
1119                p2m->pod.entry_count,
1120                entry_count);
1121         BUG();
1122     }
1123 
1124     return pmbad;
1125 }
1126 #endif /* P2M_AUDIT */
1127 
1128 /* Set up the p2m function pointers for pagetable format */
1129 void p2m_pt_init(struct p2m_domain *p2m)
1130 {
1131     p2m->set_entry = p2m_pt_set_entry;
1132     p2m->get_entry = p2m_pt_get_entry;
1133     p2m->recalc = do_recalc;
1134     p2m->change_entry_type_global = p2m_pt_change_entry_type_global;
1135     p2m->change_entry_type_range = p2m_pt_change_entry_type_range;
1136     p2m->write_p2m_entry = paging_write_p2m_entry;
1137 #if P2M_AUDIT
1138     p2m->audit_p2m = p2m_pt_audit_p2m;
1139 #else
1140     p2m->audit_p2m = NULL;
1141 #endif
1142 }
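/*
 * Note: most callers reach this code through the hook pointers installed
 * above (set_entry/get_entry/recalc/...), which lets the generic p2m layer
 * treat this pagetable-format backend and the EPT backend interchangeably.
 */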
1143 
1144 
1145