1 /******************************************************************************
2 * arch/x86/mm/p2m-pt.c
3 *
4  * Implementation of p2m data structures as pagetables, for use by
5 * NPT and shadow-pagetable code
6 *
7 * Parts of this code are Copyright (c) 2009-2011 by Citrix Systems, Inc.
8 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
9 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
10 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
11 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; If not, see <http://www.gnu.org/licenses/>.
25 */
26
27 #include <xen/iommu.h>
28 #include <xen/vm_event.h>
29 #include <xen/event.h>
30 #include <xen/trace.h>
31 #include <public/vm_event.h>
32 #include <asm/domain.h>
33 #include <asm/page.h>
34 #include <asm/paging.h>
35 #include <asm/p2m.h>
36 #include <asm/mem_sharing.h>
37 #include <asm/hvm/nestedhvm.h>
38 #include <asm/hvm/svm/amd-iommu-proto.h>
39
40 #include "mm-locks.h"
41
42 /* Override macros from asm/page.h to make them work with mfn_t */
43 #undef mfn_to_page
44 #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
45 #undef page_to_mfn
46 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
47
48 /*
49 * We may store INVALID_MFN in PTEs. We need to clip this to avoid trampling
50 * over higher-order bits (NX, p2m type, IOMMU flags). We seem to not need
51 * to unclip on the read path, as callers are concerned only with p2m type in
52 * such cases.
53 */
54 #define p2m_l1e_from_pfn(pfn, flags) \
55 l1e_from_pfn((pfn) & (PADDR_MASK >> PAGE_SHIFT), (flags))
56 #define p2m_l2e_from_pfn(pfn, flags) \
57 l2e_from_pfn((pfn) & ((PADDR_MASK & ~(_PAGE_PSE_PAT | 0UL)) \
58 >> PAGE_SHIFT), (flags) | _PAGE_PSE)
59 #define p2m_l3e_from_pfn(pfn, flags) \
60 l3e_from_pfn((pfn) & ((PADDR_MASK & ~(_PAGE_PSE_PAT | 0UL)) \
61 >> PAGE_SHIFT), (flags) | _PAGE_PSE)
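/*
 * Note: INVALID_MFN is all ones, so the masking above is what confines a
 * stored INVALID_MFN to the PTE's address field.
 */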
62
63 /* PTE flags for the various types of p2m entry */
64 #define P2M_BASE_FLAGS \
65 (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
66
67 #define RECALC_FLAGS (_PAGE_USER|_PAGE_ACCESSED)
68 #define set_recalc(level, ent) level##e_remove_flags(ent, RECALC_FLAGS)
69 #define clear_recalc(level, ent) level##e_add_flags(ent, RECALC_FLAGS)
70 #define _needs_recalc(flags) (!((flags) & _PAGE_USER))
71 #define needs_recalc(level, ent) _needs_recalc(level##e_get_flags(ent))
72 #define valid_recalc(level, ent) (!(level##e_get_flags(ent) & _PAGE_ACCESSED))
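/*
 * In other words: an entry whose U flag is clear is pending a type
 * re-calculation.  set_recalc() clears A alongside U, which is what
 * valid_recalc() checks for, and clear_recalc() restores both flags once
 * the entry has been brought up to date.
 */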
73
74 static unsigned long p2m_type_to_flags(const struct p2m_domain *p2m,
75 p2m_type_t t,
76 mfn_t mfn,
77 unsigned int level)
78 {
79 unsigned long flags;
80 /*
81      * AMD IOMMU: when we share the p2m table with the IOMMU, bits 9-11 are
82      * used by the hardware to encode the next IO page level, and bits 59-62
83      * carry IOMMU flags, so we cannot use any of these bits to store p2m types.
84 */
85 flags = (unsigned long)(t & 0x7f) << 12;
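    /*
     * The type ends up in the software-available high bits of the PTE:
     * flag bits 12-18 here map to PTE bits 52-58 via put_pte_flags(),
     * just below the IOMMU flag bits mentioned above.
     */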
86
87 switch(t)
88 {
89 case p2m_invalid:
90 case p2m_mmio_dm:
91 case p2m_populate_on_demand:
92 case p2m_ram_paging_out:
93 case p2m_ram_paged:
94 case p2m_ram_paging_in:
95 default:
96 return flags | _PAGE_NX_BIT;
97 case p2m_grant_map_ro:
98 return flags | P2M_BASE_FLAGS | _PAGE_NX_BIT;
99 case p2m_ioreq_server:
100 flags |= P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
101 if ( p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE )
102 return flags & ~_PAGE_RW;
103 return flags;
104 case p2m_ram_ro:
105 case p2m_ram_logdirty:
106 case p2m_ram_shared:
107 return flags | P2M_BASE_FLAGS;
108 case p2m_ram_rw:
109 return flags | P2M_BASE_FLAGS | _PAGE_RW;
110 case p2m_grant_map_rw:
111 case p2m_map_foreign:
112 return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
113 case p2m_mmio_direct:
114 if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
115 flags |= _PAGE_RW;
116 else
117 {
118 flags |= _PAGE_PWT;
119 ASSERT(!level);
120 }
121 return flags | P2M_BASE_FLAGS | _PAGE_PCD;
122 }
123 }
124
125
126 // Find the next level's P2M entry, checking for out-of-range GFNs.
127 // Returns NULL on error.
128 //
129 static l1_pgentry_t *
130 p2m_find_entry(void *table, unsigned long *gfn_remainder,
131 unsigned long gfn, uint32_t shift, uint32_t max)
132 {
133 u32 index;
134
135 index = *gfn_remainder >> shift;
136 if ( index >= max )
137 {
138 P2M_DEBUG("gfn=%#lx out of range "
139 "(gfn_remainder=%#lx shift=%d index=%#x max=%#x)\n",
140 gfn, *gfn_remainder, shift, index, max);
141 return NULL;
142 }
143 *gfn_remainder &= (1 << shift) - 1;
144 return (l1_pgentry_t *)table + index;
145 }
146
147 /* Free intermediate tables from a p2m sub-tree */
148 static void
149 p2m_free_entry(struct p2m_domain *p2m, l1_pgentry_t *p2m_entry, int page_order)
150 {
151 /* End if the entry is a leaf entry. */
152 if ( page_order == PAGE_ORDER_4K
153 || !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT)
154 || (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
155 return;
156
157 if ( page_order > PAGE_ORDER_2M )
158 {
159 l1_pgentry_t *l3_table = map_domain_page(l1e_get_mfn(*p2m_entry));
160
161 for ( int i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
162 p2m_free_entry(p2m, l3_table + i, page_order - 9);
163 unmap_domain_page(l3_table);
164 }
165
166 p2m_free_ptp(p2m, l1e_get_page(*p2m_entry));
167 }
168
169 // Walk one level of the P2M table, allocating a new table if required.
170 // Returns -errno on error, 0 on success.
171 //
172
173 /* AMD IOMMU: Convert next-level and r/w bits into 24-bit p2m flags */
174 #define iommu_nlevel_to_flags(nl, f) ((((nl) & 0x7) << 9 )|(((f) & 0x3) << 21))
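/*
 * (nl) lands in PTE bits 9-11 (the IOMMU next-level field) and (f) in flag
 * bits 21-22, i.e. PTE bits 61-62, within the IOMMU-owned bit range noted
 * in p2m_type_to_flags().
 */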
175
176 static void p2m_add_iommu_flags(l1_pgentry_t *p2m_entry,
177 unsigned int nlevel, unsigned int flags)
178 {
179 if ( iommu_hap_pt_share )
180 l1e_add_flags(*p2m_entry, iommu_nlevel_to_flags(nlevel, flags));
181 }
182
183 /* Returns: 0 for success, -errno for failure */
184 static int
185 p2m_next_level(struct p2m_domain *p2m, void **table,
186 unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
187 u32 max, unsigned int level, bool_t unmap)
188 {
189 l1_pgentry_t *p2m_entry, new_entry;
190 void *next;
191 unsigned int flags;
192
193 if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
194 shift, max)) )
195 return -ENOENT;
196
197 flags = l1e_get_flags(*p2m_entry);
198
199 /* PoD/paging: Not present doesn't imply empty. */
200 if ( !flags )
201 {
202 mfn_t mfn = p2m_alloc_ptp(p2m, level);
203
204 if ( mfn_eq(mfn, INVALID_MFN) )
205 return -ENOMEM;
206
207 new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW);
208
209 p2m_add_iommu_flags(&new_entry, level, IOMMUF_readable|IOMMUF_writable);
210 p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level + 1);
211 }
212 else if ( flags & _PAGE_PSE )
213 {
214         /* Split superpages into smaller ones. */
215 unsigned long pfn = l1e_get_pfn(*p2m_entry);
216 mfn_t mfn;
217 l1_pgentry_t *l1_entry;
218 unsigned int i;
219
220 switch ( level )
221 {
222 case 2:
223 break;
224
225 case 1:
226 /*
227 * New splintered mappings inherit the flags of the old superpage,
228 * with a little reorganisation for the _PAGE_PSE_PAT bit.
229 */
230 if ( pfn & 1 ) /* ==> _PAGE_PSE_PAT was set */
231 pfn -= 1; /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
232 else
233 flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
234 break;
235
236 default:
237 ASSERT_UNREACHABLE();
238 return -EINVAL;
239 }
240
241 mfn = p2m_alloc_ptp(p2m, level);
242 if ( mfn_eq(mfn, INVALID_MFN) )
243 return -ENOMEM;
244
245 l1_entry = map_domain_page(mfn);
246
247 /* Inherit original IOMMU permissions, but update Next Level. */
248 if ( iommu_hap_pt_share )
249 {
250 flags &= ~iommu_nlevel_to_flags(~0, 0);
251 flags |= iommu_nlevel_to_flags(level - 1, 0);
252 }
253
254 for ( i = 0; i < (1u << PAGETABLE_ORDER); i++ )
255 {
256 new_entry = l1e_from_pfn(pfn | (i << ((level - 1) * PAGETABLE_ORDER)),
257 flags);
258 p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, level);
259 }
260
261 unmap_domain_page(l1_entry);
262
263 new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW);
264 p2m_add_iommu_flags(&new_entry, level, IOMMUF_readable|IOMMUF_writable);
265 p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level + 1);
266 }
267 else
268 ASSERT(flags & _PAGE_PRESENT);
269
270 next = map_domain_page(l1e_get_mfn(*p2m_entry));
271 if ( unmap )
272 unmap_domain_page(*table);
273 *table = next;
274
275 return 0;
276 }
277
278 /*
279 * Mark (via clearing the U flag) as needing P2M type re-calculation all valid
280 * present entries at the targeted level for the passed in GFN range, which is
281 * guaranteed to not cross a page (table) boundary at that level.
282 */
283 static int p2m_pt_set_recalc_range(struct p2m_domain *p2m,
284 unsigned int level,
285 unsigned long first_gfn,
286 unsigned long last_gfn)
287 {
288 void *table;
289 unsigned long gfn_remainder = first_gfn, remainder;
290 unsigned int i;
291 l1_pgentry_t *pent, *plast;
292 int err = 0;
293
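    /*
     * Walk down from the top level to the requested one, relying on
     * p2m_next_level() to split any superpage encountered on the way so
     * that entries at exactly the requested level exist.
     */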
294 table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
295 for ( i = 4; i-- > level; )
296 {
297 remainder = gfn_remainder;
298 pent = p2m_find_entry(table, &remainder, first_gfn,
299 i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
300 if ( !pent )
301 {
302 err = -EINVAL;
303 goto out;
304 }
305
306 if ( !(l1e_get_flags(*pent) & _PAGE_PRESENT) )
307 goto out;
308
309 err = p2m_next_level(p2m, &table, &gfn_remainder, first_gfn,
310 i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER,
311 i, 1);
312 if ( err )
313 goto out;
314 }
315
316 remainder = gfn_remainder + (last_gfn - first_gfn);
317 pent = p2m_find_entry(table, &gfn_remainder, first_gfn,
318 i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
319 plast = p2m_find_entry(table, &remainder, last_gfn,
320 i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
321 if ( pent && plast )
322 for ( ; pent <= plast; ++pent )
323 {
324 l1_pgentry_t e = *pent;
325
326 if ( (l1e_get_flags(e) & _PAGE_PRESENT) && !needs_recalc(l1, e) )
327 {
328 set_recalc(l1, e);
329 p2m->write_p2m_entry(p2m, first_gfn, pent, e, level);
330 }
331 first_gfn += 1UL << (i * PAGETABLE_ORDER);
332 }
333 else
334 err = -EIO;
335
336 out:
337 unmap_domain_page(table);
338
339 return err;
340 }
341
342 /*
343 * Handle possibly necessary P2M type re-calculation (U flag clear for a
344 * present entry) for the entries in the page table hierarchy for the given
345 * GFN. Propagate the re-calculation flag down to the next page table level
346 * for entries not involved in the translation of the given GFN.
347 */
348 static int do_recalc(struct p2m_domain *p2m, unsigned long gfn)
349 {
350 void *table;
351 unsigned long gfn_remainder = gfn;
352 unsigned int level = 4;
353 l1_pgentry_t *pent;
354 int err = 0;
355
356 table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
357 while ( --level )
358 {
359 unsigned long remainder = gfn_remainder;
360
361 pent = p2m_find_entry(table, &remainder, gfn,
362 level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
363 if ( !pent || !(l1e_get_flags(*pent) & _PAGE_PRESENT) )
364 goto out;
365
366 if ( l1e_get_flags(*pent) & _PAGE_PSE )
367 {
368 unsigned long mask = ~0UL << (level * PAGETABLE_ORDER);
369
370 ASSERT(p2m_flags_to_type(l1e_get_flags(*pent)) != p2m_ioreq_server);
371 if ( !needs_recalc(l1, *pent) ||
372 !p2m_is_changeable(p2m_flags_to_type(l1e_get_flags(*pent))) ||
373 p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask) >= 0 )
374 break;
375 }
376
377 err = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
378 level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER,
379 level, 0);
380 if ( err )
381 goto out;
382
383 if ( needs_recalc(l1, *pent) )
384 {
385 l1_pgentry_t e = *pent, *ptab = table;
386 unsigned int i;
387
388 if ( !valid_recalc(l1, e) )
389 P2M_DEBUG("bogus recalc state at d%d:%lx:%u\n",
390 p2m->domain->domain_id, gfn, level);
391 remainder = gfn_remainder;
392 for ( i = 0; i < (1 << PAGETABLE_ORDER); ++i )
393 {
394 l1_pgentry_t ent = ptab[i];
395
396 if ( (l1e_get_flags(ent) & _PAGE_PRESENT) &&
397 !needs_recalc(l1, ent) )
398 {
399 set_recalc(l1, ent);
400 p2m->write_p2m_entry(p2m, gfn - remainder, &ptab[i],
401 ent, level);
402 }
403 remainder -= 1UL << ((level - 1) * PAGETABLE_ORDER);
404 }
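            /*
             * Make sure the recalc marks written to the lower level entries
             * above are visible before the intermediate entry's own recalc
             * flag is cleared below.
             */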
405 smp_wmb();
406 clear_recalc(l1, e);
407 p2m->write_p2m_entry(p2m, gfn, pent, e, level + 1);
408 }
409 unmap_domain_page((void *)((unsigned long)pent & PAGE_MASK));
410 }
411
412 pent = p2m_find_entry(table, &gfn_remainder, gfn,
413 level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
414 if ( pent && (l1e_get_flags(*pent) & _PAGE_PRESENT) &&
415 needs_recalc(l1, *pent) )
416 {
417 l1_pgentry_t e = *pent;
418 p2m_type_t ot, nt;
419 unsigned long mask = ~0UL << (level * PAGETABLE_ORDER);
420
421 if ( !valid_recalc(l1, e) )
422 P2M_DEBUG("bogus recalc leaf at d%d:%lx:%u\n",
423 p2m->domain->domain_id, gfn, level);
424 ot = p2m_flags_to_type(l1e_get_flags(e));
425 nt = p2m_recalc_type_range(true, ot, p2m, gfn & mask, gfn | ~mask);
426 if ( nt != ot )
427 {
428 unsigned long mfn = l1e_get_pfn(e);
429 unsigned long flags = p2m_type_to_flags(p2m, nt,
430 _mfn(mfn), level);
431
432 if ( level )
433 {
434 if ( flags & _PAGE_PAT )
435 {
436 BUILD_BUG_ON(_PAGE_PAT != _PAGE_PSE);
437 mfn |= _PAGE_PSE_PAT >> PAGE_SHIFT;
438 }
439 else
440 mfn &= ~((unsigned long)_PAGE_PSE_PAT >> PAGE_SHIFT);
441 flags |= _PAGE_PSE;
442 }
443
444 if ( ot == p2m_ioreq_server )
445 {
446 ASSERT(p2m->ioreq.entry_count > 0);
447 ASSERT(level == 0);
448 p2m->ioreq.entry_count--;
449 }
450
451 e = l1e_from_pfn(mfn, flags);
452 p2m_add_iommu_flags(&e, level,
453 (nt == p2m_ram_rw)
454 ? IOMMUF_readable|IOMMUF_writable : 0);
455 ASSERT(!needs_recalc(l1, e));
456 }
457 else
458 clear_recalc(l1, e);
459 p2m->write_p2m_entry(p2m, gfn, pent, e, level + 1);
460 }
461
462 out:
463 unmap_domain_page(table);
464
465 return err;
466 }
467
468 int p2m_pt_handle_deferred_changes(uint64_t gpa)
469 {
470 struct p2m_domain *p2m = p2m_get_hostp2m(current->domain);
471 int rc;
472
473 p2m_lock(p2m);
474 rc = do_recalc(p2m, PFN_DOWN(gpa));
475 p2m_unlock(p2m);
476
477 return rc;
478 }
479
480 /* Returns: 0 for success, -errno for failure */
481 static int
482 p2m_pt_set_entry(struct p2m_domain *p2m, gfn_t gfn_, mfn_t mfn,
483 unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma,
484 int sve)
485 {
486     /* XXX -- this might be faster iff current->domain == d */
487 void *table;
488 unsigned long gfn = gfn_x(gfn_);
489 unsigned long i, gfn_remainder = gfn;
490 l1_pgentry_t *p2m_entry, entry_content;
491 /* Intermediate table to free if we're replacing it with a superpage. */
492 l1_pgentry_t intermediate_entry = l1e_empty();
493 l2_pgentry_t l2e_content;
494 l3_pgentry_t l3e_content;
495 int rc;
496 unsigned int iommu_pte_flags = p2m_get_iommu_flags(p2mt, mfn);
497 /*
498 * old_mfn and iommu_old_flags control possible flush/update needs on the
499 * IOMMU: We need to flush when MFN or flags (i.e. permissions) change.
500 * iommu_old_flags being initialized to zero covers the case of the entry
501 * getting replaced being a non-present (leaf or intermediate) one. For
502 * present leaf entries the real value will get calculated below, while
503 * for present intermediate entries ~0 (guaranteed != iommu_pte_flags)
504 * will be used (to cover all cases of what the leaf entries underneath
505 * the intermediate one might be).
506 */
507 unsigned int flags, iommu_old_flags = 0;
508 unsigned long old_mfn = mfn_x(INVALID_MFN);
509
510 ASSERT(sve != 0);
511
512 if ( tb_init_done )
513 {
514 struct {
515 u64 gfn, mfn;
516 int p2mt;
517 int d:16,order:16;
518 } t;
519
520 t.gfn = gfn;
521 t.mfn = mfn_x(mfn);
522 t.p2mt = p2mt;
523 t.d = p2m->domain->domain_id;
524 t.order = page_order;
525
526 __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t);
527 }
528
529 if ( unlikely(p2m_is_foreign(p2mt)) )
530 {
531 /* hvm fixme: foreign types are only supported on ept at present */
532 gdprintk(XENLOG_WARNING, "Unimplemented foreign p2m type.\n");
533 return -EINVAL;
534 }
535
536     /* Carry out any still-pending earlier changes first. */
537 rc = do_recalc(p2m, gfn);
538 if ( rc < 0 )
539 return rc;
540
541 table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
542 rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
543 L4_PAGETABLE_SHIFT - PAGE_SHIFT,
544 L4_PAGETABLE_ENTRIES, 3, 1);
545 if ( rc )
546 goto out;
547
548 /*
549      * Try to install a 1GB (L3 superpage) mapping if one was requested.
550 */
551 if ( page_order == PAGE_ORDER_1G )
552 {
553 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
554 L3_PAGETABLE_SHIFT - PAGE_SHIFT,
555 L3_PAGETABLE_ENTRIES);
556 ASSERT(p2m_entry);
557 flags = l1e_get_flags(*p2m_entry);
558 if ( flags & _PAGE_PRESENT )
559 {
560 if ( flags & _PAGE_PSE )
561 {
562 old_mfn = l1e_get_pfn(*p2m_entry);
563 iommu_old_flags =
564 p2m_get_iommu_flags(p2m_flags_to_type(flags),
565 _mfn(old_mfn));
566 }
567 else
568 {
569 iommu_old_flags = ~0;
570 intermediate_entry = *p2m_entry;
571 }
572 }
573
574 ASSERT(p2m_flags_to_type(flags) != p2m_ioreq_server);
575 ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
576 l3e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
577 ? p2m_l3e_from_pfn(mfn_x(mfn),
578 p2m_type_to_flags(p2m, p2mt, mfn, 2))
579 : l3e_empty();
580 entry_content.l1 = l3e_content.l3;
581
582 if ( entry_content.l1 != 0 )
583 p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
584
585 p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 3);
586 /* NB: paging_write_p2m_entry() handles tlb flushes properly */
587 }
588 else
589 {
590 rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
591 L3_PAGETABLE_SHIFT - PAGE_SHIFT,
592 L3_PAGETABLE_ENTRIES, 2, 1);
593 if ( rc )
594 goto out;
595 }
596
597 if ( page_order == PAGE_ORDER_4K )
598 {
599 p2m_type_t p2mt_old;
600
601 rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
602 L2_PAGETABLE_SHIFT - PAGE_SHIFT,
603 L2_PAGETABLE_ENTRIES, 1, 1);
604 if ( rc )
605 goto out;
606
607 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
608 0, L1_PAGETABLE_ENTRIES);
609 ASSERT(p2m_entry);
610 old_mfn = l1e_get_pfn(*p2m_entry);
611 iommu_old_flags =
612 p2m_get_iommu_flags(p2m_flags_to_type(l1e_get_flags(*p2m_entry)),
613 _mfn(old_mfn));
614
615 if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
616 entry_content = p2m_l1e_from_pfn(mfn_x(mfn),
617 p2m_type_to_flags(p2m, p2mt, mfn, 0));
618 else
619 entry_content = l1e_empty();
620
621 if ( entry_content.l1 != 0 )
622 p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
623
624 p2mt_old = p2m_flags_to_type(l1e_get_flags(*p2m_entry));
625
626 /*
627 * p2m_ioreq_server is only used for 4K pages, so
628 * the count is only done for level 1 entries.
629 */
630 if ( p2mt == p2m_ioreq_server )
631 p2m->ioreq.entry_count++;
632
633 if ( p2mt_old == p2m_ioreq_server )
634 {
635 ASSERT(p2m->ioreq.entry_count > 0);
636 p2m->ioreq.entry_count--;
637 }
638
639 /* level 1 entry */
640 p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 1);
641 /* NB: paging_write_p2m_entry() handles tlb flushes properly */
642 }
643 else if ( page_order == PAGE_ORDER_2M )
644 {
645 p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
646 L2_PAGETABLE_SHIFT - PAGE_SHIFT,
647 L2_PAGETABLE_ENTRIES);
648 ASSERT(p2m_entry);
649 flags = l1e_get_flags(*p2m_entry);
650 if ( flags & _PAGE_PRESENT )
651 {
652 if ( flags & _PAGE_PSE )
653 {
654 old_mfn = l1e_get_pfn(*p2m_entry);
655 iommu_old_flags =
656 p2m_get_iommu_flags(p2m_flags_to_type(flags),
657 _mfn(old_mfn));
658 }
659 else
660 {
661 iommu_old_flags = ~0;
662 intermediate_entry = *p2m_entry;
663 }
664 }
665
666 ASSERT(p2m_flags_to_type(flags) != p2m_ioreq_server);
667 ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
668 l2e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
669 ? p2m_l2e_from_pfn(mfn_x(mfn),
670 p2m_type_to_flags(p2m, p2mt, mfn, 1))
671 : l2e_empty();
672 entry_content.l1 = l2e_content.l2;
673
674 if ( entry_content.l1 != 0 )
675 p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
676
677 p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 2);
678 /* NB: paging_write_p2m_entry() handles tlb flushes properly */
679 }
680
681 /* Track the highest gfn for which we have ever had a valid mapping */
682 if ( p2mt != p2m_invalid
683 && (gfn + (1UL << page_order) - 1 > p2m->max_mapped_pfn) )
684 p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;
685
686 if ( iommu_enabled && need_iommu(p2m->domain) &&
687 (iommu_old_flags != iommu_pte_flags || old_mfn != mfn_x(mfn)) )
688 {
689 ASSERT(rc == 0);
690
691 if ( iommu_use_hap_pt(p2m->domain) )
692 {
693 if ( iommu_old_flags )
694 amd_iommu_flush_pages(p2m->domain, gfn, page_order);
695 }
696 else if ( iommu_pte_flags )
697 for ( i = 0; i < (1UL << page_order); i++ )
698 {
699 rc = iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i,
700 iommu_pte_flags);
701 if ( unlikely(rc) )
702 {
703 while ( i-- )
704 /* If statement to satisfy __must_check. */
705 if ( iommu_unmap_page(p2m->domain, gfn + i) )
706 continue;
707
708 break;
709 }
710 }
711 else
712 for ( i = 0; i < (1UL << page_order); i++ )
713 {
714 int ret = iommu_unmap_page(p2m->domain, gfn + i);
715
716 if ( !rc )
717 rc = ret;
718 }
719 }
720
721 /*
722 * Free old intermediate tables if necessary. This has to be the
723 * last thing we do, after removal from the IOMMU tables, so as to
724 * avoid a potential use-after-free.
725 */
726 if ( l1e_get_flags(intermediate_entry) & _PAGE_PRESENT )
727 p2m_free_entry(p2m, &intermediate_entry, page_order);
728
729 out:
730 unmap_domain_page(table);
731 return rc;
732 }
733
734 static mfn_t
735 p2m_pt_get_entry(struct p2m_domain *p2m, gfn_t gfn_,
736 p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
737 unsigned int *page_order, bool_t *sve)
738 {
739 mfn_t mfn;
740 unsigned long gfn = gfn_x(gfn_);
741 paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
742 l2_pgentry_t *l2e;
743 l1_pgentry_t *l1e;
744 unsigned int flags;
745 p2m_type_t l1t;
746 bool_t recalc;
747
748 ASSERT(paging_mode_translate(p2m->domain));
749
750 if ( sve )
751 *sve = 1;
752
753 /* XXX This is for compatibility with the old model, where anything not
754 * XXX marked as RAM was considered to be emulated MMIO space.
755 * XXX Once we start explicitly registering MMIO regions in the p2m
756 * XXX we will return p2m_invalid for unmapped gfns */
757 *t = p2m_mmio_dm;
758 /* Not implemented except with EPT */
759 *a = p2m_access_rwx;
760
761 if ( gfn > p2m->max_mapped_pfn )
762 {
763 /* This pfn is higher than the highest the p2m map currently holds */
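        /*
         * If the caller wants a page order, report the largest naturally
         * aligned region above max_mapped_pfn that contains this gfn, so
         * the entire hole can be skipped in one step.
         */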
764 if ( page_order )
765 {
766 for ( *page_order = 3 * PAGETABLE_ORDER; *page_order;
767 *page_order -= PAGETABLE_ORDER )
768 if ( (gfn & ~((1UL << *page_order) - 1)) >
769 p2m->max_mapped_pfn )
770 break;
771 }
772 return INVALID_MFN;
773 }
774
775 mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
776
777 {
778 l4_pgentry_t *l4e = map_domain_page(mfn);
779 l4e += l4_table_offset(addr);
780 if ( page_order )
781 *page_order = 3 * PAGETABLE_ORDER;
782 if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
783 {
784 unmap_domain_page(l4e);
785 return INVALID_MFN;
786 }
787 mfn = l4e_get_mfn(*l4e);
788 recalc = needs_recalc(l4, *l4e);
789 unmap_domain_page(l4e);
790 }
791 {
792 l3_pgentry_t *l3e = map_domain_page(mfn);
793 l3e += l3_table_offset(addr);
794 if ( page_order )
795 *page_order = 2 * PAGETABLE_ORDER;
796
797 pod_retry_l3:
798 flags = l3e_get_flags(*l3e);
799 if ( !(flags & _PAGE_PRESENT) )
800 {
801 if ( p2m_flags_to_type(flags) == p2m_populate_on_demand )
802 {
803 if ( q & P2M_ALLOC )
804 {
805 if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_1G) )
806 goto pod_retry_l3;
807 gdprintk(XENLOG_ERR, "%s: Allocate 1GB failed!\n", __func__);
808 }
809 else
810 *t = p2m_populate_on_demand;
811 }
812 unmap_domain_page(l3e);
813 return INVALID_MFN;
814 }
815 if ( flags & _PAGE_PSE )
816 {
817 mfn = _mfn(l3e_get_pfn(*l3e) +
818 l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
819 l1_table_offset(addr));
820 *t = p2m_recalc_type(recalc || _needs_recalc(flags),
821 p2m_flags_to_type(flags), p2m, gfn);
822 unmap_domain_page(l3e);
823
824 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
825 return (p2m_is_valid(*t)) ? mfn : INVALID_MFN;
826 }
827
828 mfn = l3e_get_mfn(*l3e);
829 if ( _needs_recalc(flags) )
830 recalc = 1;
831 unmap_domain_page(l3e);
832 }
833
834 l2e = map_domain_page(mfn);
835 l2e += l2_table_offset(addr);
836 if ( page_order )
837 *page_order = PAGETABLE_ORDER;
838
839 pod_retry_l2:
840 flags = l2e_get_flags(*l2e);
841 if ( !(flags & _PAGE_PRESENT) )
842 {
843 /* PoD: Try to populate a 2-meg chunk */
844 if ( p2m_flags_to_type(flags) == p2m_populate_on_demand )
845 {
846 if ( q & P2M_ALLOC ) {
847 if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_2M) )
848 goto pod_retry_l2;
849 } else
850 *t = p2m_populate_on_demand;
851 }
852
853 unmap_domain_page(l2e);
854 return INVALID_MFN;
855 }
856 if ( flags & _PAGE_PSE )
857 {
858 mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
859 *t = p2m_recalc_type(recalc || _needs_recalc(flags),
860 p2m_flags_to_type(flags), p2m, gfn);
861 unmap_domain_page(l2e);
862
863 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
864 return (p2m_is_valid(*t)) ? mfn : INVALID_MFN;
865 }
866
867 mfn = l2e_get_mfn(*l2e);
868 if ( needs_recalc(l2, *l2e) )
869 recalc = 1;
870 unmap_domain_page(l2e);
871
872 l1e = map_domain_page(mfn);
873 l1e += l1_table_offset(addr);
874 if ( page_order )
875 *page_order = 0;
876
877 pod_retry_l1:
878 flags = l1e_get_flags(*l1e);
879 l1t = p2m_flags_to_type(flags);
880 if ( !(flags & _PAGE_PRESENT) && !p2m_is_paging(l1t) )
881 {
882 /* PoD: Try to populate */
883 if ( l1t == p2m_populate_on_demand )
884 {
885 if ( q & P2M_ALLOC ) {
886 if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_4K) )
887 goto pod_retry_l1;
888 } else
889 *t = p2m_populate_on_demand;
890 }
891
892 unmap_domain_page(l1e);
893 return INVALID_MFN;
894 }
895 mfn = l1e_get_mfn(*l1e);
896 *t = p2m_recalc_type(recalc || _needs_recalc(flags), l1t, p2m, gfn);
897 unmap_domain_page(l1e);
898
899 ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t) || p2m_is_paging(*t));
900 return (p2m_is_valid(*t) || p2m_is_grant(*t)) ? mfn : INVALID_MFN;
901 }
902
903 static void p2m_pt_change_entry_type_global(struct p2m_domain *p2m,
904 p2m_type_t ot, p2m_type_t nt)
905 {
906 l1_pgentry_t *tab;
907 unsigned long gfn = 0;
908 unsigned int i, changed;
909
910 if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) == 0 )
911 return;
912
913 ASSERT(hap_enabled(p2m->domain));
914
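    /*
     * Only the L4 entries are marked here; the actual per-page type changes
     * happen lazily, as the recalc flag is propagated down by subsequent
     * do_recalc() calls.
     */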
915 tab = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
916 for ( changed = i = 0; i < (1 << PAGETABLE_ORDER); ++i )
917 {
918 l1_pgentry_t e = tab[i];
919
920 if ( (l1e_get_flags(e) & _PAGE_PRESENT) &&
921 !needs_recalc(l1, e) )
922 {
923 set_recalc(l1, e);
924 p2m->write_p2m_entry(p2m, gfn, &tab[i], e, 4);
925 ++changed;
926 }
927 gfn += 1UL << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
928 }
929 unmap_domain_page(tab);
930
931 if ( changed )
932 flush_tlb_mask(p2m->domain->domain_dirty_cpumask);
933 }
934
935 static int p2m_pt_change_entry_type_range(struct p2m_domain *p2m,
936 p2m_type_t ot, p2m_type_t nt,
937 unsigned long first_gfn,
938 unsigned long last_gfn)
939 {
940 unsigned long mask = (1 << PAGETABLE_ORDER) - 1;
941 unsigned int i;
942 int err = 0;
943
944 ASSERT(hap_enabled(p2m->domain));
945
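    /*
     * Peel off the unaligned head and tail of the GFN range at each level,
     * marking those entries at that level, then widen the mask and move up
     * a level for the remaining, now aligned, middle part.
     */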
946 for ( i = 1; i <= 4; )
947 {
948 if ( first_gfn & mask )
949 {
950 unsigned long end_gfn = min(first_gfn | mask, last_gfn);
951
952 err = p2m_pt_set_recalc_range(p2m, i, first_gfn, end_gfn);
953 if ( err || end_gfn >= last_gfn )
954 break;
955 first_gfn = end_gfn + 1;
956 }
957 else if ( (last_gfn & mask) != mask )
958 {
959 unsigned long start_gfn = max(first_gfn, last_gfn & ~mask);
960
961 err = p2m_pt_set_recalc_range(p2m, i, start_gfn, last_gfn);
962 if ( err || start_gfn <= first_gfn )
963 break;
964 last_gfn = start_gfn - 1;
965 }
966 else
967 {
968 ++i;
969 mask |= mask << PAGETABLE_ORDER;
970 }
971 }
972
973 return err;
974 }
975
976 #if P2M_AUDIT
977 long p2m_pt_audit_p2m(struct p2m_domain *p2m)
978 {
979 unsigned long entry_count = 0, pmbad = 0;
980 unsigned long mfn, gfn, m2pfn;
981
982 ASSERT(p2m_locked_by_me(p2m));
983 ASSERT(pod_locked_by_me(p2m));
984
985 /* Audit part one: walk the domain's p2m table, checking the entries. */
986 if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
987 {
988 l2_pgentry_t *l2e;
989 l1_pgentry_t *l1e;
990 int i1, i2;
991
992 l4_pgentry_t *l4e;
993 l3_pgentry_t *l3e;
994 int i4, i3;
995 l4e = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
996
997 gfn = 0;
998 for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
999 {
1000 if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
1001 {
1002 gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
1003 continue;
1004 }
1005 l3e = map_l3t_from_l4e(l4e[i4]);
1006 for ( i3 = 0;
1007 i3 < L3_PAGETABLE_ENTRIES;
1008 i3++ )
1009 {
1010 if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
1011 {
1012 gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
1013 continue;
1014 }
1015
1016 /* check for 1GB super page */
1017 if ( l3e_get_flags(l3e[i3]) & _PAGE_PSE )
1018 {
1019 mfn = l3e_get_pfn(l3e[i3]);
1020 ASSERT(mfn_valid(_mfn(mfn)));
1021 /* we have to cover 512x512 4K pages */
1022 for ( i2 = 0;
1023 i2 < (L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES);
1024 i2++)
1025 {
1026 m2pfn = get_gpfn_from_mfn(mfn+i2);
1027 if ( m2pfn != (gfn + i2) )
1028 {
1029 pmbad++;
1030 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1031 " -> gfn %#lx\n", gfn+i2, mfn+i2,
1032 m2pfn);
1033 BUG();
1034 }
1035 gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
1036 continue;
1037 }
1038 }
1039
1040 l2e = map_l2t_from_l3e(l3e[i3]);
1041 for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
1042 {
1043 if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
1044 {
1045 if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
1046 && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
1047 == p2m_populate_on_demand ) )
1048 entry_count+=SUPERPAGE_PAGES;
1049 gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
1050 continue;
1051 }
1052
1053 /* check for super page */
1054 if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
1055 {
1056 mfn = l2e_get_pfn(l2e[i2]);
1057 ASSERT(mfn_valid(_mfn(mfn)));
1058 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
1059 {
1060 m2pfn = get_gpfn_from_mfn(mfn+i1);
1061 /* Allow shared M2Ps */
1062 if ( (m2pfn != (gfn + i1)) &&
1063 (m2pfn != SHARED_M2P_ENTRY) )
1064 {
1065 pmbad++;
1066 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1067 " -> gfn %#lx\n", gfn+i1, mfn+i1,
1068 m2pfn);
1069 BUG();
1070 }
1071 }
1072 gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
1073 continue;
1074 }
1075
1076 l1e = map_l1t_from_l2e(l2e[i2]);
1077
1078 for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
1079 {
1080 p2m_type_t type;
1081
1082 type = p2m_flags_to_type(l1e_get_flags(l1e[i1]));
1083 if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
1084 {
1085 if ( type == p2m_populate_on_demand )
1086 entry_count++;
1087 continue;
1088 }
1089 mfn = l1e_get_pfn(l1e[i1]);
1090 ASSERT(mfn_valid(_mfn(mfn)));
1091 m2pfn = get_gpfn_from_mfn(mfn);
1092 if ( m2pfn != gfn &&
1093 type != p2m_mmio_direct &&
1094 !p2m_is_grant(type) &&
1095 !p2m_is_shared(type) )
1096 {
1097 pmbad++;
1098 printk("mismatch: gfn %#lx -> mfn %#lx"
1099 " -> gfn %#lx\n", gfn, mfn, m2pfn);
1100 P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
1101 " -> gfn %#lx\n", gfn, mfn, m2pfn);
1102 BUG();
1103 }
1104 }
1105 unmap_domain_page(l1e);
1106 }
1107 unmap_domain_page(l2e);
1108 }
1109 unmap_domain_page(l3e);
1110 }
1111
1112 unmap_domain_page(l4e);
1113 }
1114
1115 if ( entry_count != p2m->pod.entry_count )
1116 {
1117 printk("%s: refcounted entry count %ld, audit count %lu!\n",
1118 __func__,
1119 p2m->pod.entry_count,
1120 entry_count);
1121 BUG();
1122 }
1123
1124 return pmbad;
1125 }
1126 #endif /* P2M_AUDIT */
1127
1128 /* Set up the p2m function pointers for pagetable format */
1129 void p2m_pt_init(struct p2m_domain *p2m)
1130 {
1131 p2m->set_entry = p2m_pt_set_entry;
1132 p2m->get_entry = p2m_pt_get_entry;
1133 p2m->recalc = do_recalc;
1134 p2m->change_entry_type_global = p2m_pt_change_entry_type_global;
1135 p2m->change_entry_type_range = p2m_pt_change_entry_type_range;
1136 p2m->write_p2m_entry = paging_write_p2m_entry;
1137 #if P2M_AUDIT
1138 p2m->audit_p2m = p2m_pt_audit_p2m;
1139 #else
1140 p2m->audit_p2m = NULL;
1141 #endif
1142 }
1143
1144
1145