1 #include <xen/sched.h>
2 #include <xen/lib.h>
3 #include <xen/errno.h>
4 #include <xen/domain_page.h>
5 #include <xen/bitops.h>
6 #include <xen/vm_event.h>
7 #include <xen/monitor.h>
8 #include <xen/iocap.h>
9 #include <xen/mem_access.h>
10 #include <xen/xmalloc.h>
11 #include <public/vm_event.h>
12 #include <asm/flushtlb.h>
13 #include <asm/gic.h>
14 #include <asm/event.h>
15 #include <asm/hardirq.h>
16 #include <asm/page.h>
17
18 #define MAX_VMID_8_BIT (1UL << 8)
19 #define MAX_VMID_16_BIT (1UL << 16)
20
21 #define INVALID_VMID 0 /* VMID 0 is reserved */
22
23 #ifdef CONFIG_ARM_64
24 static unsigned int __read_mostly p2m_root_order;
25 static unsigned int __read_mostly p2m_root_level;
26 #define P2M_ROOT_ORDER p2m_root_order
27 #define P2M_ROOT_LEVEL p2m_root_level
28 static unsigned int __read_mostly max_vmid = MAX_VMID_8_BIT;
29 /* VMID is 8-bit wide by default on AArch64 */
30 #define MAX_VMID max_vmid
31 #else
32 /* First level P2M is always 2 consecutive pages */
33 #define P2M_ROOT_LEVEL 1
34 #define P2M_ROOT_ORDER 1
35 /* VMID is always 8-bit wide on AArch32 */
36 #define MAX_VMID MAX_VMID_8_BIT
37 #endif
38
39 #define P2M_ROOT_PAGES (1<<P2M_ROOT_ORDER)
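/* Number of pages making up the (possibly concatenated) p2m root table. */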
40
41 /* Override macros from asm/mm.h to make them work with mfn_t */
42 #undef mfn_to_page
43 #define mfn_to_page(mfn) __mfn_to_page(mfn_x(mfn))
44 #undef page_to_mfn
45 #define page_to_mfn(pg) _mfn(__page_to_mfn(pg))
46
47 unsigned int __read_mostly p2m_ipa_bits;
48
49 /* Helpers to lookup the properties of each level */
50 static const paddr_t level_masks[] =
51 { ZEROETH_MASK, FIRST_MASK, SECOND_MASK, THIRD_MASK };
52 static const uint8_t level_orders[] =
53 { ZEROETH_ORDER, FIRST_ORDER, SECOND_ORDER, THIRD_ORDER };
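/*
 * With the 4KB translation granule used for the p2m, an entry covers
 * 512GB at level 0, 1GB at level 1, 2MB at level 2 and 4KB at level 3;
 * level_orders[] expresses those sizes as an order in 4KB pages.
 */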
54
55 static void p2m_flush_tlb(struct p2m_domain *p2m);
56
57 /* Perform any pending P2M TLB flush and release the write lock */
58 void p2m_write_unlock(struct p2m_domain *p2m)
59 {
60 if ( p2m->need_flush )
61 {
62 p2m->need_flush = false;
63 /*
64 * The final flush is done with the P2M write lock taken to
65 * avoid someone else modifying the P2M before the TLB
66 * invalidation has completed.
67 */
68 p2m_flush_tlb(p2m);
69 }
70
71 write_unlock(&p2m->lock);
72 }
73
74 void p2m_dump_info(struct domain *d)
75 {
76 struct p2m_domain *p2m = p2m_get_hostp2m(d);
77
78 p2m_read_lock(p2m);
79 printk("p2m mappings for domain %d (vmid %d):\n",
80 d->domain_id, p2m->vmid);
81 BUG_ON(p2m->stats.mappings[0] || p2m->stats.shattered[0]);
82 printk(" 1G mappings: %ld (shattered %ld)\n",
83 p2m->stats.mappings[1], p2m->stats.shattered[1]);
84 printk(" 2M mappings: %ld (shattered %ld)\n",
85 p2m->stats.mappings[2], p2m->stats.shattered[2]);
86 printk(" 4K mappings: %ld\n", p2m->stats.mappings[3]);
87 p2m_read_unlock(p2m);
88 }
89
90 void memory_type_changed(struct domain *d)
91 {
92 }
93
94 void dump_p2m_lookup(struct domain *d, paddr_t addr)
95 {
96 struct p2m_domain *p2m = p2m_get_hostp2m(d);
97
98 printk("dom%d IPA 0x%"PRIpaddr"\n", d->domain_id, addr);
99
100 printk("P2M @ %p mfn:0x%lx\n",
101 p2m->root, __page_to_mfn(p2m->root));
102
103 dump_pt_walk(page_to_maddr(p2m->root), addr,
104 P2M_ROOT_LEVEL, P2M_ROOT_PAGES);
105 }
106
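/* Save the vCPU's SCTLR_EL1 so p2m_restore_state() can restore it later. */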
107 void p2m_save_state(struct vcpu *p)
108 {
109 p->arch.sctlr = READ_SYSREG(SCTLR_EL1);
110 }
111
112 void p2m_restore_state(struct vcpu *n)
113 {
114 struct p2m_domain *p2m = p2m_get_hostp2m(n->domain);
115 uint8_t *last_vcpu_ran;
116
117 if ( is_idle_vcpu(n) )
118 return;
119
120 WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
121 isb();
122
123 WRITE_SYSREG(n->arch.sctlr, SCTLR_EL1);
124 isb();
125
126 WRITE_SYSREG(n->arch.hcr_el2, HCR_EL2);
127 isb();
128
129 last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()];
130
131 /*
132 * Flush local TLB for the domain to prevent wrong TLB translation
133 * when running multiple vCPU of the same domain on a single pCPU.
134 */
135 if ( *last_vcpu_ran != INVALID_VCPU_ID && *last_vcpu_ran != n->vcpu_id )
136 flush_tlb_local();
137
138 *last_vcpu_ran = n->vcpu_id;
139 }
140
141 static void p2m_flush_tlb(struct p2m_domain *p2m)
142 {
143 unsigned long flags = 0;
144 uint64_t ovttbr;
145
146 /*
147 * ARM only provides an instruction to flush TLBs for the current
148 * VMID. So switch to the VTTBR of a given P2M if different.
149 */
150 ovttbr = READ_SYSREG64(VTTBR_EL2);
151 if ( ovttbr != p2m->vttbr )
152 {
153 local_irq_save(flags);
154 WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
155 isb();
156 }
157
158 flush_tlb();
159
160 if ( ovttbr != READ_SYSREG64(VTTBR_EL2) )
161 {
162 WRITE_SYSREG64(ovttbr, VTTBR_EL2);
163 isb();
164 local_irq_restore(flags);
165 }
166 }
167
168 /*
169 * Force a synchronous P2M TLB flush.
170 *
171 * Must be called with the p2m lock held.
172 */
173 static void p2m_flush_tlb_sync(struct p2m_domain *p2m)
174 {
175 ASSERT(p2m_is_write_locked(p2m));
176
177 p2m_flush_tlb(p2m);
178 p2m->need_flush = false;
179 }
180
181 /*
182 * Find and map the root page table. The caller is responsible for
183 * unmapping the table.
184 *
185 * The function will return NULL if the offset of the root table is
186 * invalid.
187 */
188 static lpae_t *p2m_get_root_pointer(struct p2m_domain *p2m,
189 gfn_t gfn)
190 {
191 unsigned int root_table;
192
193 if ( P2M_ROOT_PAGES == 1 )
194 return __map_domain_page(p2m->root);
195
196 /*
197 * Concatenated root-level tables. The table number will be the
198 * offset at the previous level. It is not possible to
199 * concatenate a level-0 root.
200 */
201 ASSERT(P2M_ROOT_LEVEL > 0);
202
203 root_table = gfn_x(gfn) >> (level_orders[P2M_ROOT_LEVEL - 1]);
204 root_table &= LPAE_ENTRY_MASK;
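/*
 * Example: on AArch32 the root is 2 concatenated first-level pages, so
 * with a 40-bit IPA the computation above yields 0 or 1 and selects the
 * page holding the entry for this GFN.
 */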
205
206 if ( root_table >= P2M_ROOT_PAGES )
207 return NULL;
208
209 return __map_domain_page(p2m->root + root_table);
210 }
211
212 /*
213 * Lookup the MFN corresponding to a domain's GFN.
214 * Look up the mem access setting in the radix tree.
215 * The entry associated with the GFN is considered valid.
216 */
217 static p2m_access_t p2m_mem_access_radix_get(struct p2m_domain *p2m, gfn_t gfn)
218 {
219 void *ptr;
220
221 if ( !p2m->mem_access_enabled )
222 return p2m->default_access;
223
224 ptr = radix_tree_lookup(&p2m->mem_access_settings, gfn_x(gfn));
225 if ( !ptr )
226 return p2m_access_rwx;
227 else
228 return radix_tree_ptr_to_int(ptr);
229 }
230
231 #define GUEST_TABLE_MAP_FAILED 0
232 #define GUEST_TABLE_SUPER_PAGE 1
233 #define GUEST_TABLE_NORMAL_PAGE 2
234
235 static int p2m_create_table(struct p2m_domain *p2m, lpae_t *entry);
236
237 /*
238 * Take the currently mapped table, find the corresponding GFN entry,
239 * and map the next table, if available. The previous table will be
240 * unmapped if the next level was mapped (e.g GUEST_TABLE_NORMAL_PAGE
241 * returned).
242 *
243 * The read_only parameter indicates whether intermediate tables should
244 * be allocated when not present.
245 *
246 * Return values:
247 * GUEST_TABLE_MAP_FAILED: Either read_only was set and the entry
248 * was empty, or allocating a new page failed.
249 * GUEST_TABLE_NORMAL_PAGE: next level mapped normally
250 * GUEST_TABLE_SUPER_PAGE: The next entry points to a superpage.
251 */
252 static int p2m_next_level(struct p2m_domain *p2m, bool read_only,
253 lpae_t **table, unsigned int offset)
254 {
255 lpae_t *entry;
256 int ret;
257 mfn_t mfn;
258
259 entry = *table + offset;
260
261 if ( !lpae_valid(*entry) )
262 {
263 if ( read_only )
264 return GUEST_TABLE_MAP_FAILED;
265
266 ret = p2m_create_table(p2m, entry);
267 if ( ret )
268 return GUEST_TABLE_MAP_FAILED;
269 }
270
271 /* The function p2m_next_level is never called at the 3rd level */
272 if ( lpae_mapping(*entry) )
273 return GUEST_TABLE_SUPER_PAGE;
274
275 mfn = _mfn(entry->p2m.base);
276
277 unmap_domain_page(*table);
278 *table = map_domain_page(mfn);
279
280 return GUEST_TABLE_NORMAL_PAGE;
281 }
282
283 /*
284 * Get the details of a given gfn.
285 *
286 * If the entry is present, the associated MFN will be returned and the
287 * access and type filled up. The page_order will correspond to the
288 * order of the mapping in the page table (i.e it could be a superpage).
289 *
290 * If the entry is not present, INVALID_MFN will be returned and the
291 * page_order will be set according to the order of the invalid range.
292 */
293 mfn_t p2m_get_entry(struct p2m_domain *p2m, gfn_t gfn,
294 p2m_type_t *t, p2m_access_t *a,
295 unsigned int *page_order)
296 {
297 paddr_t addr = gfn_to_gaddr(gfn);
298 unsigned int level = 0;
299 lpae_t entry, *table;
300 int rc;
301 mfn_t mfn = INVALID_MFN;
302 p2m_type_t _t;
303
304 /* Convenience aliases */
305 const unsigned int offsets[4] = {
306 zeroeth_table_offset(addr),
307 first_table_offset(addr),
308 second_table_offset(addr),
309 third_table_offset(addr)
310 };
311
312 ASSERT(p2m_is_locked(p2m));
313 BUILD_BUG_ON(THIRD_MASK != PAGE_MASK);
314
315 /* Allow t to be NULL */
316 t = t ?: &_t;
317
318 *t = p2m_invalid;
319
320 /* XXX: Check if the mapping is lower than the mapped gfn */
321
322 /* This gfn is higher than the highest the p2m map currently holds */
323 if ( gfn_x(gfn) > gfn_x(p2m->max_mapped_gfn) )
324 {
325 for ( level = P2M_ROOT_LEVEL; level < 3; level++ )
326 if ( (gfn_x(gfn) & (level_masks[level] >> PAGE_SHIFT)) >
327 gfn_x(p2m->max_mapped_gfn) )
328 break;
329
330 goto out;
331 }
332
333 table = p2m_get_root_pointer(p2m, gfn);
334
335 /*
336 * the table should always be non-NULL because the gfn is below
337 * p2m->max_mapped_gfn and the root table pages are always present.
338 */
339 BUG_ON(table == NULL);
340
341 for ( level = P2M_ROOT_LEVEL; level < 3; level++ )
342 {
343 rc = p2m_next_level(p2m, true, &table, offsets[level]);
344 if ( rc == GUEST_TABLE_MAP_FAILED )
345 goto out_unmap;
346 else if ( rc != GUEST_TABLE_NORMAL_PAGE )
347 break;
348 }
349
350 entry = table[offsets[level]];
351
352 if ( lpae_valid(entry) )
353 {
354 *t = entry.p2m.type;
355
356 if ( a )
357 *a = p2m_mem_access_radix_get(p2m, gfn);
358
359 mfn = _mfn(entry.p2m.base);
360 /*
361 * The entry may point to a superpage. Find the MFN associated
362 * to the GFN.
363 */
364 mfn = mfn_add(mfn, gfn_x(gfn) & ((1UL << level_orders[level]) - 1));
365 }
366
367 out_unmap:
368 unmap_domain_page(table);
369
370 out:
371 if ( page_order )
372 *page_order = level_orders[level];
373
374 return mfn;
375 }
376
377 mfn_t p2m_lookup(struct domain *d, gfn_t gfn, p2m_type_t *t)
378 {
379 mfn_t ret;
380 struct p2m_domain *p2m = p2m_get_hostp2m(d);
381
382 p2m_read_lock(p2m);
383 ret = p2m_get_entry(p2m, gfn, t, NULL, NULL);
384 p2m_read_unlock(p2m);
385
386 return ret;
387 }
388
389 int guest_physmap_mark_populate_on_demand(struct domain *d,
390 unsigned long gfn,
391 unsigned int order)
392 {
393 return -ENOSYS;
394 }
395
396 int p2m_pod_decrease_reservation(struct domain *d, gfn_t gfn,
397 unsigned int order)
398 {
399 return -ENOSYS;
400 }
401
402 static void p2m_set_permission(lpae_t *e, p2m_type_t t, p2m_access_t a)
403 {
404 /* First apply type permissions */
405 switch ( t )
406 {
407 case p2m_ram_rw:
408 e->p2m.xn = 0;
409 e->p2m.write = 1;
410 break;
411
412 case p2m_ram_ro:
413 e->p2m.xn = 0;
414 e->p2m.write = 0;
415 break;
416
417 case p2m_iommu_map_rw:
418 case p2m_map_foreign:
419 case p2m_grant_map_rw:
420 case p2m_mmio_direct_dev:
421 case p2m_mmio_direct_nc:
422 case p2m_mmio_direct_c:
423 e->p2m.xn = 1;
424 e->p2m.write = 1;
425 break;
426
427 case p2m_iommu_map_ro:
428 case p2m_grant_map_ro:
429 case p2m_invalid:
430 e->p2m.xn = 1;
431 e->p2m.write = 0;
432 break;
433
434 case p2m_max_real_type:
435 BUG();
436 break;
437 }
438
439 /* Then restrict with access permissions */
440 switch ( a )
441 {
442 case p2m_access_rwx:
443 break;
444 case p2m_access_wx:
445 e->p2m.read = 0;
446 break;
447 case p2m_access_rw:
448 e->p2m.xn = 1;
449 break;
450 case p2m_access_w:
451 e->p2m.read = 0;
452 e->p2m.xn = 1;
453 break;
454 case p2m_access_rx:
455 case p2m_access_rx2rw:
456 e->p2m.write = 0;
457 break;
458 case p2m_access_x:
459 e->p2m.write = 0;
460 e->p2m.read = 0;
461 break;
462 case p2m_access_r:
463 e->p2m.write = 0;
464 e->p2m.xn = 1;
465 break;
466 case p2m_access_n:
467 case p2m_access_n2rwx:
468 e->p2m.read = e->p2m.write = 0;
469 e->p2m.xn = 1;
470 break;
471 }
472 }
473
474 static lpae_t mfn_to_p2m_entry(mfn_t mfn, p2m_type_t t, p2m_access_t a)
475 {
476 /*
477 * sh, xn and write bit will be defined in the following switches
478 * based on mattr and t.
479 */
480 lpae_t e = (lpae_t) {
481 .p2m.af = 1,
482 .p2m.read = 1,
483 .p2m.table = 1,
484 .p2m.valid = 1,
485 .p2m.type = t,
486 };
487
488 BUILD_BUG_ON(p2m_max_real_type > (1 << 4));
489
490 switch ( t )
491 {
492 case p2m_mmio_direct_dev:
493 e.p2m.mattr = MATTR_DEV;
494 e.p2m.sh = LPAE_SH_OUTER;
495 break;
496
497 case p2m_mmio_direct_c:
498 e.p2m.mattr = MATTR_MEM;
499 e.p2m.sh = LPAE_SH_OUTER;
500 break;
501
502 /*
503 * ARM ARM: Overlaying the shareability attribute (DDI
504 * 0406C.b B3-1376 to 1377)
505 *
506 * A memory region with a resultant memory type attribute of Normal,
507 * and a resultant cacheability attribute of Inner Non-cacheable,
508 * Outer Non-cacheable, must have a resultant shareability attribute
509 * of Outer Shareable, otherwise shareability is UNPREDICTABLE.
510 *
511 * On ARMv8 shareability is ignored and explicitly treated as Outer
512 * Shareable for Normal Inner Non_cacheable, Outer Non-cacheable.
513 * See the note for table D4-40, in page 1788 of the ARM DDI 0487A.j.
514 */
515 case p2m_mmio_direct_nc:
516 e.p2m.mattr = MATTR_MEM_NC;
517 e.p2m.sh = LPAE_SH_OUTER;
518 break;
519
520 default:
521 e.p2m.mattr = MATTR_MEM;
522 e.p2m.sh = LPAE_SH_INNER;
523 }
524
525 p2m_set_permission(&e, t, a);
526
527 ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK));
528
529 e.p2m.base = mfn_x(mfn);
530
531 return e;
532 }
533
534 static inline void p2m_write_pte(lpae_t *p, lpae_t pte, bool clean_pte)
535 {
536 write_pte(p, pte);
537 if ( clean_pte )
538 clean_dcache(*p);
539 }
540
541 static inline void p2m_remove_pte(lpae_t *p, bool clean_pte)
542 {
543 lpae_t pte;
544
545 memset(&pte, 0x00, sizeof(pte));
546 p2m_write_pte(p, pte, clean_pte);
547 }
548
549 /* Allocate a new page table page and hook it in via the given entry. */
550 static int p2m_create_table(struct p2m_domain *p2m, lpae_t *entry)
551 {
552 struct page_info *page;
553 lpae_t *p;
554 lpae_t pte;
555
556 ASSERT(!lpae_valid(*entry));
557
558 page = alloc_domheap_page(NULL, 0);
559 if ( page == NULL )
560 return -ENOMEM;
561
562 page_list_add(page, &p2m->pages);
563
564 p = __map_domain_page(page);
565 clear_page(p);
566
567 if ( p2m->clean_pte )
568 clean_dcache_va_range(p, PAGE_SIZE);
569
570 unmap_domain_page(p);
571
572 /*
573 * The access value does not matter because the hardware will ignore
574 * the permission fields for a table entry.
575 */
576 pte = mfn_to_p2m_entry(page_to_mfn(page), p2m_invalid,
577 p2m->default_access);
578
579 p2m_write_pte(entry, pte, p2m->clean_pte);
580
581 return 0;
582 }
583
584 static int p2m_mem_access_radix_set(struct p2m_domain *p2m, gfn_t gfn,
585 p2m_access_t a)
586 {
587 int rc;
588
589 if ( !p2m->mem_access_enabled )
590 return 0;
591
592 if ( p2m_access_rwx == a )
593 {
594 radix_tree_delete(&p2m->mem_access_settings, gfn_x(gfn));
595 return 0;
596 }
597
598 rc = radix_tree_insert(&p2m->mem_access_settings, gfn_x(gfn),
599 radix_tree_int_to_ptr(a));
600 if ( rc == -EEXIST )
601 {
602 /* If a setting already exists, change it to the new one */
603 radix_tree_replace_slot(
604 radix_tree_lookup_slot(
605 &p2m->mem_access_settings, gfn_x(gfn)),
606 radix_tree_int_to_ptr(a));
607 rc = 0;
608 }
609
610 return rc;
611 }
612
613 /*
614 * Put any references on the single 4K page referenced by pte.
615 * TODO: Handle superpages, for now we only take special references for leaf
616 * pages (specifically foreign ones, which can't be super mapped today).
617 */
618 static void p2m_put_l3_page(const lpae_t pte)
619 {
620 ASSERT(lpae_valid(pte));
621
622 /*
623 * TODO: Handle other p2m types
624 *
625 * It's safe to do the put_page here because page_alloc will
626 * flush the TLBs if the page is reallocated before the end of
627 * this loop.
628 */
629 if ( p2m_is_foreign(pte.p2m.type) )
630 {
631 mfn_t mfn = _mfn(pte.p2m.base);
632
633 ASSERT(mfn_valid(mfn));
634 put_page(mfn_to_page(mfn));
635 }
636 }
637
638 /* Free lpae sub-tree behind an entry */
639 static void p2m_free_entry(struct p2m_domain *p2m,
640 lpae_t entry, unsigned int level)
641 {
642 unsigned int i;
643 lpae_t *table;
644 mfn_t mfn;
645 struct page_info *pg;
646
647 /* Nothing to do if the entry is invalid. */
648 if ( !lpae_valid(entry) )
649 return;
650
651 /* Nothing to do but updating the stats if the entry is a super-page. */
652 if ( lpae_is_superpage(entry, level) )
653 {
654 p2m->stats.mappings[level]--;
655 return;
656 }
657
658 if ( level == 3 )
659 {
660 p2m->stats.mappings[level]--;
661 p2m_put_l3_page(entry);
662 return;
663 }
664
665 table = map_domain_page(_mfn(entry.p2m.base));
666 for ( i = 0; i < LPAE_ENTRIES; i++ )
667 p2m_free_entry(p2m, *(table + i), level + 1);
668
669 unmap_domain_page(table);
670
671 /*
672 * Make sure all the references in the TLB have been removed before
673 * freeing the intermediate page table.
674 * XXX: Should we defer the free of the page table to avoid the
675 * flush?
676 */
677 if ( p2m->need_flush )
678 p2m_flush_tlb_sync(p2m);
679
680 mfn = _mfn(entry.p2m.base);
681 ASSERT(mfn_valid(mfn));
682
683 pg = mfn_to_page(mfn);
684
685 page_list_del(pg, &p2m->pages);
686 free_domheap_page(pg);
687 }
688
689 static bool p2m_split_superpage(struct p2m_domain *p2m, lpae_t *entry,
690 unsigned int level, unsigned int target,
691 const unsigned int *offsets)
692 {
693 struct page_info *page;
694 unsigned int i;
695 lpae_t pte, *table;
696 bool rv = true;
697
698 /* Convenience aliases */
699 mfn_t mfn = _mfn(entry->p2m.base);
700 unsigned int next_level = level + 1;
701 unsigned int level_order = level_orders[next_level];
702
703 /*
704 * This should only be called with target != level and when the entry
705 * is a superpage.
706 */
707 ASSERT(level < target);
708 ASSERT(lpae_is_superpage(*entry, level));
709
710 page = alloc_domheap_page(NULL, 0);
711 if ( !page )
712 return false;
713
714 page_list_add(page, &p2m->pages);
715 table = __map_domain_page(page);
716
717 /*
718 * We are either splitting a first level 1G page into 512 second level
719 * 2M pages, or a second level 2M page into 512 third level 4K pages.
720 */
721 for ( i = 0; i < LPAE_ENTRIES; i++ )
722 {
723 lpae_t *new_entry = table + i;
724
725 /*
726 * Use the content of the superpage entry and override
727 * the necessary fields, so the correct permissions are kept.
728 */
729 pte = *entry;
730 pte.p2m.base = mfn_x(mfn_add(mfn, i << level_order));
731
732 /*
733 * First and second level pages set p2m.table = 0, but third
734 * level entries set p2m.table = 1.
735 */
736 pte.p2m.table = (next_level == 3);
737
738 write_pte(new_entry, pte);
739 }
740
741 /* Update stats */
742 p2m->stats.shattered[level]++;
743 p2m->stats.mappings[level]--;
744 p2m->stats.mappings[next_level] += LPAE_ENTRIES;
745
746 /*
747 * Shatter the superpage down to the level at which we want to make
748 * the changes.
749 * This is done outside the loop to avoid checking the offset of every
750 * entry to know whether it should be shattered.
751 */
752 if ( next_level != target )
753 rv = p2m_split_superpage(p2m, table + offsets[next_level],
754 level + 1, target, offsets);
755
756 if ( p2m->clean_pte )
757 clean_dcache_va_range(table, PAGE_SIZE);
758
759 unmap_domain_page(table);
760
761 pte = mfn_to_p2m_entry(page_to_mfn(page), p2m_invalid,
762 p2m->default_access);
763
764 /*
765 * Even if we failed, we should install the newly allocated LPAE
766 * entry. The caller will be in charge to free the sub-tree.
767 */
768 p2m_write_pte(entry, pte, p2m->clean_pte);
769
770 return rv;
771 }
772
773 /*
774 * Insert an entry in the p2m. This should be called with a mapping
775 * equal to a page/superpage (4K, 2M, 1G).
776 */
777 static int __p2m_set_entry(struct p2m_domain *p2m,
778 gfn_t sgfn,
779 unsigned int page_order,
780 mfn_t smfn,
781 p2m_type_t t,
782 p2m_access_t a)
783 {
784 paddr_t addr = gfn_to_gaddr(sgfn);
785 unsigned int level = 0;
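/*
 * page_order is a multiple of LPAE_SHIFT (0, 9 or 18 with the 4KB
 * granule), so target below is 3, 2 or 1 for a 4K, 2M or 1G mapping.
 */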
786 unsigned int target = 3 - (page_order / LPAE_SHIFT);
787 lpae_t *entry, *table, orig_pte;
788 int rc;
789
790 /* Convenience aliases */
791 const unsigned int offsets[4] = {
792 zeroeth_table_offset(addr),
793 first_table_offset(addr),
794 second_table_offset(addr),
795 third_table_offset(addr)
796 };
797
798 ASSERT(p2m_is_write_locked(p2m));
799
800 /*
801 * Check if the level target is valid: we only support
802 * 4K - 2M - 1G mapping.
803 */
804 ASSERT(target > 0 && target <= 3);
805
806 table = p2m_get_root_pointer(p2m, sgfn);
807 if ( !table )
808 return -EINVAL;
809
810 for ( level = P2M_ROOT_LEVEL; level < target; level++ )
811 {
812 /*
813 * Don't try to allocate an intermediate page table if the mapping
814 * is about to be removed (i.e. mfn == INVALID_MFN).
815 */
816 rc = p2m_next_level(p2m, mfn_eq(smfn, INVALID_MFN),
817 &table, offsets[level]);
818 if ( rc == GUEST_TABLE_MAP_FAILED )
819 {
820 /*
821 * We are here because p2m_next_level has failed to map
822 * the intermediate page table (e.g. the table does not exist
823 * and the p2m tree is read-only). It is a valid case
824 * when removing a mapping as it may not exist in the
825 * page table. In this case, just ignore it.
826 */
827 rc = mfn_eq(smfn, INVALID_MFN) ? 0 : -ENOENT;
828 goto out;
829 }
830 else if ( rc != GUEST_TABLE_NORMAL_PAGE )
831 break;
832 }
833
834 entry = table + offsets[level];
835
836 /*
837 * If we are here with level < target, we must be at a leaf node,
838 * and we need to break up the superpage.
839 */
840 if ( level < target )
841 {
842 /* We need to split the original page. */
843 lpae_t split_pte = *entry;
844
845 ASSERT(lpae_is_superpage(*entry, level));
846
847 if ( !p2m_split_superpage(p2m, &split_pte, level, target, offsets) )
848 {
849 /*
850 * The current super-page is still in-place, so re-increment
851 * the stats.
852 */
853 p2m->stats.mappings[level]++;
854
855 /* Free the allocated sub-tree */
856 p2m_free_entry(p2m, split_pte, level);
857
858 rc = -ENOMEM;
859 goto out;
860 }
861
862 /*
863 * Follow the break-before-make sequence to update the entry.
864 * For more details see (D4.7.1 in ARM DDI 0487A.j).
865 */
866 p2m_remove_pte(entry, p2m->clean_pte);
867 p2m_flush_tlb_sync(p2m);
868
869 p2m_write_pte(entry, split_pte, p2m->clean_pte);
870
871 /* then move to the level we want to make real changes */
872 for ( ; level < target; level++ )
873 {
874 rc = p2m_next_level(p2m, true, &table, offsets[level]);
875
876 /*
877 * The entry should be found and either be a table
878 * or, if level 3 is not targeted, a superpage.
879 */
880 ASSERT(rc == GUEST_TABLE_NORMAL_PAGE ||
881 (rc == GUEST_TABLE_SUPER_PAGE && target < 3));
882 }
883
884 entry = table + offsets[level];
885 }
886
887 /*
888 * We should always arrive here at the correct level because
889 * all the intermediate tables have been installed if necessary.
890 */
891 ASSERT(level == target);
892
893 orig_pte = *entry;
894
895 /*
896 * The radix-tree can only work at 4KB granularity. When memaccess is
897 * enabled, larger orders are only tolerated while the domain is dying.
898 */
899 ASSERT(!p2m->mem_access_enabled || page_order == 0 ||
900 p2m->domain->is_dying);
901 /*
902 * The access type should always be p2m_access_rwx when the mapping
903 * is removed.
904 */
905 ASSERT(!mfn_eq(INVALID_MFN, smfn) || (a == p2m_access_rwx));
906 /*
907 * Update the mem access permission before updating the P2M, so we
908 * don't have to revert the mapping if it fails.
909 */
910 rc = p2m_mem_access_radix_set(p2m, sgfn, a);
911 if ( rc )
912 goto out;
913
914 /*
915 * Always remove the entry in order to follow the break-before-make
916 * sequence when updating the translation table (D4.7.1 in ARM DDI
917 * 0487A.j).
918 */
919 if ( lpae_valid(orig_pte) )
920 p2m_remove_pte(entry, p2m->clean_pte);
921
922 if ( mfn_eq(smfn, INVALID_MFN) )
923 /* Flush can be deferred if the entry is removed */
924 p2m->need_flush |= !!lpae_valid(orig_pte);
925 else
926 {
927 lpae_t pte = mfn_to_p2m_entry(smfn, t, a);
928
929 if ( level < 3 )
930 pte.p2m.table = 0; /* Superpage entry */
931
932 /*
933 * It is necessary to flush the TLB before writing the new entry
934 * to keep coherency when the previous entry was valid.
935 *
936 * However, it can be deferred when only the permissions are
937 * changed (e.g. in case of memaccess).
938 */
939 if ( lpae_valid(orig_pte) )
940 {
941 if ( likely(!p2m->mem_access_enabled) ||
942 P2M_CLEAR_PERM(pte) != P2M_CLEAR_PERM(orig_pte) )
943 p2m_flush_tlb_sync(p2m);
944 else
945 p2m->need_flush = true;
946 }
947 else /* new mapping */
948 p2m->stats.mappings[level]++;
949
950 p2m_write_pte(entry, pte, p2m->clean_pte);
951
952 p2m->max_mapped_gfn = gfn_max(p2m->max_mapped_gfn,
953 gfn_add(sgfn, 1 << page_order));
954 p2m->lowest_mapped_gfn = gfn_min(p2m->lowest_mapped_gfn, sgfn);
955 }
956
957 /*
958 * Free the entry only if the original pte was valid and the base
959 * is different (to avoid freeing when only the permission is changed).
960 */
961 if ( lpae_valid(orig_pte) && entry->p2m.base != orig_pte.p2m.base )
962 p2m_free_entry(p2m, orig_pte, level);
963
964 if ( need_iommu(p2m->domain) &&
965 (lpae_valid(orig_pte) || lpae_valid(*entry)) )
966 rc = iommu_iotlb_flush(p2m->domain, gfn_x(sgfn), 1UL << page_order);
967 else
968 rc = 0;
969
970 out:
971 unmap_domain_page(table);
972
973 return rc;
974 }
975
976 int p2m_set_entry(struct p2m_domain *p2m,
977 gfn_t sgfn,
978 unsigned long nr,
979 mfn_t smfn,
980 p2m_type_t t,
981 p2m_access_t a)
982 {
983 int rc = 0;
984
985 while ( nr )
986 {
987 unsigned long mask;
988 unsigned long order;
989
990 /*
991 * Don't take into account the MFN when removing a mapping (i.e.
992 * MFN == INVALID_MFN) to calculate the correct target order.
993 *
994 * XXX: Support superpage mappings if nr is not aligned to a
995 * superpage size.
996 */
997 mask = !mfn_eq(smfn, INVALID_MFN) ? mfn_x(smfn) : 0;
998 mask |= gfn_x(sgfn) | nr;
999
1000 /* Always map 4k by 4k when memaccess is enabled */
1001 if ( unlikely(p2m->mem_access_enabled) )
1002 order = THIRD_ORDER;
1003 else if ( !(mask & ((1UL << FIRST_ORDER) - 1)) )
1004 order = FIRST_ORDER;
1005 else if ( !(mask & ((1UL << SECOND_ORDER) - 1)) )
1006 order = SECOND_ORDER;
1007 else
1008 order = THIRD_ORDER;
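/*
 * Worked example of the mask check above: if gfn, mfn and nr are all
 * multiples of 1 << FIRST_ORDER, a 1GB mapping is used; if only the
 * 2MB alignment holds, SECOND_ORDER is chosen; otherwise we fall back
 * to 4KB mappings.
 */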
1009
1010 rc = __p2m_set_entry(p2m, sgfn, order, smfn, t, a);
1011 if ( rc )
1012 break;
1013
1014 sgfn = gfn_add(sgfn, (1 << order));
1015 if ( !mfn_eq(smfn, INVALID_MFN) )
1016 smfn = mfn_add(smfn, (1 << order));
1017
1018 nr -= (1 << order);
1019 }
1020
1021 return rc;
1022 }
1023
1024 static inline int p2m_insert_mapping(struct domain *d,
1025 gfn_t start_gfn,
1026 unsigned long nr,
1027 mfn_t mfn,
1028 p2m_type_t t)
1029 {
1030 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1031 int rc;
1032
1033 p2m_write_lock(p2m);
1034 rc = p2m_set_entry(p2m, start_gfn, nr, mfn, t, p2m->default_access);
1035 p2m_write_unlock(p2m);
1036
1037 return rc;
1038 }
1039
1040 static inline int p2m_remove_mapping(struct domain *d,
1041 gfn_t start_gfn,
1042 unsigned long nr,
1043 mfn_t mfn)
1044 {
1045 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1046 int rc;
1047
1048 p2m_write_lock(p2m);
1049 rc = p2m_set_entry(p2m, start_gfn, nr, INVALID_MFN,
1050 p2m_invalid, p2m_access_rwx);
1051 p2m_write_unlock(p2m);
1052
1053 return rc;
1054 }
1055
1056 int map_regions_p2mt(struct domain *d,
1057 gfn_t gfn,
1058 unsigned long nr,
1059 mfn_t mfn,
1060 p2m_type_t p2mt)
1061 {
1062 return p2m_insert_mapping(d, gfn, nr, mfn, p2mt);
1063 }
1064
1065 int unmap_regions_p2mt(struct domain *d,
1066 gfn_t gfn,
1067 unsigned long nr,
1068 mfn_t mfn)
1069 {
1070 return p2m_remove_mapping(d, gfn, nr, mfn);
1071 }
1072
1073 int map_mmio_regions(struct domain *d,
1074 gfn_t start_gfn,
1075 unsigned long nr,
1076 mfn_t mfn)
1077 {
1078 return p2m_insert_mapping(d, start_gfn, nr, mfn, p2m_mmio_direct_dev);
1079 }
1080
1081 int unmap_mmio_regions(struct domain *d,
1082 gfn_t start_gfn,
1083 unsigned long nr,
1084 mfn_t mfn)
1085 {
1086 return p2m_remove_mapping(d, start_gfn, nr, mfn);
1087 }
1088
1089 int map_dev_mmio_region(struct domain *d,
1090 gfn_t gfn,
1091 unsigned long nr,
1092 mfn_t mfn)
1093 {
1094 int res;
1095
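/* Nothing to map if the range is empty or the domain lacks access to it. */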
1096 if ( !(nr && iomem_access_permitted(d, mfn_x(mfn), mfn_x(mfn) + nr - 1)) )
1097 return 0;
1098
1099 res = p2m_insert_mapping(d, gfn, nr, mfn, p2m_mmio_direct_c);
1100 if ( res < 0 )
1101 {
1102 printk(XENLOG_G_ERR "Unable to map MFNs [%#"PRI_mfn" - %#"PRI_mfn"] in Dom%d\n",
1103 mfn_x(mfn), mfn_x(mfn) + nr - 1, d->domain_id);
1104 return res;
1105 }
1106
1107 return 0;
1108 }
1109
1110 int guest_physmap_add_entry(struct domain *d,
1111 gfn_t gfn,
1112 mfn_t mfn,
1113 unsigned long page_order,
1114 p2m_type_t t)
1115 {
1116 return p2m_insert_mapping(d, gfn, (1 << page_order), mfn, t);
1117 }
1118
1119 int guest_physmap_remove_page(struct domain *d, gfn_t gfn, mfn_t mfn,
1120 unsigned int page_order)
1121 {
1122 return p2m_remove_mapping(d, gfn, (1 << page_order), mfn);
1123 }
1124
1125 static int p2m_alloc_table(struct domain *d)
1126 {
1127 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1128 struct page_info *page;
1129 unsigned int i;
1130
1131 page = alloc_domheap_pages(NULL, P2M_ROOT_ORDER, 0);
1132 if ( page == NULL )
1133 return -ENOMEM;
1134
1135 /* Clear all root table pages */
1136 for ( i = 0; i < P2M_ROOT_PAGES; i++ )
1137 clear_and_clean_page(page + i);
1138
1139 p2m->root = page;
1140
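/*
 * VTTBR_EL2 combines the root table base address with the VMID; the
 * VMID field starts at bit 48 (8 bits, or 16 bits when VTCR_EL2.VS is
 * set).
 */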
1141 p2m->vttbr = page_to_maddr(p2m->root) | ((uint64_t)p2m->vmid << 48);
1142
1143 /*
1144 * Make sure that all TLBs corresponding to the new VMID are flushed
1145 * before using it
1146 */
1147 p2m_flush_tlb(p2m);
1148
1149 return 0;
1150 }
1151
1152
1153 static spinlock_t vmid_alloc_lock = SPIN_LOCK_UNLOCKED;
1154
1155 /*
1156 * VTTBR_EL2 VMID field is 8 or 16 bits. AArch64 may support 16-bit VMID.
1157 * Using a bitmap here limits us to 256 or 65536 (for AArch64) concurrent
1158 * domains. The bitmap space will be allocated dynamically based on
1159 * whether 8 or 16 bit VMIDs are supported.
1160 */
1161 static unsigned long *vmid_mask;
1162
1163 static void p2m_vmid_allocator_init(void)
1164 {
1165 /*
1166 * Allocate space for vmid_mask based on MAX_VMID.
1167 */
1168 vmid_mask = xzalloc_array(unsigned long, BITS_TO_LONGS(MAX_VMID));
1169
1170 if ( !vmid_mask )
1171 panic("Could not allocate VMID bitmap space");
1172
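/* VMID 0 is reserved as INVALID_VMID, so mark it used up front. */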
1173 set_bit(INVALID_VMID, vmid_mask);
1174 }
1175
1176 static int p2m_alloc_vmid(struct domain *d)
1177 {
1178 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1179
1180 int rc, nr;
1181
1182 spin_lock(&vmid_alloc_lock);
1183
1184 nr = find_first_zero_bit(vmid_mask, MAX_VMID);
1185
1186 ASSERT(nr != INVALID_VMID);
1187
1188 if ( nr == MAX_VMID )
1189 {
1190 rc = -EBUSY;
1191 printk(XENLOG_ERR "p2m.c: dom%d: VMID pool exhausted\n", d->domain_id);
1192 goto out;
1193 }
1194
1195 set_bit(nr, vmid_mask);
1196
1197 p2m->vmid = nr;
1198
1199 rc = 0;
1200
1201 out:
1202 spin_unlock(&vmid_alloc_lock);
1203 return rc;
1204 }
1205
1206 static void p2m_free_vmid(struct domain *d)
1207 {
1208 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1209 spin_lock(&vmid_alloc_lock);
1210 if ( p2m->vmid != INVALID_VMID )
1211 clear_bit(p2m->vmid, vmid_mask);
1212
1213 spin_unlock(&vmid_alloc_lock);
1214 }
1215
1216 void p2m_teardown(struct domain *d)
1217 {
1218 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1219 struct page_info *pg;
1220
1221 /* p2m not actually initialized */
1222 if ( !p2m->domain )
1223 return;
1224
1225 while ( (pg = page_list_remove_head(&p2m->pages)) )
1226 free_domheap_page(pg);
1227
1228 if ( p2m->root )
1229 free_domheap_pages(p2m->root, P2M_ROOT_ORDER);
1230
1231 p2m->root = NULL;
1232
1233 p2m_free_vmid(d);
1234
1235 radix_tree_destroy(&p2m->mem_access_settings, NULL);
1236
1237 p2m->domain = NULL;
1238 }
1239
1240 int p2m_init(struct domain *d)
1241 {
1242 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1243 int rc = 0;
1244 unsigned int cpu;
1245
1246 rwlock_init(&p2m->lock);
1247 INIT_PAGE_LIST_HEAD(&p2m->pages);
1248
1249 p2m->vmid = INVALID_VMID;
1250
1251 rc = p2m_alloc_vmid(d);
1252 if ( rc != 0 )
1253 return rc;
1254
1255 p2m->max_mapped_gfn = _gfn(0);
1256 p2m->lowest_mapped_gfn = _gfn(ULONG_MAX);
1257
1258 p2m->default_access = p2m_access_rwx;
1259 p2m->mem_access_enabled = false;
1260 radix_tree_init(&p2m->mem_access_settings);
1261
1262 /*
1263 * Some IOMMUs don't support coherent PT walk. When the p2m is
1264 * shared with the CPU, Xen has to make sure that the PT changes have
1265 * reached the memory
1266 */
1267 p2m->clean_pte = iommu_enabled &&
1268 !iommu_has_feature(d, IOMMU_FEAT_COHERENT_WALK);
1269
1270 rc = p2m_alloc_table(d);
1271
1272 /*
1273 * Make sure that the type chosen is able to store a vCPU ID
1274 * between 0 and the maximum number of virtual CPUs supported, as
1275 * well as INVALID_VCPU_ID.
1276 */
1277 BUILD_BUG_ON((1 << (sizeof(p2m->last_vcpu_ran[0]) * 8)) < MAX_VIRT_CPUS);
1278 BUILD_BUG_ON((1 << (sizeof(p2m->last_vcpu_ran[0])* 8)) < INVALID_VCPU_ID);
1279
1280 for_each_possible_cpu(cpu)
1281 p2m->last_vcpu_ran[cpu] = INVALID_VCPU_ID;
1282
1283 /*
1284 * Besides getting a domain when we only have the p2m in hand,
1285 * the back pointer to domain is also used in p2m_teardown()
1286 * as an end-of-initialization indicator.
1287 */
1288 p2m->domain = d;
1289
1290 return rc;
1291 }
1292
1293 /*
1294 * The function will go through the p2m and remove page references
1295 * where required. The mapping will be removed from the p2m.
1296 *
1297 * XXX: See whether the mapping can be left intact in the p2m.
1298 */
1299 int relinquish_p2m_mapping(struct domain *d)
1300 {
1301 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1302 unsigned long count = 0;
1303 p2m_type_t t;
1304 int rc = 0;
1305 unsigned int order;
1306 gfn_t start, end;
1307
1308 p2m_write_lock(p2m);
1309
1310 start = p2m->lowest_mapped_gfn;
1311 end = p2m->max_mapped_gfn;
1312
1313 for ( ; gfn_x(start) < gfn_x(end);
1314 start = gfn_next_boundary(start, order) )
1315 {
1316 mfn_t mfn = p2m_get_entry(p2m, start, &t, NULL, &order);
1317
1318 count++;
1319 /*
1320 * Arbitrarily preempt every 512 iterations.
1321 */
1322 if ( !(count % 512) && hypercall_preempt_check() )
1323 {
1324 rc = -ERESTART;
1325 break;
1326 }
1327
1328 /*
1329 * p2m_set_entry will take care of removing reference on page
1330 * when it is necessary and removing the mapping in the p2m.
1331 */
1332 if ( !mfn_eq(mfn, INVALID_MFN) )
1333 {
1334 /*
1335 * For a valid mapping, start will always be aligned because
1336 * entries are removed whilst relinquishing.
1337 */
1338 rc = __p2m_set_entry(p2m, start, order, INVALID_MFN,
1339 p2m_invalid, p2m_access_rwx);
1340 if ( unlikely(rc) )
1341 {
1342 printk(XENLOG_G_ERR "Unable to remove mapping gfn=%#"PRI_gfn" order=%u from the p2m of domain %d\n", gfn_x(start), order, d->domain_id);
1343 break;
1344 }
1345 }
1346 }
1347
1348 /*
1349 * Update lowest_mapped_gfn so that on the next call we start where
1350 * we stopped.
1351 */
1352 p2m->lowest_mapped_gfn = start;
1353
1354 p2m_write_unlock(p2m);
1355
1356 return rc;
1357 }
1358
1359 int p2m_cache_flush(struct domain *d, gfn_t start, unsigned long nr)
1360 {
1361 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1362 gfn_t end = gfn_add(start, nr);
1363 gfn_t next_gfn;
1364 p2m_type_t t;
1365 unsigned int order;
1366
1367 /*
1368 * The cache flush operation will clean and invalidate the RAM assigned
1369 * to the guest in the given range. It will not modify the page tables,
1370 * and flushing the cache whilst a page is used by another CPU is
1371 * fine. So using the read-lock is sufficient here.
1372 */
1373 p2m_read_lock(p2m);
1374
1375 start = gfn_max(start, p2m->lowest_mapped_gfn);
1376 end = gfn_min(end, p2m->max_mapped_gfn);
1377
1378 for ( ; gfn_x(start) < gfn_x(end); start = next_gfn )
1379 {
1380 mfn_t mfn = p2m_get_entry(p2m, start, &t, NULL, &order);
1381
1382 next_gfn = gfn_next_boundary(start, order);
1383
1384 /* Skip hole and non-RAM page */
1385 if ( mfn_eq(mfn, INVALID_MFN) || !p2m_is_ram(t) )
1386 continue;
1387
1388 /* XXX: Implement preemption */
1389 while ( gfn_x(start) < gfn_x(next_gfn) )
1390 {
1391 flush_page_to_ram(mfn_x(mfn), false);
1392
1393 start = gfn_add(start, 1);
1394 mfn = mfn_add(mfn, 1);
1395 }
1396 }
1397
1398 invalidate_icache();
1399
1400 p2m_read_unlock(p2m);
1401
1402 return 0;
1403 }
1404
1405 mfn_t gfn_to_mfn(struct domain *d, gfn_t gfn)
1406 {
1407 return p2m_lookup(d, gfn, NULL);
1408 }
1409
1410 struct page_info *get_page_from_gva(struct vcpu *v, vaddr_t va,
1411 unsigned long flags)
1412 {
1413 struct domain *d = v->domain;
1414 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1415 struct page_info *page = NULL;
1416 paddr_t maddr = 0;
1417 uint64_t par;
1418
1419 /*
1420 * XXX: To support a different vCPU, we would need to load the
1421 * VTTBR_EL2, TTBR0_EL1, TTBR1_EL1 and SCTLR_EL1
1422 */
1423 if ( v != current )
1424 return NULL;
1425
1426 p2m_read_lock(p2m);
1427
1428 par = gvirt_to_maddr(va, &maddr, flags);
1429
1430 if ( par )
1431 {
1432 dprintk(XENLOG_G_DEBUG,
1433 "%pv: gvirt_to_maddr failed va=%#"PRIvaddr" flags=0x%lx par=%#"PRIx64"\n",
1434 v, va, flags, par);
1435 goto err;
1436 }
1437
1438 if ( !mfn_valid(maddr_to_mfn(maddr)) )
1439 {
1440 dprintk(XENLOG_G_DEBUG, "%pv: Invalid MFN %#"PRI_mfn"\n",
1441 v, mfn_x(maddr_to_mfn(maddr)));
1442 goto err;
1443 }
1444
1445 page = mfn_to_page(maddr_to_mfn(maddr));
1446 ASSERT(page);
1447
1448 if ( unlikely(!get_page(page, d)) )
1449 {
1450 dprintk(XENLOG_G_DEBUG, "%pv: Failed to acquire the MFN %#"PRI_mfn"\n",
1451 v, mfn_x(maddr_to_mfn(maddr)));
1452 page = NULL;
1453 }
1454
1455 err:
1456 if ( !page && p2m->mem_access_enabled )
1457 page = p2m_mem_access_check_and_get_page(va, flags, v);
1458
1459 p2m_read_unlock(p2m);
1460
1461 return page;
1462 }
1463
1464 static void __init setup_virt_paging_one(void *data)
1465 {
1466 unsigned long val = (unsigned long)data;
1467 WRITE_SYSREG32(val, VTCR_EL2);
1468 isb();
1469 }
1470
1471 void __init setup_virt_paging(void)
1472 {
1473 /* Setup Stage 2 address translation */
1474 unsigned long val = VTCR_RES1|VTCR_SH0_IS|VTCR_ORGN0_WBWA|VTCR_IRGN0_WBWA;
1475
1476 #ifdef CONFIG_ARM_32
1477 printk("P2M: 40-bit IPA\n");
1478 p2m_ipa_bits = 40;
1479 val |= VTCR_T0SZ(0x18); /* 40 bit IPA */
1480 val |= VTCR_SL0(0x1); /* P2M starts at first level */
1481 #else /* CONFIG_ARM_64 */
1482 const struct {
1483 unsigned int pabits; /* Physical Address Size */
1484 unsigned int t0sz; /* Desired T0SZ, minimum in comment */
1485 unsigned int root_order; /* Page order of the root of the p2m */
1486 unsigned int sl0; /* Desired SL0, maximum in comment */
1487 } pa_range_info[] = {
1488 /* T0SZ minimum and SL0 maximum from ARM DDI 0487A.b Table D4-5 */
1489 /* PA size, t0sz(min), root-order, sl0(max) */
1490 [0] = { 32, 32/*32*/, 0, 1 },
1491 [1] = { 36, 28/*28*/, 0, 1 },
1492 [2] = { 40, 24/*24*/, 1, 1 },
1493 [3] = { 42, 24/*22*/, 1, 1 },
1494 [4] = { 44, 20/*20*/, 0, 2 },
1495 [5] = { 48, 16/*16*/, 0, 2 },
1496 [6] = { 0 }, /* Invalid */
1497 [7] = { 0 } /* Invalid */
1498 };
1499
1500 unsigned int cpu;
1501 unsigned int pa_range = 0x10; /* Larger than any possible value */
1502 bool vmid_8_bit = false;
1503
1504 for_each_online_cpu ( cpu )
1505 {
1506 const struct cpuinfo_arm *info = &cpu_data[cpu];
1507 if ( info->mm64.pa_range < pa_range )
1508 pa_range = info->mm64.pa_range;
1509
1510 /* Set a flag if the current cpu does not support 16 bit VMIDs. */
1511 if ( info->mm64.vmid_bits != MM64_VMID_16_BITS_SUPPORT )
1512 vmid_8_bit = true;
1513 }
1514
1515 /*
1516 * If the flag is not set then it means all CPUs support 16-bit
1517 * VMIDs.
1518 */
1519 if ( !vmid_8_bit )
1520 max_vmid = MAX_VMID_16_BIT;
1521
1522 /* pa_range is 4 bits, but the defined encodings are only 3 bits */
1523 if ( pa_range >= ARRAY_SIZE(pa_range_info) || !pa_range_info[pa_range].pabits )
1524 panic("Unknown encoding of ID_AA64MMFR0_EL1.PARange %x\n", pa_range);
1525
1526 val |= VTCR_PS(pa_range);
1527 val |= VTCR_TG0_4K;
1528
1529 /* Set the VS bit only if 16 bit VMID is supported. */
1530 if ( MAX_VMID == MAX_VMID_16_BIT )
1531 val |= VTCR_VS;
1532 val |= VTCR_SL0(pa_range_info[pa_range].sl0);
1533 val |= VTCR_T0SZ(pa_range_info[pa_range].t0sz);
1534
1535 p2m_root_order = pa_range_info[pa_range].root_order;
1536 p2m_root_level = 2 - pa_range_info[pa_range].sl0;
1537 p2m_ipa_bits = 64 - pa_range_info[pa_range].t0sz;
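/*
 * Example: PARange encoding 5 (48-bit PA) gives t0sz = 16 and sl0 = 2,
 * i.e. a 48-bit IPA with an order-0 root page table starting the walk
 * at level 0.
 */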
1538
1539 printk("P2M: %d-bit IPA with %d-bit PA and %d-bit VMID\n",
1540 p2m_ipa_bits,
1541 pa_range_info[pa_range].pabits,
1542 ( MAX_VMID == MAX_VMID_16_BIT ) ? 16 : 8);
1543 #endif
1544 printk("P2M: %d levels with order-%d root, VTCR 0x%lx\n",
1545 4 - P2M_ROOT_LEVEL, P2M_ROOT_ORDER, val);
1546
1547 p2m_vmid_allocator_init();
1548
1549 /* It is not allowed to concatenate a level zero root */
1550 BUG_ON( P2M_ROOT_LEVEL == 0 && P2M_ROOT_ORDER > 0 );
1551 setup_virt_paging_one((void *)val);
1552 smp_call_function(setup_virt_paging_one, (void *)val, 1);
1553 }
1554
1555 /*
1556 * Local variables:
1557 * mode: C
1558 * c-file-style: "BSD"
1559 * c-basic-offset: 4
1560 * indent-tabs-mode: nil
1561 * End:
1562 */
1563