#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/domain_page.h>
#include <xen/bitops.h>
#include <xen/vm_event.h>
#include <xen/monitor.h>
#include <xen/iocap.h>
#include <xen/mem_access.h>
#include <xen/xmalloc.h>
#include <public/vm_event.h>
#include <asm/flushtlb.h>
#include <asm/gic.h>
#include <asm/event.h>
#include <asm/hardirq.h>
#include <asm/page.h>

#define MAX_VMID_8_BIT  (1UL << 8)
#define MAX_VMID_16_BIT (1UL << 16)

#define INVALID_VMID 0 /* VMID 0 is reserved */

#ifdef CONFIG_ARM_64
static unsigned int __read_mostly p2m_root_order;
static unsigned int __read_mostly p2m_root_level;
#define P2M_ROOT_ORDER    p2m_root_order
#define P2M_ROOT_LEVEL p2m_root_level
static unsigned int __read_mostly max_vmid = MAX_VMID_8_BIT;
/* VMID is by default 8 bit width on AArch64 */
#define MAX_VMID       max_vmid
#else
/* First level P2M is always 2 consecutive pages */
#define P2M_ROOT_LEVEL 1
#define P2M_ROOT_ORDER    1
/* VMID is always 8 bit width on AArch32 */
#define MAX_VMID        MAX_VMID_8_BIT
#endif

#define P2M_ROOT_PAGES    (1<<P2M_ROOT_ORDER)

/* Override macros from asm/mm.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(mfn) __mfn_to_page(mfn_x(mfn))
#undef page_to_mfn
#define page_to_mfn(pg) _mfn(__page_to_mfn(pg))

unsigned int __read_mostly p2m_ipa_bits;

/* Helpers to lookup the properties of each level */
static const paddr_t level_masks[] =
    { ZEROETH_MASK, FIRST_MASK, SECOND_MASK, THIRD_MASK };
static const uint8_t level_orders[] =
    { ZEROETH_ORDER, FIRST_ORDER, SECOND_ORDER, THIRD_ORDER };
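
/*
 * Editor's note (assuming the 4K translation granule selected in
 * setup_virt_paging()): the orders above are expressed in units of 4K
 * pages, i.e. an entry at the third level maps 4K (order 0), at the
 * second level 2M (order 9, since 2^9 * 4K = 2M) and at the first
 * level 1G (order 18), matching the mapping sizes reported by
 * p2m_dump_info() below.
 */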

static void p2m_flush_tlb(struct p2m_domain *p2m);

/* Release the P2M write lock, doing a P2M TLB flush first if necessary */
void p2m_write_unlock(struct p2m_domain *p2m)
{
    if ( p2m->need_flush )
    {
        p2m->need_flush = false;
        /*
         * The final flush is done with the P2M write lock taken to
         * avoid someone else modifying the P2M before the TLB
         * invalidation has completed.
         */
        p2m_flush_tlb(p2m);
    }

    write_unlock(&p2m->lock);
}

void p2m_dump_info(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    p2m_read_lock(p2m);
    printk("p2m mappings for domain %d (vmid %d):\n",
           d->domain_id, p2m->vmid);
    BUG_ON(p2m->stats.mappings[0] || p2m->stats.shattered[0]);
    printk("  1G mappings: %ld (shattered %ld)\n",
           p2m->stats.mappings[1], p2m->stats.shattered[1]);
    printk("  2M mappings: %ld (shattered %ld)\n",
           p2m->stats.mappings[2], p2m->stats.shattered[2]);
    printk("  4K mappings: %ld\n", p2m->stats.mappings[3]);
    p2m_read_unlock(p2m);
}

void memory_type_changed(struct domain *d)
{
}

void dump_p2m_lookup(struct domain *d, paddr_t addr)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    printk("dom%d IPA 0x%"PRIpaddr"\n", d->domain_id, addr);

    printk("P2M @ %p mfn:0x%lx\n",
           p2m->root, __page_to_mfn(p2m->root));

    dump_pt_walk(page_to_maddr(p2m->root), addr,
                 P2M_ROOT_LEVEL, P2M_ROOT_PAGES);
}

void p2m_save_state(struct vcpu *p)
{
    p->arch.sctlr = READ_SYSREG(SCTLR_EL1);
}

void p2m_restore_state(struct vcpu *n)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(n->domain);
    uint8_t *last_vcpu_ran;

    if ( is_idle_vcpu(n) )
        return;

    WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
    isb();

    WRITE_SYSREG(n->arch.sctlr, SCTLR_EL1);
    isb();

    WRITE_SYSREG(n->arch.hcr_el2, HCR_EL2);
    isb();

    last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()];

    /*
     * Flush local TLB for the domain to prevent wrong TLB translation
     * when running multiple vCPUs of the same domain on a single pCPU.
     */
    if ( *last_vcpu_ran != INVALID_VCPU_ID && *last_vcpu_ran != n->vcpu_id )
        flush_tlb_local();

    *last_vcpu_ran = n->vcpu_id;
}

static void p2m_flush_tlb(struct p2m_domain *p2m)
{
    unsigned long flags = 0;
    uint64_t ovttbr;

    /*
     * ARM only provides an instruction to flush TLBs for the current
     * VMID. So switch to the VTTBR of a given P2M if different.
     */
    ovttbr = READ_SYSREG64(VTTBR_EL2);
    if ( ovttbr != p2m->vttbr )
    {
        local_irq_save(flags);
        WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2);
        isb();
    }

    flush_tlb();

    if ( ovttbr != READ_SYSREG64(VTTBR_EL2) )
    {
        WRITE_SYSREG64(ovttbr, VTTBR_EL2);
        isb();
        local_irq_restore(flags);
    }
}
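
/*
 * Editor's note on p2m_flush_tlb(): interrupts are kept disabled while
 * this CPU runs with the borrowed VTTBR, presumably so that nothing
 * executed in interrupt context on this CPU can perform a flush or
 * translation that assumes the current vCPU's VTTBR before the
 * original value has been restored.
 */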

/*
 * Force a synchronous P2M TLB flush.
 *
 * Must be called with the p2m lock held.
 */
static void p2m_flush_tlb_sync(struct p2m_domain *p2m)
{
    ASSERT(p2m_is_write_locked(p2m));

    p2m_flush_tlb(p2m);
    p2m->need_flush = false;
}

/*
 * Find and map the root page table. The caller is responsible for
 * unmapping the table.
 *
 * The function will return NULL if the offset of the root table is
 * invalid.
 */
static lpae_t *p2m_get_root_pointer(struct p2m_domain *p2m,
                                    gfn_t gfn)
{
    unsigned int root_table;

    if ( P2M_ROOT_PAGES == 1 )
        return __map_domain_page(p2m->root);

    /*
     * Concatenated root-level tables. The table number will be the
     * offset at the previous level. It is not possible to
     * concatenate a level-0 root.
     */
    ASSERT(P2M_ROOT_LEVEL > 0);

    root_table = gfn_x(gfn) >> (level_orders[P2M_ROOT_LEVEL - 1]);
    root_table &= LPAE_ENTRY_MASK;

    if ( root_table >= P2M_ROOT_PAGES )
        return NULL;

    return __map_domain_page(p2m->root + root_table);
}
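
/*
 * Illustration (assuming the AArch64 40-bit IPA configuration from
 * setup_virt_paging(), where P2M_ROOT_ORDER is 1): the root consists
 * of two concatenated level-1 pages, and the index that would normally
 * be used at level 0 selects which of the two pages to map, i.e. GFNs
 * in the upper half of the IPA space resolve to p2m->root + 1.
 */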

/*
 * Lookup the mem access setting of a domain's GFN in the radix tree.
 * The entry associated with the GFN is assumed to be valid.
 */
static p2m_access_t p2m_mem_access_radix_get(struct p2m_domain *p2m, gfn_t gfn)
{
    void *ptr;

    if ( !p2m->mem_access_enabled )
        return p2m->default_access;

    ptr = radix_tree_lookup(&p2m->mem_access_settings, gfn_x(gfn));
    if ( !ptr )
        return p2m_access_rwx;
    else
        return radix_tree_ptr_to_int(ptr);
}

#define GUEST_TABLE_MAP_FAILED 0
#define GUEST_TABLE_SUPER_PAGE 1
#define GUEST_TABLE_NORMAL_PAGE 2

static int p2m_create_table(struct p2m_domain *p2m, lpae_t *entry);

/*
 * Take the currently mapped table, find the corresponding GFN entry,
 * and map the next table, if available. The previous table will be
 * unmapped if the next level was mapped (e.g. GUEST_TABLE_NORMAL_PAGE
 * returned).
 *
 * The read_only parameter indicates whether missing intermediate
 * tables may be allocated: they are only allocated when read_only is
 * false.
 *
 * Return values:
 *  GUEST_TABLE_MAP_FAILED: Either read_only was set and the entry
 *  was empty, or allocating a new page failed.
 *  GUEST_TABLE_NORMAL_PAGE: next level mapped normally
 *  GUEST_TABLE_SUPER_PAGE: The next entry points to a superpage.
 */
static int p2m_next_level(struct p2m_domain *p2m, bool read_only,
                          lpae_t **table, unsigned int offset)
{
    lpae_t *entry;
    int ret;
    mfn_t mfn;

    entry = *table + offset;

    if ( !lpae_valid(*entry) )
    {
        if ( read_only )
            return GUEST_TABLE_MAP_FAILED;

        ret = p2m_create_table(p2m, entry);
        if ( ret )
            return GUEST_TABLE_MAP_FAILED;
    }

    /* The function p2m_next_level is never called at the 3rd level */
    if ( lpae_mapping(*entry) )
        return GUEST_TABLE_SUPER_PAGE;

    mfn = _mfn(entry->p2m.base);

    unmap_domain_page(*table);
    *table = map_domain_page(mfn);

    return GUEST_TABLE_NORMAL_PAGE;
}

/*
 * Get the details of a given gfn.
 *
 * If the entry is present, the associated MFN will be returned and the
 * access and type filled up. The page_order will correspond to the
 * order of the mapping in the page table (i.e. it could be a superpage).
 *
 * If the entry is not present, INVALID_MFN will be returned and the
 * page_order will be set according to the order of the invalid range.
 */
mfn_t p2m_get_entry(struct p2m_domain *p2m, gfn_t gfn,
                    p2m_type_t *t, p2m_access_t *a,
                    unsigned int *page_order)
{
    paddr_t addr = gfn_to_gaddr(gfn);
    unsigned int level = 0;
    lpae_t entry, *table;
    int rc;
    mfn_t mfn = INVALID_MFN;
    p2m_type_t _t;

    /* Convenience aliases */
    const unsigned int offsets[4] = {
        zeroeth_table_offset(addr),
        first_table_offset(addr),
        second_table_offset(addr),
        third_table_offset(addr)
    };

    ASSERT(p2m_is_locked(p2m));
    BUILD_BUG_ON(THIRD_MASK != PAGE_MASK);

    /* Allow t to be NULL */
    t = t ?: &_t;

    *t = p2m_invalid;

    /* XXX: Check if the mapping is lower than the mapped gfn */

    /* This gfn is higher than the highest the p2m map currently holds */
    if ( gfn_x(gfn) > gfn_x(p2m->max_mapped_gfn) )
    {
        for ( level = P2M_ROOT_LEVEL; level < 3; level++ )
            if ( (gfn_x(gfn) & (level_masks[level] >> PAGE_SHIFT)) >
                 gfn_x(p2m->max_mapped_gfn) )
                break;

        goto out;
    }

    table = p2m_get_root_pointer(p2m, gfn);

    /*
     * The table should always be non-NULL because the gfn is below
     * p2m->max_mapped_gfn and the root table pages are always present.
     */
    BUG_ON(table == NULL);

    for ( level = P2M_ROOT_LEVEL; level < 3; level++ )
    {
        rc = p2m_next_level(p2m, true, &table, offsets[level]);
        if ( rc == GUEST_TABLE_MAP_FAILED )
            goto out_unmap;
        else if ( rc != GUEST_TABLE_NORMAL_PAGE )
            break;
    }

    entry = table[offsets[level]];

    if ( lpae_valid(entry) )
    {
        *t = entry.p2m.type;

        if ( a )
            *a = p2m_mem_access_radix_get(p2m, gfn);

        mfn = _mfn(entry.p2m.base);
        /*
         * The entry may point to a superpage. Find the MFN associated
         * with the GFN.
         */
        mfn = mfn_add(mfn, gfn_x(gfn) & ((1UL << level_orders[level]) - 1));
    }

out_unmap:
    unmap_domain_page(table);

out:
    if ( page_order )
        *page_order = level_orders[level];

    return mfn;
}

mfn_t p2m_lookup(struct domain *d, gfn_t gfn, p2m_type_t *t)
{
    mfn_t ret;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    p2m_read_lock(p2m);
    ret = p2m_get_entry(p2m, gfn, t, NULL, NULL);
    p2m_read_unlock(p2m);

    return ret;
}

int guest_physmap_mark_populate_on_demand(struct domain *d,
                                          unsigned long gfn,
                                          unsigned int order)
{
    return -ENOSYS;
}

int p2m_pod_decrease_reservation(struct domain *d, gfn_t gfn,
                                 unsigned int order)
{
    return -ENOSYS;
}

static void p2m_set_permission(lpae_t *e, p2m_type_t t, p2m_access_t a)
{
    /* First apply type permissions */
    switch ( t )
    {
    case p2m_ram_rw:
        e->p2m.xn = 0;
        e->p2m.write = 1;
        break;

    case p2m_ram_ro:
        e->p2m.xn = 0;
        e->p2m.write = 0;
        break;

    case p2m_iommu_map_rw:
    case p2m_map_foreign:
    case p2m_grant_map_rw:
    case p2m_mmio_direct_dev:
    case p2m_mmio_direct_nc:
    case p2m_mmio_direct_c:
        e->p2m.xn = 1;
        e->p2m.write = 1;
        break;

    case p2m_iommu_map_ro:
    case p2m_grant_map_ro:
    case p2m_invalid:
        e->p2m.xn = 1;
        e->p2m.write = 0;
        break;

    case p2m_max_real_type:
        BUG();
        break;
    }

    /* Then restrict with access permissions */
    switch ( a )
    {
    case p2m_access_rwx:
        break;
    case p2m_access_wx:
        e->p2m.read = 0;
        break;
    case p2m_access_rw:
        e->p2m.xn = 1;
        break;
    case p2m_access_w:
        e->p2m.read = 0;
        e->p2m.xn = 1;
        break;
    case p2m_access_rx:
    case p2m_access_rx2rw:
        e->p2m.write = 0;
        break;
    case p2m_access_x:
        e->p2m.write = 0;
        e->p2m.read = 0;
        break;
    case p2m_access_r:
        e->p2m.write = 0;
        e->p2m.xn = 1;
        break;
    case p2m_access_n:
    case p2m_access_n2rwx:
        e->p2m.read = e->p2m.write = 0;
        e->p2m.xn = 1;
        break;
    }
}
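
/*
 * Note that the access switch above only ever clears the read/write
 * bits or sets xn: a mem access setting can further restrict the
 * permissions implied by the p2m type, but never grants more than the
 * type allows.
 */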

static lpae_t mfn_to_p2m_entry(mfn_t mfn, p2m_type_t t, p2m_access_t a)
{
    /*
     * sh, xn and the write bit will be set up in the following switch
     * and in p2m_set_permission() based on t and a.
     */
    lpae_t e = (lpae_t) {
        .p2m.af = 1,
        .p2m.read = 1,
        .p2m.table = 1,
        .p2m.valid = 1,
        .p2m.type = t,
    };

    BUILD_BUG_ON(p2m_max_real_type > (1 << 4));

    switch ( t )
    {
    case p2m_mmio_direct_dev:
        e.p2m.mattr = MATTR_DEV;
        e.p2m.sh = LPAE_SH_OUTER;
        break;

    case p2m_mmio_direct_c:
        e.p2m.mattr = MATTR_MEM;
        e.p2m.sh = LPAE_SH_OUTER;
        break;

    /*
     * ARM ARM: Overlaying the shareability attribute (DDI
     * 0406C.b B3-1376 to 1377)
     *
     * A memory region with a resultant memory type attribute of Normal,
     * and a resultant cacheability attribute of Inner Non-cacheable,
     * Outer Non-cacheable, must have a resultant shareability attribute
     * of Outer Shareable, otherwise shareability is UNPREDICTABLE.
     *
     * On ARMv8 shareability is ignored and explicitly treated as Outer
     * Shareable for Normal Inner Non-cacheable, Outer Non-cacheable.
     * See the note for table D4-40, in page 1788 of the ARM DDI 0487A.j.
     */
    case p2m_mmio_direct_nc:
        e.p2m.mattr = MATTR_MEM_NC;
        e.p2m.sh = LPAE_SH_OUTER;
        break;

    default:
        e.p2m.mattr = MATTR_MEM;
        e.p2m.sh = LPAE_SH_INNER;
    }

    p2m_set_permission(&e, t, a);

    ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK));

    e.p2m.base = mfn_x(mfn);

    return e;
}

static inline void p2m_write_pte(lpae_t *p, lpae_t pte, bool clean_pte)
{
    write_pte(p, pte);
    if ( clean_pte )
        clean_dcache(*p);
}

static inline void p2m_remove_pte(lpae_t *p, bool clean_pte)
{
    lpae_t pte;

    memset(&pte, 0x00, sizeof(pte));
    p2m_write_pte(p, pte, clean_pte);
}

/* Allocate a new page table page and hook it in via the given entry. */
static int p2m_create_table(struct p2m_domain *p2m, lpae_t *entry)
{
    struct page_info *page;
    lpae_t *p;
    lpae_t pte;

    ASSERT(!lpae_valid(*entry));

    page = alloc_domheap_page(NULL, 0);
    if ( page == NULL )
        return -ENOMEM;

    page_list_add(page, &p2m->pages);

    p = __map_domain_page(page);
    clear_page(p);

    if ( p2m->clean_pte )
        clean_dcache_va_range(p, PAGE_SIZE);

    unmap_domain_page(p);

    /*
     * The access value does not matter because the hardware will ignore
     * the permission fields for a table entry.
     */
    pte = mfn_to_p2m_entry(page_to_mfn(page), p2m_invalid,
                           p2m->default_access);

    p2m_write_pte(entry, pte, p2m->clean_pte);

    return 0;
}

static int p2m_mem_access_radix_set(struct p2m_domain *p2m, gfn_t gfn,
                                    p2m_access_t a)
{
    int rc;

    if ( !p2m->mem_access_enabled )
        return 0;

    if ( p2m_access_rwx == a )
    {
        radix_tree_delete(&p2m->mem_access_settings, gfn_x(gfn));
        return 0;
    }

    rc = radix_tree_insert(&p2m->mem_access_settings, gfn_x(gfn),
                           radix_tree_int_to_ptr(a));
    if ( rc == -EEXIST )
    {
        /* If a setting already exists, change it to the new one */
        radix_tree_replace_slot(
            radix_tree_lookup_slot(
                &p2m->mem_access_settings, gfn_x(gfn)),
            radix_tree_int_to_ptr(a));
        rc = 0;
    }

    return rc;
}

/*
 * Put any references on the single 4K page referenced by pte.
 * TODO: Handle superpages, for now we only take special references for leaf
 * pages (specifically foreign ones, which can't be super mapped today).
 */
static void p2m_put_l3_page(const lpae_t pte)
{
    ASSERT(lpae_valid(pte));

    /*
     * TODO: Handle other p2m types
     *
     * It's safe to do the put_page here because page_alloc will
     * flush the TLBs if the page is reallocated before the end of
     * this loop.
     */
    if ( p2m_is_foreign(pte.p2m.type) )
    {
        mfn_t mfn = _mfn(pte.p2m.base);

        ASSERT(mfn_valid(mfn));
        put_page(mfn_to_page(mfn));
    }
}

/* Free lpae sub-tree behind an entry */
static void p2m_free_entry(struct p2m_domain *p2m,
                           lpae_t entry, unsigned int level)
{
    unsigned int i;
    lpae_t *table;
    mfn_t mfn;
    struct page_info *pg;

    /* Nothing to do if the entry is invalid. */
    if ( !lpae_valid(entry) )
        return;

    /* Nothing to do but updating the stats if the entry is a super-page. */
    if ( lpae_is_superpage(entry, level) )
    {
        p2m->stats.mappings[level]--;
        return;
    }

    if ( level == 3 )
    {
        p2m->stats.mappings[level]--;
        p2m_put_l3_page(entry);
        return;
    }

    table = map_domain_page(_mfn(entry.p2m.base));
    for ( i = 0; i < LPAE_ENTRIES; i++ )
        p2m_free_entry(p2m, *(table + i), level + 1);

    unmap_domain_page(table);

    /*
     * Make sure all the references in the TLB have been removed before
     * freeing the intermediate page table.
     * XXX: Should we defer the free of the page table to avoid the
     * flush?
     */
    if ( p2m->need_flush )
        p2m_flush_tlb_sync(p2m);

    mfn = _mfn(entry.p2m.base);
    ASSERT(mfn_valid(mfn));

    pg = mfn_to_page(mfn);

    page_list_del(pg, &p2m->pages);
    free_domheap_page(pg);
}

static bool p2m_split_superpage(struct p2m_domain *p2m, lpae_t *entry,
                                unsigned int level, unsigned int target,
                                const unsigned int *offsets)
{
    struct page_info *page;
    unsigned int i;
    lpae_t pte, *table;
    bool rv = true;

    /* Convenience aliases */
    mfn_t mfn = _mfn(entry->p2m.base);
    unsigned int next_level = level + 1;
    unsigned int level_order = level_orders[next_level];

    /*
     * This should only be called with target != level and with a
     * superpage entry.
     */
    ASSERT(level < target);
    ASSERT(lpae_is_superpage(*entry, level));

    page = alloc_domheap_page(NULL, 0);
    if ( !page )
        return false;

    page_list_add(page, &p2m->pages);
    table = __map_domain_page(page);

    /*
     * We are either splitting a first level 1G page into 512 second level
     * 2M pages, or a second level 2M page into 512 third level 4K pages.
     */
    for ( i = 0; i < LPAE_ENTRIES; i++ )
    {
        lpae_t *new_entry = table + i;

        /*
         * Use the content of the superpage entry and override
         * the necessary fields, so the correct permissions are kept.
         */
        pte = *entry;
        pte.p2m.base = mfn_x(mfn_add(mfn, i << level_order));

        /*
         * First and second level pages set p2m.table = 0, but third
         * level entries set p2m.table = 1.
         */
        pte.p2m.table = (next_level == 3);

        write_pte(new_entry, pte);
    }

    /* Update stats */
    p2m->stats.shattered[level]++;
    p2m->stats.mappings[level]--;
    p2m->stats.mappings[next_level] += LPAE_ENTRIES;

    /*
     * Shatter the superpage further down to the level at which we want
     * to make the change.
     * This is done outside the loop to avoid checking the offset of
     * every entry to know whether it should be shattered.
     */
    if ( next_level != target )
        rv = p2m_split_superpage(p2m, table + offsets[next_level],
                                 level + 1, target, offsets);

    if ( p2m->clean_pte )
        clean_dcache_va_range(table, PAGE_SIZE);

    unmap_domain_page(table);

    pte = mfn_to_p2m_entry(page_to_mfn(page), p2m_invalid,
                           p2m->default_access);

    /*
     * Even if we failed, we should install the newly allocated LPAE
     * entry. The caller will be in charge of freeing the sub-tree.
     */
    p2m_write_pte(entry, pte, p2m->clean_pte);

    return rv;
}
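
/*
 * Worked example: splitting a 1G (level 1) superpage down to a 4K
 * (level 3) target allocates two table pages. The level-1 entry is
 * first replaced by a table of 512 2M entries inheriting the original
 * permissions; the single 2M entry covering the offset of interest is
 * then split again into 512 4K entries. Only the path containing the
 * target GFN is shattered; the other 511 2M mappings stay intact.
 */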

/*
 * Insert an entry in the p2m. This should be called with a mapping
 * equal to a page/superpage (4K, 2M, 1G).
 */
static int __p2m_set_entry(struct p2m_domain *p2m,
                           gfn_t sgfn,
                           unsigned int page_order,
                           mfn_t smfn,
                           p2m_type_t t,
                           p2m_access_t a)
{
    paddr_t addr = gfn_to_gaddr(sgfn);
    unsigned int level = 0;
    unsigned int target = 3 - (page_order / LPAE_SHIFT);
    lpae_t *entry, *table, orig_pte;
    int rc;

    /* Convenience aliases */
    const unsigned int offsets[4] = {
        zeroeth_table_offset(addr),
        first_table_offset(addr),
        second_table_offset(addr),
        third_table_offset(addr)
    };

    ASSERT(p2m_is_write_locked(p2m));

    /*
     * Check if the target level is valid: we only support
     * 4K - 2M - 1G mappings.
     */
    ASSERT(target > 0 && target <= 3);

    table = p2m_get_root_pointer(p2m, sgfn);
    if ( !table )
        return -EINVAL;

    for ( level = P2M_ROOT_LEVEL; level < target; level++ )
    {
        /*
         * Don't try to allocate intermediate page tables if the mapping
         * is about to be removed (i.e. mfn == INVALID_MFN).
         */
        rc = p2m_next_level(p2m, mfn_eq(smfn, INVALID_MFN),
                            &table, offsets[level]);
        if ( rc == GUEST_TABLE_MAP_FAILED )
        {
            /*
             * We are here because p2m_next_level has failed to map
             * the intermediate page table (e.g. the table does not exist
             * and the p2m tree is read-only). It is a valid case
             * when removing a mapping as it may not exist in the
             * page table. In this case, just ignore it.
             */
            rc = mfn_eq(smfn, INVALID_MFN) ? 0 : -ENOENT;
            goto out;
        }
        else if ( rc != GUEST_TABLE_NORMAL_PAGE )
            break;
    }

    entry = table + offsets[level];

    /*
     * If we are here with level < target, we must be at a leaf node,
     * and we need to break up the superpage.
     */
    if ( level < target )
    {
        /* We need to split the original page. */
        lpae_t split_pte = *entry;

        ASSERT(lpae_is_superpage(*entry, level));

        if ( !p2m_split_superpage(p2m, &split_pte, level, target, offsets) )
        {
            /*
             * The current super-page is still in-place, so re-increment
             * the stats.
             */
            p2m->stats.mappings[level]++;

            /* Free the allocated sub-tree */
            p2m_free_entry(p2m, split_pte, level);

            rc = -ENOMEM;
            goto out;
        }

        /*
         * Follow the break-before-make sequence to update the entry.
         * For more details see (D4.7.1 in ARM DDI 0487A.j).
         */
        p2m_remove_pte(entry, p2m->clean_pte);
        p2m_flush_tlb_sync(p2m);

        p2m_write_pte(entry, split_pte, p2m->clean_pte);

        /* then move to the level we want to make real changes */
        for ( ; level < target; level++ )
        {
            rc = p2m_next_level(p2m, true, &table, offsets[level]);

            /*
             * The entry should be found and either be a table
             * or a superpage if level 3 is not targeted
             */
            ASSERT(rc == GUEST_TABLE_NORMAL_PAGE ||
                   (rc == GUEST_TABLE_SUPER_PAGE && target < 3));
        }

        entry = table + offsets[level];
    }

    /*
     * We should always end up here at the correct level because
     * all the intermediate tables have been installed if necessary.
     */
    ASSERT(level == target);

    orig_pte = *entry;

    /*
     * The radix-tree can only work on 4KB. This is only used when
     * memaccess is enabled and during shutdown.
     */
    ASSERT(!p2m->mem_access_enabled || page_order == 0 ||
           p2m->domain->is_dying);
    /*
     * The access type should always be p2m_access_rwx when the mapping
     * is removed.
     */
    ASSERT(!mfn_eq(INVALID_MFN, smfn) || (a == p2m_access_rwx));
    /*
     * Update the mem access permission before updating the P2M, so we
     * don't have to revert the P2M update if setting the permission
     * fails.
     */
    rc = p2m_mem_access_radix_set(p2m, sgfn, a);
    if ( rc )
        goto out;

    /*
     * Always remove the entry in order to follow the break-before-make
     * sequence when updating the translation table (D4.7.1 in ARM DDI
     * 0487A.j).
     */
    if ( lpae_valid(orig_pte) )
        p2m_remove_pte(entry, p2m->clean_pte);

    if ( mfn_eq(smfn, INVALID_MFN) )
        /* Flush can be deferred if the entry is removed */
        p2m->need_flush |= !!lpae_valid(orig_pte);
    else
    {
        lpae_t pte = mfn_to_p2m_entry(smfn, t, a);

        if ( level < 3 )
            pte.p2m.table = 0; /* Superpage entry */

        /*
         * It is necessary to flush the TLB before writing the new entry
         * to keep coherency when the previous entry was valid.
         *
         * However, the flush can be deferred when only the permissions
         * are changed (e.g. in the case of memaccess).
         */
        if ( lpae_valid(orig_pte) )
        {
            if ( likely(!p2m->mem_access_enabled) ||
                 P2M_CLEAR_PERM(pte) != P2M_CLEAR_PERM(orig_pte) )
                p2m_flush_tlb_sync(p2m);
            else
                p2m->need_flush = true;
        }
        else /* new mapping */
            p2m->stats.mappings[level]++;

        p2m_write_pte(entry, pte, p2m->clean_pte);

        p2m->max_mapped_gfn = gfn_max(p2m->max_mapped_gfn,
                                      gfn_add(sgfn, 1 << page_order));
        p2m->lowest_mapped_gfn = gfn_min(p2m->lowest_mapped_gfn, sgfn);
    }

    /*
     * Free the entry only if the original pte was valid and the base
     * is different (to avoid freeing when only the permissions are
     * changed).
     */
    if ( lpae_valid(orig_pte) && entry->p2m.base != orig_pte.p2m.base )
        p2m_free_entry(p2m, orig_pte, level);

    if ( need_iommu(p2m->domain) &&
         (lpae_valid(orig_pte) || lpae_valid(*entry)) )
        rc = iommu_iotlb_flush(p2m->domain, gfn_x(sgfn), 1UL << page_order);
    else
        rc = 0;

out:
    unmap_domain_page(table);

    return rc;
}
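
/*
 * Illustration of the sequence above for a common case: inserting a 4K
 * page into a region currently covered by a valid 2M superpage. The
 * level-2 superpage is shattered into a level-3 table, the old level-2
 * entry is removed and the TLB flushed (break) before the new table
 * entry is written (make); the level-3 slot for the target GFN, which
 * inherited a valid entry from the split, is then updated the same
 * way: remove, flush, write.
 */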

int p2m_set_entry(struct p2m_domain *p2m,
                  gfn_t sgfn,
                  unsigned long nr,
                  mfn_t smfn,
                  p2m_type_t t,
                  p2m_access_t a)
{
    int rc = 0;

    while ( nr )
    {
        unsigned long mask;
        unsigned long order;

        /*
         * Don't take the MFN into account when removing a mapping (i.e.
         * smfn == INVALID_MFN), so that the correct target order is
         * still calculated.
         *
         * XXX: Support superpage mappings if nr is not aligned to a
         * superpage size.
         */
        mask = !mfn_eq(smfn, INVALID_MFN) ? mfn_x(smfn) : 0;
        mask |= gfn_x(sgfn) | nr;

        /* Always map 4k by 4k when memaccess is enabled */
        if ( unlikely(p2m->mem_access_enabled) )
            order = THIRD_ORDER;
        else if ( !(mask & ((1UL << FIRST_ORDER) - 1)) )
            order = FIRST_ORDER;
        else if ( !(mask & ((1UL << SECOND_ORDER) - 1)) )
            order = SECOND_ORDER;
        else
            order = THIRD_ORDER;

        rc = __p2m_set_entry(p2m, sgfn, order, smfn, t, a);
        if ( rc )
            break;

        sgfn = gfn_add(sgfn, (1 << order));
        if ( !mfn_eq(smfn, INVALID_MFN) )
            smfn = mfn_add(smfn, (1 << order));

        nr -= (1 << order);
    }

    return rc;
}
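
/*
 * Example of the order selection above (with the 4K granule,
 * FIRST_ORDER and SECOND_ORDER correspond to 1G and 2M worth of pages
 * respectively): if gfn, mfn and nr are all 1G-aligned, each iteration
 * emits a 1G mapping; with only 2M alignment, 2M mappings. A single
 * misaligned page (say nr = 0x201 with 2M-aligned gfn/mfn) currently
 * causes the whole range to be mapped 4K by 4K, which is what the XXX
 * above refers to.
 */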

static inline int p2m_insert_mapping(struct domain *d,
                                     gfn_t start_gfn,
                                     unsigned long nr,
                                     mfn_t mfn,
                                     p2m_type_t t)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc;

    p2m_write_lock(p2m);
    rc = p2m_set_entry(p2m, start_gfn, nr, mfn, t, p2m->default_access);
    p2m_write_unlock(p2m);

    return rc;
}

static inline int p2m_remove_mapping(struct domain *d,
                                     gfn_t start_gfn,
                                     unsigned long nr,
                                     mfn_t mfn)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc;

    p2m_write_lock(p2m);
    rc = p2m_set_entry(p2m, start_gfn, nr, INVALID_MFN,
                       p2m_invalid, p2m_access_rwx);
    p2m_write_unlock(p2m);

    return rc;
}

int map_regions_p2mt(struct domain *d,
                     gfn_t gfn,
                     unsigned long nr,
                     mfn_t mfn,
                     p2m_type_t p2mt)
{
    return p2m_insert_mapping(d, gfn, nr, mfn, p2mt);
}

int unmap_regions_p2mt(struct domain *d,
                       gfn_t gfn,
                       unsigned long nr,
                       mfn_t mfn)
{
    return p2m_remove_mapping(d, gfn, nr, mfn);
}

int map_mmio_regions(struct domain *d,
                     gfn_t start_gfn,
                     unsigned long nr,
                     mfn_t mfn)
{
    return p2m_insert_mapping(d, start_gfn, nr, mfn, p2m_mmio_direct_dev);
}

int unmap_mmio_regions(struct domain *d,
                       gfn_t start_gfn,
                       unsigned long nr,
                       mfn_t mfn)
{
    return p2m_remove_mapping(d, start_gfn, nr, mfn);
}

int map_dev_mmio_region(struct domain *d,
                        gfn_t gfn,
                        unsigned long nr,
                        mfn_t mfn)
{
    int res;

    if ( !(nr && iomem_access_permitted(d, mfn_x(mfn), mfn_x(mfn) + nr - 1)) )
        return 0;

    res = p2m_insert_mapping(d, gfn, nr, mfn, p2m_mmio_direct_c);
    if ( res < 0 )
    {
        printk(XENLOG_G_ERR "Unable to map MFNs [%#"PRI_mfn" - %#"PRI_mfn"] in Dom%d\n",
               mfn_x(mfn), mfn_x(mfn) + nr - 1, d->domain_id);
        return res;
    }

    return 0;
}

int guest_physmap_add_entry(struct domain *d,
                            gfn_t gfn,
                            mfn_t mfn,
                            unsigned long page_order,
                            p2m_type_t t)
{
    return p2m_insert_mapping(d, gfn, (1 << page_order), mfn, t);
}

int guest_physmap_remove_page(struct domain *d, gfn_t gfn, mfn_t mfn,
                              unsigned int page_order)
{
    return p2m_remove_mapping(d, gfn, (1 << page_order), mfn);
}

static int p2m_alloc_table(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    struct page_info *page;
    unsigned int i;

    page = alloc_domheap_pages(NULL, P2M_ROOT_ORDER, 0);
    if ( page == NULL )
        return -ENOMEM;

    /* Clear all the root level pages */
    for ( i = 0; i < P2M_ROOT_PAGES; i++ )
        clear_and_clean_page(page + i);

    p2m->root = page;

    p2m->vttbr = page_to_maddr(p2m->root) | ((uint64_t)p2m->vmid << 48);

    /*
     * Make sure that all TLBs corresponding to the new VMID are flushed
     * before using it.
     */
    p2m_flush_tlb(p2m);

    return 0;
}
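
/*
 * Note on the VTTBR value built above: the physical address of the
 * root table goes in the low bits and the VMID is placed in the field
 * starting at bit 48, which holds up to 16 bits when VTCR_EL2.VS is
 * set in setup_virt_paging() and 8 bits otherwise.
 */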


static spinlock_t vmid_alloc_lock = SPIN_LOCK_UNLOCKED;

/*
 * VTTBR_EL2 VMID field is 8 or 16 bits. AArch64 may support 16-bit VMID.
 * Using a bitmap here limits us to 256 or 65536 (for AArch64) concurrent
 * domains. The bitmap space will be allocated dynamically based on
 * whether 8 or 16 bit VMIDs are supported.
 */
static unsigned long *vmid_mask;

static void p2m_vmid_allocator_init(void)
{
    /* Allocate space for vmid_mask based on MAX_VMID. */
    vmid_mask = xzalloc_array(unsigned long, BITS_TO_LONGS(MAX_VMID));

    if ( !vmid_mask )
        panic("Could not allocate VMID bitmap space");

    set_bit(INVALID_VMID, vmid_mask);
}
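
/*
 * Sizing note: with the default 8-bit VMIDs the bitmap needs 256 bits,
 * i.e. four unsigned longs on a 64-bit build; with 16-bit VMIDs it
 * grows to 65536 bits (8KB). Bit 0 is set at init time so VMID 0 is
 * never handed out, matching the INVALID_VMID reservation above.
 */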

static int p2m_alloc_vmid(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    int rc, nr;

    spin_lock(&vmid_alloc_lock);

    nr = find_first_zero_bit(vmid_mask, MAX_VMID);

    ASSERT(nr != INVALID_VMID);

    if ( nr == MAX_VMID )
    {
        rc = -EBUSY;
        printk(XENLOG_ERR "p2m.c: dom%d: VMID pool exhausted\n", d->domain_id);
        goto out;
    }

    set_bit(nr, vmid_mask);

    p2m->vmid = nr;

    rc = 0;

out:
    spin_unlock(&vmid_alloc_lock);
    return rc;
}

static void p2m_free_vmid(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    spin_lock(&vmid_alloc_lock);
    if ( p2m->vmid != INVALID_VMID )
        clear_bit(p2m->vmid, vmid_mask);

    spin_unlock(&vmid_alloc_lock);
}

void p2m_teardown(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    struct page_info *pg;

    /* p2m not actually initialized */
    if ( !p2m->domain )
        return;

    while ( (pg = page_list_remove_head(&p2m->pages)) )
        free_domheap_page(pg);

    if ( p2m->root )
        free_domheap_pages(p2m->root, P2M_ROOT_ORDER);

    p2m->root = NULL;

    p2m_free_vmid(d);

    radix_tree_destroy(&p2m->mem_access_settings, NULL);

    p2m->domain = NULL;
}

int p2m_init(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc = 0;
    unsigned int cpu;

    rwlock_init(&p2m->lock);
    INIT_PAGE_LIST_HEAD(&p2m->pages);

    p2m->vmid = INVALID_VMID;

    rc = p2m_alloc_vmid(d);
    if ( rc != 0 )
        return rc;

    p2m->max_mapped_gfn = _gfn(0);
    p2m->lowest_mapped_gfn = _gfn(ULONG_MAX);

    p2m->default_access = p2m_access_rwx;
    p2m->mem_access_enabled = false;
    radix_tree_init(&p2m->mem_access_settings);

    /*
     * Some IOMMUs don't support coherent PT walk. When the p2m is
     * shared with the CPU, Xen has to make sure that the PT changes have
     * reached the memory.
     */
    p2m->clean_pte = iommu_enabled &&
        !iommu_has_feature(d, IOMMU_FEAT_COHERENT_WALK);

    rc = p2m_alloc_table(d);

    /*
     * Make sure that the type chosen is able to store any vCPU ID
     * between 0 and the maximum number of virtual CPUs supported, as
     * well as INVALID_VCPU_ID.
     */
    BUILD_BUG_ON((1 << (sizeof(p2m->last_vcpu_ran[0]) * 8)) < MAX_VIRT_CPUS);
    BUILD_BUG_ON((1 << (sizeof(p2m->last_vcpu_ran[0]) * 8)) < INVALID_VCPU_ID);

    for_each_possible_cpu(cpu)
        p2m->last_vcpu_ran[cpu] = INVALID_VCPU_ID;

    /*
     * Besides getting a domain when we only have the p2m in hand,
     * the back pointer to the domain is also used in p2m_teardown()
     * as an end-of-initialization indicator.
     */
    p2m->domain = d;

    return rc;
}
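
/*
 * Note the ordering in p2m_init(): the VMID is allocated before the
 * root table, because p2m_alloc_table() folds p2m->vmid into the VTTBR
 * value it computes. p2m->domain is only set at the very end, so
 * p2m_teardown() can use it to tell a fully initialized p2m from a
 * partially constructed one.
 */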

/*
 * The function will go through the p2m and remove page references where
 * required. The mappings will be removed from the p2m.
 *
 * XXX: See whether the mapping can be left intact in the p2m.
 */
int relinquish_p2m_mapping(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    unsigned long count = 0;
    p2m_type_t t;
    int rc = 0;
    unsigned int order;
    gfn_t start, end;

    p2m_write_lock(p2m);

    start = p2m->lowest_mapped_gfn;
    end = p2m->max_mapped_gfn;

    for ( ; gfn_x(start) < gfn_x(end);
          start = gfn_next_boundary(start, order) )
    {
        mfn_t mfn = p2m_get_entry(p2m, start, &t, NULL, &order);

        count++;
        /*
         * Arbitrarily preempt every 512 iterations.
         */
        if ( !(count % 512) && hypercall_preempt_check() )
        {
            rc = -ERESTART;
            break;
        }

        /*
         * p2m_set_entry will take care of dropping the page reference
         * when necessary and removing the mapping from the p2m.
         */
        if ( !mfn_eq(mfn, INVALID_MFN) )
        {
            /*
             * For a valid mapping, start will always be aligned because
             * entries are removed whilst relinquishing.
             */
            rc = __p2m_set_entry(p2m, start, order, INVALID_MFN,
                                 p2m_invalid, p2m_access_rwx);
            if ( unlikely(rc) )
            {
                printk(XENLOG_G_ERR "Unable to remove mapping gfn=%#"PRI_gfn" order=%u from the p2m of domain %d\n", gfn_x(start), order, d->domain_id);
                break;
            }
        }
    }

    /*
     * Update lowest_mapped_gfn so that on the next call we start where
     * we stopped.
     */
    p2m->lowest_mapped_gfn = start;

    p2m_write_unlock(p2m);

    return rc;
}

int p2m_cache_flush(struct domain *d, gfn_t start, unsigned long nr)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    gfn_t end = gfn_add(start, nr);
    gfn_t next_gfn;
    p2m_type_t t;
    unsigned int order;

    /*
     * The cache flush operation acts on the RAM assigned to the guest
     * in the given range. It does not modify the page tables, and
     * flushing the cache whilst a page is in use by another CPU is
     * fine. So taking the read lock is sufficient here.
     */
    p2m_read_lock(p2m);

    start = gfn_max(start, p2m->lowest_mapped_gfn);
    end = gfn_min(end, p2m->max_mapped_gfn);

    for ( ; gfn_x(start) < gfn_x(end); start = next_gfn )
    {
        mfn_t mfn = p2m_get_entry(p2m, start, &t, NULL, &order);

        next_gfn = gfn_next_boundary(start, order);

        /* Skip holes and non-RAM pages */
        if ( mfn_eq(mfn, INVALID_MFN) || !p2m_is_ram(t) )
            continue;

        /* XXX: Implement preemption */
        while ( gfn_x(start) < gfn_x(next_gfn) )
        {
            flush_page_to_ram(mfn_x(mfn), false);

            start = gfn_add(start, 1);
            mfn = mfn_add(mfn, 1);
        }
    }

    invalidate_icache();

    p2m_read_unlock(p2m);

    return 0;
}

mfn_t gfn_to_mfn(struct domain *d, gfn_t gfn)
{
    return p2m_lookup(d, gfn, NULL);
}

struct page_info *get_page_from_gva(struct vcpu *v, vaddr_t va,
                                    unsigned long flags)
{
    struct domain *d = v->domain;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    struct page_info *page = NULL;
    paddr_t maddr = 0;
    uint64_t par;

    /*
     * XXX: To support a different vCPU, we would need to load the
     * VTTBR_EL2, TTBR0_EL1, TTBR1_EL1 and SCTLR_EL1
     */
    if ( v != current )
        return NULL;

    p2m_read_lock(p2m);

    par = gvirt_to_maddr(va, &maddr, flags);

    if ( par )
    {
        dprintk(XENLOG_G_DEBUG,
                "%pv: gvirt_to_maddr failed va=%#"PRIvaddr" flags=0x%lx par=%#"PRIx64"\n",
                v, va, flags, par);
        goto err;
    }

    if ( !mfn_valid(maddr_to_mfn(maddr)) )
    {
        dprintk(XENLOG_G_DEBUG, "%pv: Invalid MFN %#"PRI_mfn"\n",
                v, mfn_x(maddr_to_mfn(maddr)));
        goto err;
    }

    page = mfn_to_page(maddr_to_mfn(maddr));
    ASSERT(page);

    if ( unlikely(!get_page(page, d)) )
    {
        dprintk(XENLOG_G_DEBUG, "%pv: Failed to acquire the MFN %#"PRI_mfn"\n",
                v, mfn_x(maddr_to_mfn(maddr)));
        page = NULL;
    }

err:
    if ( !page && p2m->mem_access_enabled )
        page = p2m_mem_access_check_and_get_page(va, flags, v);

    p2m_read_unlock(p2m);

    return page;
}

static void __init setup_virt_paging_one(void *data)
{
    unsigned long val = (unsigned long)data;
    WRITE_SYSREG32(val, VTCR_EL2);
    isb();
}

void __init setup_virt_paging(void)
{
    /* Setup Stage 2 address translation */
    unsigned long val = VTCR_RES1|VTCR_SH0_IS|VTCR_ORGN0_WBWA|VTCR_IRGN0_WBWA;

#ifdef CONFIG_ARM_32
    printk("P2M: 40-bit IPA\n");
    p2m_ipa_bits = 40;
    val |= VTCR_T0SZ(0x18); /* 40 bit IPA */
    val |= VTCR_SL0(0x1); /* P2M starts at first level */
#else /* CONFIG_ARM_64 */
    const struct {
        unsigned int pabits; /* Physical Address Size */
        unsigned int t0sz;   /* Desired T0SZ, minimum in comment */
        unsigned int root_order; /* Page order of the root of the p2m */
        unsigned int sl0;    /* Desired SL0, maximum in comment */
    } pa_range_info[] = {
        /* T0SZ minimum and SL0 maximum from ARM DDI 0487A.b Table D4-5 */
        /*      PA size, t0sz(min), root-order, sl0(max) */
        [0] = { 32,      32/*32*/,  0,          1 },
        [1] = { 36,      28/*28*/,  0,          1 },
        [2] = { 40,      24/*24*/,  1,          1 },
        [3] = { 42,      24/*22*/,  1,          1 },
        [4] = { 44,      20/*20*/,  0,          2 },
        [5] = { 48,      16/*16*/,  0,          2 },
        [6] = { 0 }, /* Invalid */
        [7] = { 0 }  /* Invalid */
    };

    unsigned int cpu;
    unsigned int pa_range = 0x10; /* Larger than any possible value */
    bool vmid_8_bit = false;

    for_each_online_cpu ( cpu )
    {
        const struct cpuinfo_arm *info = &cpu_data[cpu];
        if ( info->mm64.pa_range < pa_range )
            pa_range = info->mm64.pa_range;

        /* Set a flag if the current cpu does not support 16 bit VMIDs. */
        if ( info->mm64.vmid_bits != MM64_VMID_16_BITS_SUPPORT )
            vmid_8_bit = true;
    }

    /*
     * If the flag is not set then it means all CPUs support 16-bit
     * VMIDs.
     */
    if ( !vmid_8_bit )
        max_vmid = MAX_VMID_16_BIT;

    /* pa_range is 4 bits, but the defined encodings are only 3 bits */
    if ( pa_range >= ARRAY_SIZE(pa_range_info) || !pa_range_info[pa_range].pabits )
        panic("Unknown encoding of ID_AA64MMFR0_EL1.PARange %x\n", pa_range);

    val |= VTCR_PS(pa_range);
    val |= VTCR_TG0_4K;

    /* Set the VS bit only if 16 bit VMID is supported. */
    if ( MAX_VMID == MAX_VMID_16_BIT )
        val |= VTCR_VS;
    val |= VTCR_SL0(pa_range_info[pa_range].sl0);
    val |= VTCR_T0SZ(pa_range_info[pa_range].t0sz);

    p2m_root_order = pa_range_info[pa_range].root_order;
    p2m_root_level = 2 - pa_range_info[pa_range].sl0;
    p2m_ipa_bits = 64 - pa_range_info[pa_range].t0sz;

    printk("P2M: %d-bit IPA with %d-bit PA and %d-bit VMID\n",
           p2m_ipa_bits,
           pa_range_info[pa_range].pabits,
           ( MAX_VMID == MAX_VMID_16_BIT ) ? 16 : 8);
#endif
    printk("P2M: %d levels with order-%d root, VTCR 0x%lx\n",
           4 - P2M_ROOT_LEVEL, P2M_ROOT_ORDER, val);

    p2m_vmid_allocator_init();

    /* It is not allowed to concatenate a level zero root */
    BUG_ON( P2M_ROOT_LEVEL == 0 && P2M_ROOT_ORDER > 0 );
    setup_virt_paging_one((void *)val);
    smp_call_function(setup_virt_paging_one, (void *)val, 1);
}
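
/*
 * Worked example of the table above: on a platform reporting
 * PARange = 2 (40-bit PA), t0sz is 24 so p2m_ipa_bits = 64 - 24 = 40,
 * sl0 is 1 so the walk starts at level 1 (p2m_root_level = 2 - 1 = 1),
 * and root_order is 1, i.e. the root consists of two concatenated
 * pages, matching the AArch32 defaults at the top of this file.
 */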

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */