1 /*
2 * xen/arch/arm/mm.c
3 *
4 * MMU code for an ARMv7-A with virt extensions.
5 *
6 * Tim Deegan <tim@xen.org>
7 * Copyright (c) 2011 Citrix Systems.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 */
19
20 #include <xen/compile.h>
21 #include <xen/types.h>
22 #include <xen/device_tree.h>
23 #include <xen/init.h>
24 #include <xen/mm.h>
25 #include <xen/preempt.h>
26 #include <xen/errno.h>
27 #include <xen/grant_table.h>
28 #include <xen/softirq.h>
29 #include <xen/event.h>
30 #include <xen/guest_access.h>
31 #include <xen/domain_page.h>
32 #include <xen/err.h>
33 #include <asm/page.h>
34 #include <asm/current.h>
35 #include <asm/flushtlb.h>
36 #include <public/memory.h>
37 #include <xen/sched.h>
38 #include <xen/vmap.h>
39 #include <xsm/xsm.h>
40 #include <xen/pfn.h>
41 #include <xen/sizes.h>
42 #include <xen/libfdt/libfdt.h>
43 #include <asm/setup.h>
44
45 struct domain *dom_xen, *dom_io, *dom_cow;
46
47 /* Override macros from asm/page.h to make them work with mfn_t */
48 #undef virt_to_mfn
49 #define virt_to_mfn(va) _mfn(__virt_to_mfn(va))
50 #undef mfn_to_virt
51 #define mfn_to_virt(mfn) __mfn_to_virt(mfn_x(mfn))
52
53 /* Static start-of-day pagetables that we use before the allocators
54 * are up. These are used by all CPUs during bringup before switching
55 * to each CPU's own pagetables.
56 *
57 * These pagetables have a very simple structure. They include:
58 * - 2MB worth of 4K mappings of Xen at XEN_VIRT_START; boot_first and
59 * boot_second are used to populate the tables down to boot_third,
60 * which contains the actual mapping.
61 * - a 1:1 mapping of xen at its current physical address. This uses a
62 * section mapping at whichever of boot_{pgtable,first,second}
63 * covers that physical address.
64 *
65 * For the boot CPU these mappings point to the address where Xen was
66 * loaded by the bootloader. For secondary CPUs they point to the
67 * relocated copy of Xen.
68 *
69 * In addition to the above for the boot CPU the device-tree is
70 * initially mapped in the boot misc slot. This mapping is not present
71 * for secondary CPUs.
72 *
73 * Finally, if EARLY_PRINTK is enabled then xen_fixmap will be mapped
74 * by the CPU once it has moved off the 1:1 mapping.
75 */
76 lpae_t boot_pgtable[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
77 #ifdef CONFIG_ARM_64
78 lpae_t boot_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
79 lpae_t boot_first_id[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
80 #endif
81 lpae_t boot_second[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
82 lpae_t boot_third[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
83
84 /* Main runtime page tables */
85
86 /*
87 * For arm32 xen_pgtable and xen_dommap are per-PCPU and are allocated before
88 * bringing up each CPU. For arm64 xen_pgtable is common to all PCPUs.
89 *
90 * xen_second, xen_fixmap and xen_xenmap are always shared between all
91 * PCPUs.
92 */
93
94 #ifdef CONFIG_ARM_64
95 #define HYP_PT_ROOT_LEVEL 0
96 lpae_t xen_pgtable[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
97 lpae_t xen_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
98 #define THIS_CPU_PGTABLE xen_pgtable
99 #else
100 #define HYP_PT_ROOT_LEVEL 1
101 /* Per-CPU pagetable pages */
102 /* xen_pgtable == root of the trie (zeroeth level on 64-bit, first on 32-bit) */
103 static DEFINE_PER_CPU(lpae_t *, xen_pgtable);
104 #define THIS_CPU_PGTABLE this_cpu(xen_pgtable)
105 /* xen_dommap == pages used by map_domain_page; these pages contain
106 * the second level pagetables which map the domheap region
107 * DOMHEAP_VIRT_START...DOMHEAP_VIRT_END in 2MB chunks. */
108 static DEFINE_PER_CPU(lpae_t *, xen_dommap);
109 /* Root of the trie for cpu0; other CPUs' PTs are dynamically allocated */
110 lpae_t cpu0_pgtable[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
111 /* cpu0's domheap page tables */
112 lpae_t cpu0_dommap[LPAE_ENTRIES*DOMHEAP_SECOND_PAGES]
113 __attribute__((__aligned__(4096*DOMHEAP_SECOND_PAGES)));
114 #endif
115
116 #ifdef CONFIG_ARM_64
117 /* The first page of the first level mapping of the xenheap. The
118 * subsequent xenheap first level pages are dynamically allocated, but
119 * we need this one to bootstrap ourselves. */
120 lpae_t xenheap_first_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
121 /* The zeroeth level slot which uses xenheap_first_first. Used because
122 * setup_xenheap_mappings otherwise relies on mfn_to_virt which isn't
123 * valid for a non-xenheap mapping. */
124 static __initdata int xenheap_first_first_slot = -1;
125 #endif
126
127 /* Common pagetable leaves */
128 /* Second level page tables.
129 *
130 * The second-level table is 2 contiguous pages long, and covers all
131 * addresses from 0 to 0x7fffffff. Offsets into it are calculated
132 * with second_linear_offset(), not second_table_offset().
133 */
134 lpae_t xen_second[LPAE_ENTRIES*2] __attribute__((__aligned__(4096*2)));
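/* Note: second_linear_offset() indexes linearly across both concatenated
 * pages (0..1023 for this range), while second_table_offset() wraps within
 * a single 512-entry page. Assuming the usual LPAE macro definitions, a VA
 * of 0x40200000 (1GB + 2MB) gives second_linear_offset() == 513, i.e. an
 * entry in the second of the two pages, whereas second_table_offset()
 * would give 1. */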
135 /* Third level page table used for the fixmap */
136 lpae_t xen_fixmap[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
137 /* Third level page table used to map Xen itself with the XN bit set
138 * as appropriate. */
139 static lpae_t xen_xenmap[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
140
141 /* Non-boot CPUs use this to find the correct pagetables. */
142 uint64_t init_ttbr;
143
144 static paddr_t phys_offset;
145
146 /* Limits of the Xen heap */
147 mfn_t xenheap_mfn_start __read_mostly = INVALID_MFN_INITIALIZER;
148 mfn_t xenheap_mfn_end __read_mostly;
149 vaddr_t xenheap_virt_end __read_mostly;
150 #ifdef CONFIG_ARM_64
151 vaddr_t xenheap_virt_start __read_mostly;
152 #endif
153
154 unsigned long frametable_base_pdx __read_mostly;
155 unsigned long frametable_virt_end __read_mostly;
156
157 unsigned long max_page;
158 unsigned long total_pages;
159
160 extern char __init_begin[], __init_end[];
161
162 /* Checking VA memory layout alignment. */
163 static inline void check_memory_layout_alignment_constraints(void) {
164 /* 2MB aligned regions */
165 BUILD_BUG_ON(XEN_VIRT_START & ~SECOND_MASK);
166 BUILD_BUG_ON(FIXMAP_ADDR(0) & ~SECOND_MASK);
167 BUILD_BUG_ON(BOOT_RELOC_VIRT_START & ~SECOND_MASK);
168 /* 1GB aligned regions */
169 #ifdef CONFIG_ARM_32
170 BUILD_BUG_ON(XENHEAP_VIRT_START & ~FIRST_MASK);
171 #else
172 BUILD_BUG_ON(DIRECTMAP_VIRT_START & ~FIRST_MASK);
173 #endif
174 /* Page table structure constraints */
175 #ifdef CONFIG_ARM_64
176 BUILD_BUG_ON(zeroeth_table_offset(XEN_VIRT_START));
177 #endif
178 BUILD_BUG_ON(first_table_offset(XEN_VIRT_START));
179 BUILD_BUG_ON(second_linear_offset(XEN_VIRT_START) >= LPAE_ENTRIES);
180 #ifdef CONFIG_DOMAIN_PAGE
181 BUILD_BUG_ON(DOMHEAP_VIRT_START & ~FIRST_MASK);
182 #endif
183 }
184
185 void dump_pt_walk(paddr_t ttbr, paddr_t addr,
186 unsigned int root_level,
187 unsigned int nr_root_tables)
188 {
189 static const char *level_strs[4] = { "0TH", "1ST", "2ND", "3RD" };
190 const mfn_t root_mfn = maddr_to_mfn(ttbr);
191 const unsigned int offsets[4] = {
192 zeroeth_table_offset(addr),
193 first_table_offset(addr),
194 second_table_offset(addr),
195 third_table_offset(addr)
196 };
197 lpae_t pte, *mapping;
198 unsigned int level, root_table;
199
200 #ifdef CONFIG_ARM_32
201 BUG_ON(root_level < 1);
202 #endif
203 BUG_ON(root_level > 3);
204
205 if ( nr_root_tables > 1 )
206 {
207 /*
208 * Concatenated root-level tables. The table number will be
209 * the offset at the previous level. It is not possible to
210 * concatenate a level-0 root.
211 */
212 BUG_ON(root_level == 0);
213 root_table = offsets[root_level - 1];
214 printk("Using concatenated root table %u\n", root_table);
215 if ( root_table >= nr_root_tables )
216 {
217 printk("Invalid root table offset\n");
218 return;
219 }
220 }
221 else
222 root_table = 0;
223
224 mapping = map_domain_page(mfn_add(root_mfn, root_table));
225
226 for ( level = root_level; ; level++ )
227 {
228 if ( offsets[level] > LPAE_ENTRIES )
229 break;
230
231 pte = mapping[offsets[level]];
232
233 printk("%s[0x%x] = 0x%"PRIpaddr"\n",
234 level_strs[level], offsets[level], pte.bits);
235
236 if ( level == 3 || !pte.walk.valid || !pte.walk.table )
237 break;
238
239 /* For next iteration */
240 unmap_domain_page(mapping);
241 mapping = map_domain_page(_mfn(pte.walk.base));
242 }
243
244 unmap_domain_page(mapping);
245 }
246
247 void dump_hyp_walk(vaddr_t addr)
248 {
249 uint64_t ttbr = READ_SYSREG64(TTBR0_EL2);
250 lpae_t *pgtable = THIS_CPU_PGTABLE;
251
252 printk("Walking Hypervisor VA 0x%"PRIvaddr" "
253 "on CPU%d via TTBR 0x%016"PRIx64"\n",
254 addr, smp_processor_id(), ttbr);
255
256 if ( smp_processor_id() == 0 )
257 BUG_ON( (lpae_t *)(unsigned long)(ttbr - phys_offset) != pgtable );
258 else
259 BUG_ON( virt_to_maddr(pgtable) != ttbr );
260 dump_pt_walk(ttbr, addr, HYP_PT_ROOT_LEVEL, 1);
261 }
262
263 /*
264 * Standard entry type that we'll use to build Xen's own pagetables.
265 * We put the same permissions at every level, because they're ignored
266 * by the walker in non-leaf entries.
267 */
268 static inline lpae_t mfn_to_xen_entry(mfn_t mfn, unsigned attr)
269 {
270 lpae_t e = (lpae_t) {
271 .pt = {
272 .valid = 1, /* Mappings are present */
273 .table = 0, /* Set to 1 for links and 4k maps */
274 .ai = attr,
275 .ns = 1, /* Hyp mode is in the non-secure world */
276 .up = 1, /* See below */
277 .ro = 0, /* Assume read-write */
278 .af = 1, /* No need for access tracking */
279 .ng = 1, /* Makes TLB flushes easier */
280 .contig = 0, /* Assume non-contiguous */
281 .xn = 1, /* No need to execute outside .text */
282 .avail = 0, /* Reference count for domheap mapping */
283 }};
284 /*
285 * For EL2 stage-1 page table, up (aka AP[1]) is RES1 as the translation
286 * regime applies to only one exception level (see D4.4.4 and G4.6.1
287 * in ARM DDI 0487B.a). If this changes, remember to update the
288 * hard-coded values in head.S too.
289 */
290
291 switch ( attr )
292 {
293 case MT_NORMAL_NC:
294 /*
295 * ARM ARM: Overlaying the shareability attribute (DDI
296 * 0406C.b B3-1376 to 1377)
297 *
298 * A memory region with a resultant memory type attribute of Normal,
299 * and a resultant cacheability attribute of Inner Non-cacheable,
300 * Outer Non-cacheable, must have a resultant shareability attribute
301 * of Outer Shareable, otherwise shareability is UNPREDICTABLE.
302 *
303 * On ARMv8 shareability is ignored and explicitly treated as Outer
304 * Shareable for Normal Inner Non-cacheable, Outer Non-cacheable.
305 */
306 e.pt.sh = LPAE_SH_OUTER;
307 break;
308 case MT_DEVICE_nGnRnE:
309 case MT_DEVICE_nGnRE:
310 /*
311 * Shareability is ignored for non-Normal memory, Outer is as
312 * good as anything.
313 *
314 * On ARMv8 shareability is ignored and explicitly treated as Outer
315 * Shareable for any device memory type.
316 */
317 e.pt.sh = LPAE_SH_OUTER;
318 break;
319 default:
320 e.pt.sh = LPAE_SH_INNER; /* Xen mappings are SMP coherent */
321 break;
322 }
323
324 ASSERT(!(mfn_to_maddr(mfn) & ~PADDR_MASK));
325
326 e.pt.base = mfn_x(mfn);
327
328 return e;
329 }
330
331 /* Map a 4k page in a fixmap entry */
332 void set_fixmap(unsigned map, mfn_t mfn, unsigned int flags)
333 {
334 lpae_t pte = mfn_to_xen_entry(mfn, PAGE_AI_MASK(flags));
335 pte.pt.table = 1; /* 4k mappings always have this bit set */
336 pte.pt.xn = 1;
337 write_pte(xen_fixmap + third_table_offset(FIXMAP_ADDR(map)), pte);
338 flush_xen_data_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE);
339 }
340
341 /* Remove a mapping from a fixmap entry */
342 void clear_fixmap(unsigned map)
343 {
344 lpae_t pte = {0};
345 write_pte(xen_fixmap + third_table_offset(FIXMAP_ADDR(map)), pte);
346 flush_xen_data_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE);
347 }
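/* Illustrative use only (FIXMAP_MISC is a fixmap slot defined outside this
 * file and dev_mfn is a placeholder):
 *   set_fixmap(FIXMAP_MISC, dev_mfn, PAGE_HYPERVISOR_NOCACHE);
 *   ... access the page through FIXMAP_ADDR(FIXMAP_MISC) ...
 *   clear_fixmap(FIXMAP_MISC);
 */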
348
349 /* Create Xen's mappings of memory.
350 * mapping_size must be either 2MB or 32MB.
351 * base_mfn and virt_offset must be mapping_size aligned.
352 * The size (nr_mfns pages) must be a multiple of mapping_size.
353 * second must be a contiguous set of second level page tables
354 * covering the region starting at virt_offset. */
355 static void __init create_mappings(lpae_t *second,
356 unsigned long virt_offset,
357 unsigned long base_mfn,
358 unsigned long nr_mfns,
359 unsigned int mapping_size)
360 {
361 unsigned long i, count;
362 const unsigned long granularity = mapping_size >> PAGE_SHIFT;
363 lpae_t pte, *p;
364
365 ASSERT((mapping_size == MB(2)) || (mapping_size == MB(32)));
366 ASSERT(!((virt_offset >> PAGE_SHIFT) % granularity));
367 ASSERT(!(base_mfn % granularity));
368 ASSERT(!(nr_mfns % granularity));
369
370 count = nr_mfns / LPAE_ENTRIES;
371 p = second + second_linear_offset(virt_offset);
372 pte = mfn_to_xen_entry(_mfn(base_mfn), MT_NORMAL);
373 if ( granularity == 16 * LPAE_ENTRIES )
374 pte.pt.contig = 1; /* These maps are in 16-entry contiguous chunks. */
375 for ( i = 0; i < count; i++ )
376 {
377 write_pte(p + i, pte);
378 pte.pt.base += 1 << LPAE_SHIFT;
379 }
380 flush_xen_data_tlb_local();
381 }
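/* Worked example of the arithmetic above: with mapping_size == MB(2),
 * granularity == 512 == LPAE_ENTRIES and each of the count == nr_mfns/512
 * entries written is a plain 2MB superpage. With mapping_size == MB(32),
 * granularity == 8192 == 16 * LPAE_ENTRIES; the entries are still 2MB
 * superpages (pt.base advances by 1 << LPAE_SHIFT == 512 pages per
 * iteration) but they are written in groups of 16 with the contiguous
 * hint set. */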
382
383 #ifdef CONFIG_DOMAIN_PAGE
384 void *map_domain_page_global(mfn_t mfn)
385 {
386 return vmap(&mfn, 1);
387 }
388
389 void unmap_domain_page_global(const void *va)
390 {
391 vunmap(va);
392 }
393
394 /* Map a page of domheap memory */
395 void *map_domain_page(mfn_t mfn)
396 {
397 unsigned long flags;
398 lpae_t *map = this_cpu(xen_dommap);
399 unsigned long slot_mfn = mfn_x(mfn) & ~LPAE_ENTRY_MASK;
400 vaddr_t va;
401 lpae_t pte;
402 int i, slot;
403
404 local_irq_save(flags);
405
406 /* The map is laid out as an open-addressed hash table where each
407 * entry is a 2MB superpage pte. We use the available bits of each
408 * PTE as a reference count; when the refcount is zero the slot can
409 * be reused. */
410 for ( slot = (slot_mfn >> LPAE_SHIFT) % DOMHEAP_ENTRIES, i = 0;
411 i < DOMHEAP_ENTRIES;
412 slot = (slot + 1) % DOMHEAP_ENTRIES, i++ )
413 {
414 if ( map[slot].pt.avail < 0xf &&
415 map[slot].pt.base == slot_mfn &&
416 map[slot].pt.valid )
417 {
418 /* This slot already points to the right place; reuse it */
419 map[slot].pt.avail++;
420 break;
421 }
422 else if ( map[slot].pt.avail == 0 )
423 {
424 /* Commandeer this 2MB slot */
425 pte = mfn_to_xen_entry(_mfn(slot_mfn), MT_NORMAL);
426 pte.pt.avail = 1;
427 write_pte(map + slot, pte);
428 break;
429 }
430
431 }
432 /* If the map fills up, the callers have misbehaved. */
433 BUG_ON(i == DOMHEAP_ENTRIES);
434
435 #ifndef NDEBUG
436 /* Searching the hash could get slow if the map starts filling up.
437 * Cross that bridge when we come to it */
438 {
439 static int max_tries = 32;
440 if ( i >= max_tries )
441 {
442 dprintk(XENLOG_WARNING, "Domheap map is filling: %i tries\n", i);
443 max_tries *= 2;
444 }
445 }
446 #endif
447
448 local_irq_restore(flags);
449
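/* Compute the VA within the chosen 2MB slot: for example, slot 3 and an
 * mfn whose low LPAE_SHIFT bits are 5 yield
 * DOMHEAP_VIRT_START + 3 * SZ_2M + 5 * PAGE_SIZE. */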
450 va = (DOMHEAP_VIRT_START
451 + (slot << SECOND_SHIFT)
452 + ((mfn_x(mfn) & LPAE_ENTRY_MASK) << THIRD_SHIFT));
453
454 /*
455 * We may not have flushed this specific subpage at map time,
456 * since we only flush the 4k page not the superpage
457 */
458 flush_xen_data_tlb_range_va_local(va, PAGE_SIZE);
459
460 return (void *)va;
461 }
462
463 /* Release a mapping taken with map_domain_page() */
464 void unmap_domain_page(const void *va)
465 {
466 unsigned long flags;
467 lpae_t *map = this_cpu(xen_dommap);
468 int slot = ((unsigned long) va - DOMHEAP_VIRT_START) >> SECOND_SHIFT;
469
470 local_irq_save(flags);
471
472 ASSERT(slot >= 0 && slot < DOMHEAP_ENTRIES);
473 ASSERT(map[slot].pt.avail != 0);
474
475 map[slot].pt.avail--;
476
477 local_irq_restore(flags);
478 }
479
480 unsigned long domain_page_map_to_mfn(const void *ptr)
481 {
482 unsigned long va = (unsigned long)ptr;
483 lpae_t *map = this_cpu(xen_dommap);
484 int slot = (va - DOMHEAP_VIRT_START) >> SECOND_SHIFT;
485 unsigned long offset = (va>>THIRD_SHIFT) & LPAE_ENTRY_MASK;
486
487 if ( va >= VMAP_VIRT_START && va < VMAP_VIRT_END )
488 return __virt_to_mfn(va);
489
490 ASSERT(slot >= 0 && slot < DOMHEAP_ENTRIES);
491 ASSERT(map[slot].pt.avail != 0);
492
493 return map[slot].pt.base + offset;
494 }
495 #endif
496
497 void flush_page_to_ram(unsigned long mfn, bool sync_icache)
498 {
499 void *v = map_domain_page(_mfn(mfn));
500
501 clean_and_invalidate_dcache_va_range(v, PAGE_SIZE);
502 unmap_domain_page(v);
503
504 /*
505 * For some types of instruction cache (such as VIPT), the entire I-Cache
506 * needs to be flushed to guarantee that all the aliases of a given
507 * physical address will be removed from the cache.
508 * Invalidating the I-Cache by VA highly depends on the behavior of the
509 * I-Cache (See D4.9.2 in ARM DDI 0487A.k_iss10775). Instead of using flush
510 * by VA on select platforms, we just flush the entire cache here.
511 */
512 if ( sync_icache )
513 invalidate_icache();
514 }
515
516 void __init arch_init_memory(void)
517 {
518 /*
519 * Initialise our DOMID_XEN domain.
520 * Any Xen-heap pages that we will allow to be mapped will have
521 * their domain field set to dom_xen.
522 */
523 dom_xen = domain_create(DOMID_XEN, DOMCRF_dummy, 0, NULL);
524 BUG_ON(IS_ERR(dom_xen));
525
526 /*
527 * Initialise our DOMID_IO domain.
528 * This domain owns I/O pages that are within the range of the page_info
529 * array. Mappings occur at the privilege level of the caller.
530 */
531 dom_io = domain_create(DOMID_IO, DOMCRF_dummy, 0, NULL);
532 BUG_ON(IS_ERR(dom_io));
533
534 /*
535 * Initialise our COW domain.
536 * This domain owns sharable pages.
537 */
538 dom_cow = domain_create(DOMID_COW, DOMCRF_dummy, 0, NULL);
539 BUG_ON(IS_ERR(dom_cow));
540 }
541
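/* Build a Xen pagetable entry for a Xen virtual address: phys_offset is the
 * difference between Xen's physical load address and its link-time virtual
 * start (see setup_pagetables()), so va + phys_offset is the machine address
 * that currently backs va. */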
542 static inline lpae_t pte_of_xenaddr(vaddr_t va)
543 {
544 paddr_t ma = va + phys_offset;
545
546 return mfn_to_xen_entry(maddr_to_mfn(ma), MT_NORMAL);
547 }
548
549 /* Map the FDT in the early boot page table */
550 void * __init early_fdt_map(paddr_t fdt_paddr)
551 {
552 /* We are using 2MB superpage for mapping the FDT */
553 paddr_t base_paddr = fdt_paddr & SECOND_MASK;
554 paddr_t offset;
555 void *fdt_virt;
556 uint32_t size;
557
558 /*
559 * Check whether the physical FDT address is set and meets the minimum
560 * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be at
561 * least 8 bytes so that we always access the magic and size fields
562 * of the FDT header after mapping the first chunk, double check if
563 * that is indeed the case.
564 */
565 BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
566 if ( !fdt_paddr || fdt_paddr % MIN_FDT_ALIGN )
567 return NULL;
568
569 /* The FDT is mapped using 2MB superpage */
570 BUILD_BUG_ON(BOOT_FDT_VIRT_START % SZ_2M);
571
572 create_mappings(boot_second, BOOT_FDT_VIRT_START, paddr_to_pfn(base_paddr),
573 SZ_2M >> PAGE_SHIFT, SZ_2M);
574
575 offset = fdt_paddr % SECOND_SIZE;
576 fdt_virt = (void *)BOOT_FDT_VIRT_START + offset;
577
578 if ( fdt_magic(fdt_virt) != FDT_MAGIC )
579 return NULL;
580
581 size = fdt_totalsize(fdt_virt);
582 if ( size > MAX_FDT_SIZE )
583 return NULL;
584
585 if ( (offset + size) > SZ_2M )
586 {
587 create_mappings(boot_second, BOOT_FDT_VIRT_START + SZ_2M,
588 paddr_to_pfn(base_paddr + SZ_2M),
589 SZ_2M >> PAGE_SHIFT, SZ_2M);
590 }
591
592 return fdt_virt;
593 }
594
595 void __init remove_early_mappings(void)
596 {
597 lpae_t pte = {0};
598 write_pte(xen_second + second_table_offset(BOOT_FDT_VIRT_START), pte);
599 write_pte(xen_second + second_table_offset(BOOT_FDT_VIRT_START + SZ_2M),
600 pte);
601 flush_xen_data_tlb_range_va(BOOT_FDT_VIRT_START, BOOT_FDT_SLOT_SIZE);
602 }
603
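/* relocate_xen() is implemented in assembly (head.S): it copies len bytes of
 * the Xen image from src to dst and switches over to the page tables at ttbr
 * before returning. */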
604 extern void relocate_xen(uint64_t ttbr, void *src, void *dst, size_t len);
605
606 /* Clear a translation table and clean & invalidate the cache */
607 static void clear_table(void *table)
608 {
609 clear_page(table);
610 clean_and_invalidate_dcache_va_range(table, PAGE_SIZE);
611 }
612
613 /* Boot-time pagetable setup.
614 * Changes here may need matching changes in head.S */
615 void __init setup_pagetables(unsigned long boot_phys_offset, paddr_t xen_paddr)
616 {
617 uint64_t ttbr;
618 unsigned long dest_va;
619 lpae_t pte, *p;
620 int i;
621
622 /* Calculate virt-to-phys offset for the new location */
623 phys_offset = xen_paddr - (unsigned long) _start;
624
625 #ifdef CONFIG_ARM_64
626 p = (void *) xen_pgtable;
627 p[0] = pte_of_xenaddr((uintptr_t)xen_first);
628 p[0].pt.table = 1;
629 p[0].pt.xn = 0;
630 p = (void *) xen_first;
631 #else
632 p = (void *) cpu0_pgtable;
633 #endif
634
635 /* Initialise first level entries, to point to second level entries */
636 for ( i = 0; i < 2; i++)
637 {
638 p[i] = pte_of_xenaddr((uintptr_t)(xen_second+i*LPAE_ENTRIES));
639 p[i].pt.table = 1;
640 p[i].pt.xn = 0;
641 }
642
643 #ifdef CONFIG_ARM_32
644 for ( i = 0; i < DOMHEAP_SECOND_PAGES; i++ )
645 {
646 p[first_table_offset(DOMHEAP_VIRT_START+i*FIRST_SIZE)]
647 = pte_of_xenaddr((uintptr_t)(cpu0_dommap+i*LPAE_ENTRIES));
648 p[first_table_offset(DOMHEAP_VIRT_START+i*FIRST_SIZE)].pt.table = 1;
649 }
650 #endif
651
652 /* Initialise xen second level entries ... */
653 /* ... Xen's text etc */
654
655 pte = mfn_to_xen_entry(maddr_to_mfn(xen_paddr), MT_NORMAL);
656 pte.pt.xn = 0;/* Contains our text mapping! */
657 xen_second[second_table_offset(XEN_VIRT_START)] = pte;
658
659 /* ... Fixmap */
660 pte = pte_of_xenaddr((vaddr_t)xen_fixmap);
661 pte.pt.table = 1;
662 xen_second[second_table_offset(FIXMAP_ADDR(0))] = pte;
663
664 /* ... DTB */
665 pte = boot_second[second_table_offset(BOOT_FDT_VIRT_START)];
666 xen_second[second_table_offset(BOOT_FDT_VIRT_START)] = pte;
667 pte = boot_second[second_table_offset(BOOT_FDT_VIRT_START + SZ_2M)];
668 xen_second[second_table_offset(BOOT_FDT_VIRT_START + SZ_2M)] = pte;
669
670 /* ... Boot Misc area for xen relocation */
671 dest_va = BOOT_RELOC_VIRT_START;
672 pte = mfn_to_xen_entry(maddr_to_mfn(xen_paddr), MT_NORMAL);
673 /* Map the destination in xen_second. */
674 xen_second[second_table_offset(dest_va)] = pte;
675 /* Map the destination in boot_second. */
676 write_pte(boot_second + second_table_offset(dest_va), pte);
677 flush_xen_data_tlb_range_va_local(dest_va, SECOND_SIZE);
678 #ifdef CONFIG_ARM_64
679 ttbr = (uintptr_t) xen_pgtable + phys_offset;
680 #else
681 ttbr = (uintptr_t) cpu0_pgtable + phys_offset;
682 #endif
683
684 relocate_xen(ttbr, _start, (void*)dest_va, _end - _start);
685
686 /* Clear the copy of the boot pagetables. Each secondary CPU
687 * rebuilds these itself (see head.S) */
688 clear_table(boot_pgtable);
689 #ifdef CONFIG_ARM_64
690 clear_table(boot_first);
691 clear_table(boot_first_id);
692 #endif
693 clear_table(boot_second);
694 clear_table(boot_third);
695
696 /* Break up the Xen mapping into 4k pages and protect them separately. */
697 for ( i = 0; i < LPAE_ENTRIES; i++ )
698 {
699 mfn_t mfn = mfn_add(maddr_to_mfn(xen_paddr), i);
700 unsigned long va = XEN_VIRT_START + (i << PAGE_SHIFT);
701 if ( !is_kernel(va) )
702 break;
703 pte = mfn_to_xen_entry(mfn, MT_NORMAL);
704 pte.pt.table = 1; /* 4k mappings always have this bit set */
705 if ( is_kernel_text(va) || is_kernel_inittext(va) )
706 {
707 pte.pt.xn = 0;
708 pte.pt.ro = 1;
709 }
710 if ( is_kernel_rodata(va) )
711 pte.pt.ro = 1;
712 write_pte(xen_xenmap + i, pte);
713 /* No flush required here as page table is not hooked in yet. */
714 }
715
716 pte = pte_of_xenaddr((vaddr_t)xen_xenmap);
717 pte.pt.table = 1;
718 write_pte(xen_second + second_linear_offset(XEN_VIRT_START), pte);
719 /* TLBFLUSH and ISB would be needed here, but wait until we set WXN */
720
721 /* From now on, no mapping may be both writable and executable. */
722 WRITE_SYSREG32(READ_SYSREG32(SCTLR_EL2) | SCTLR_WXN, SCTLR_EL2);
723 /* Flush everything after setting WXN bit. */
724 flush_xen_text_tlb_local();
725
726 #ifdef CONFIG_ARM_32
727 per_cpu(xen_pgtable, 0) = cpu0_pgtable;
728 per_cpu(xen_dommap, 0) = cpu0_dommap;
729
730 /* Make sure it is clear */
731 memset(this_cpu(xen_dommap), 0, DOMHEAP_SECOND_PAGES*PAGE_SIZE);
732 clean_dcache_va_range(this_cpu(xen_dommap),
733 DOMHEAP_SECOND_PAGES*PAGE_SIZE);
734 #endif
735 }
736
737 #ifdef CONFIG_ARM_64
738 int init_secondary_pagetables(int cpu)
739 {
740 /* Set init_ttbr for this CPU coming up. All CPUs share a single set of
741 * pagetables, but rewrite it each time for consistency with 32-bit. */
742 init_ttbr = (uintptr_t) xen_pgtable + phys_offset;
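/* Secondary CPUs read init_ttbr from head.S with the MMU and data cache
 * still disabled, so the new value must be cleaned to the point of
 * coherency for them to observe it. */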
743 clean_dcache(init_ttbr);
744 return 0;
745 }
746 #else
747 int init_secondary_pagetables(int cpu)
748 {
749 lpae_t *first, *domheap, pte;
750 int i;
751
752 first = alloc_xenheap_page(); /* root == first level on 32-bit 3-level trie */
753 domheap = alloc_xenheap_pages(get_order_from_pages(DOMHEAP_SECOND_PAGES), 0);
754
755 if ( domheap == NULL || first == NULL )
756 {
757 printk("Not enough free memory for secondary CPU%d pagetables\n", cpu);
758 free_xenheap_pages(domheap, get_order_from_pages(DOMHEAP_SECOND_PAGES));
759 free_xenheap_page(first);
760 return -ENOMEM;
761 }
762
763 /* Initialise root pagetable from root of boot tables */
764 memcpy(first, cpu0_pgtable, PAGE_SIZE);
765
766 /* Ensure the domheap has no stray mappings */
767 memset(domheap, 0, DOMHEAP_SECOND_PAGES*PAGE_SIZE);
768
769 /* Update the first level mapping to reference the local CPU's
770 * domheap mapping pages. */
771 for ( i = 0; i < DOMHEAP_SECOND_PAGES; i++ )
772 {
773 pte = mfn_to_xen_entry(virt_to_mfn(domheap+i*LPAE_ENTRIES),
774 MT_NORMAL);
775 pte.pt.table = 1;
776 write_pte(&first[first_table_offset(DOMHEAP_VIRT_START+i*FIRST_SIZE)], pte);
777 }
778
779 clean_dcache_va_range(first, PAGE_SIZE);
780 clean_dcache_va_range(domheap, DOMHEAP_SECOND_PAGES*PAGE_SIZE);
781
782 per_cpu(xen_pgtable, cpu) = first;
783 per_cpu(xen_dommap, cpu) = domheap;
784
785 /* Set init_ttbr for this CPU coming up */
786 init_ttbr = __pa(first);
787 clean_dcache(init_ttbr);
788
789 return 0;
790 }
791 #endif
792
793 /* MMU setup for secondary CPUS (which already have paging enabled) */
794 void mmu_init_secondary_cpu(void)
795 {
796 /* From now on, no mapping may be both writable and executable. */
797 WRITE_SYSREG32(READ_SYSREG32(SCTLR_EL2) | SCTLR_WXN, SCTLR_EL2);
798 flush_xen_text_tlb_local();
799 }
800
801 #ifdef CONFIG_ARM_32
802 /* Set up the xenheap: up to 1GB of contiguous, always-mapped memory. */
803 void __init setup_xenheap_mappings(unsigned long base_mfn,
804 unsigned long nr_mfns)
805 {
806 create_mappings(xen_second, XENHEAP_VIRT_START, base_mfn, nr_mfns, MB(32));
807
808 /* Record where the xenheap is, for translation routines. */
809 xenheap_virt_end = XENHEAP_VIRT_START + nr_mfns * PAGE_SIZE;
810 xenheap_mfn_start = _mfn(base_mfn);
811 xenheap_mfn_end = _mfn(base_mfn + nr_mfns);
812 }
813 #else /* CONFIG_ARM_64 */
814 void __init setup_xenheap_mappings(unsigned long base_mfn,
815 unsigned long nr_mfns)
816 {
817 lpae_t *first, pte;
818 unsigned long mfn, end_mfn;
819 vaddr_t vaddr;
820
821 /* Align to previous 1GB boundary */
822 mfn = base_mfn & ~((FIRST_SIZE>>PAGE_SHIFT)-1);
823
824 /* First call sets the xenheap physical and virtual offset. */
825 if ( mfn_eq(xenheap_mfn_start, INVALID_MFN) )
826 {
827 xenheap_mfn_start = _mfn(base_mfn);
828 xenheap_virt_start = DIRECTMAP_VIRT_START +
829 (base_mfn - mfn) * PAGE_SIZE;
830 }
831
832 if ( base_mfn < mfn_x(xenheap_mfn_start) )
833 panic("cannot add xenheap mapping at %lx below heap start %lx",
834 base_mfn, mfn_x(xenheap_mfn_start));
835
836 end_mfn = base_mfn + nr_mfns;
837
838 /*
839 * Virtual address aligned to previous 1GB to match physical
840 * address alignment done above.
841 */
842 vaddr = (vaddr_t)__mfn_to_virt(base_mfn) & FIRST_MASK;
843
844 while ( mfn < end_mfn )
845 {
846 int slot = zeroeth_table_offset(vaddr);
847 lpae_t *p = &xen_pgtable[slot];
848
849 if ( p->pt.valid )
850 {
851 /* mfn_to_virt is not valid for the first first-level table
852 * (xenheap_first_first), since it is not within the xenheap. */
853 first = slot == xenheap_first_first_slot ?
854 xenheap_first_first : __mfn_to_virt(p->pt.base);
855 }
856 else if ( xenheap_first_first_slot == -1)
857 {
858 /* Use xenheap_first_first to bootstrap the mappings */
859 first = xenheap_first_first;
860
861 pte = pte_of_xenaddr((vaddr_t)xenheap_first_first);
862 pte.pt.table = 1;
863 write_pte(p, pte);
864
865 xenheap_first_first_slot = slot;
866 }
867 else
868 {
869 mfn_t first_mfn = alloc_boot_pages(1, 1);
870
871 clear_page(mfn_to_virt(first_mfn));
872 pte = mfn_to_xen_entry(first_mfn, MT_NORMAL);
873 pte.pt.table = 1;
874 write_pte(p, pte);
875 first = mfn_to_virt(first_mfn);
876 }
877
878 pte = mfn_to_xen_entry(_mfn(mfn), MT_NORMAL);
879 /* TODO: Set pte.pt.contig when appropriate. */
880 write_pte(&first[first_table_offset(vaddr)], pte);
881
882 mfn += FIRST_SIZE>>PAGE_SHIFT;
883 vaddr += FIRST_SIZE;
884 }
885
886 flush_xen_data_tlb_local();
887 }
888 #endif
889
890 /* Map a frame table to cover physical addresses ps through pe */
891 void __init setup_frametable_mappings(paddr_t ps, paddr_t pe)
892 {
893 unsigned long nr_pages = (pe - ps) >> PAGE_SHIFT;
894 unsigned long nr_pdxs = pfn_to_pdx(nr_pages);
895 unsigned long frametable_size = nr_pdxs * sizeof(struct page_info);
896 mfn_t base_mfn;
897 const unsigned long mapping_size = frametable_size < MB(32) ? MB(2) : MB(32);
898 #ifdef CONFIG_ARM_64
899 lpae_t *second, pte;
900 unsigned long nr_second;
901 mfn_t second_base;
902 int i;
903 #endif
904
905 frametable_base_pdx = pfn_to_pdx(ps >> PAGE_SHIFT);
906 /* Round up to 2M or 32M boundary, as appropriate. */
907 frametable_size = ROUNDUP(frametable_size, mapping_size);
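/* The alignment argument below, 32<<(20-12), is 32MB expressed in 4K pages
 * (8192), so the frametable is always placed such that either 2MB or 32MB
 * mappings can be used. */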
908 base_mfn = alloc_boot_pages(frametable_size >> PAGE_SHIFT, 32<<(20-12));
909
910 #ifdef CONFIG_ARM_64
911 /* Compute the number of second level pages. */
912 nr_second = ROUNDUP(frametable_size, FIRST_SIZE) >> FIRST_SHIFT;
913 second_base = alloc_boot_pages(nr_second, 1);
914 second = mfn_to_virt(second_base);
915 for ( i = 0; i < nr_second; i++ )
916 {
917 clear_page(mfn_to_virt(mfn_add(second_base, i)));
918 pte = mfn_to_xen_entry(mfn_add(second_base, i), MT_NORMAL);
919 pte.pt.table = 1;
920 write_pte(&xen_first[first_table_offset(FRAMETABLE_VIRT_START)+i], pte);
921 }
922 create_mappings(second, 0, mfn_x(base_mfn), frametable_size >> PAGE_SHIFT,
923 mapping_size);
924 #else
925 create_mappings(xen_second, FRAMETABLE_VIRT_START, mfn_x(base_mfn),
926 frametable_size >> PAGE_SHIFT, mapping_size);
927 #endif
928
929 memset(&frame_table[0], 0, nr_pdxs * sizeof(struct page_info));
930 memset(&frame_table[nr_pdxs], -1,
931 frametable_size - (nr_pdxs * sizeof(struct page_info)));
932
933 frametable_virt_end = FRAMETABLE_VIRT_START + (nr_pdxs * sizeof(struct page_info));
934 }
935
936 void *__init arch_vmap_virt_end(void)
937 {
938 return (void *)VMAP_VIRT_END;
939 }
940
941 /*
942 * This function should only be used to remap device address ranges
943 * TODO: add a check to verify this assumption
944 */
945 void *ioremap_attr(paddr_t pa, size_t len, unsigned int attributes)
946 {
947 mfn_t mfn = _mfn(PFN_DOWN(pa));
948 unsigned int offs = pa & (PAGE_SIZE - 1);
949 unsigned int nr = PFN_UP(offs + len);
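/* offs is the sub-page offset of pa and nr the number of pages needed to
 * cover [pa, pa + len): e.g. pa == 0x40001234, len == 0x2000 gives
 * offs == 0x234 and nr == PFN_UP(0x2234) == 3. */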
950 void *ptr = __vmap(&mfn, nr, 1, 1, attributes, VMAP_DEFAULT);
951
952 if ( ptr == NULL )
953 return NULL;
954
955 return ptr + offs;
956 }
957
958 void *ioremap(paddr_t pa, size_t len)
959 {
960 return ioremap_attr(pa, len, PAGE_HYPERVISOR_NOCACHE);
961 }
962
963 static int create_xen_table(lpae_t *entry)
964 {
965 void *p;
966 lpae_t pte;
967
968 p = alloc_xenheap_page();
969 if ( p == NULL )
970 return -ENOMEM;
971 clear_page(p);
972 pte = mfn_to_xen_entry(virt_to_mfn(p), MT_NORMAL);
973 pte.pt.table = 1;
974 write_pte(entry, pte);
975 return 0;
976 }
977
978 enum xenmap_operation {
979 INSERT,
980 REMOVE,
981 MODIFY,
982 RESERVE
983 };
984
985 static int create_xen_entries(enum xenmap_operation op,
986 unsigned long virt,
987 mfn_t mfn,
988 unsigned long nr_mfns,
989 unsigned int flags)
990 {
991 int rc;
992 unsigned long addr = virt, addr_end = addr + nr_mfns * PAGE_SIZE;
993 lpae_t pte, *entry;
994 lpae_t *third = NULL;
995
996 for(; addr < addr_end; addr += PAGE_SIZE, mfn = mfn_add(mfn, 1))
997 {
998 entry = &xen_second[second_linear_offset(addr)];
999 if ( !lpae_table(*entry) )
1000 {
1001 rc = create_xen_table(entry);
1002 if ( rc < 0 ) {
1003 printk("%s: L2 failed\n", __func__);
1004 goto out;
1005 }
1006 }
1007
1008 BUG_ON(!lpae_valid(*entry));
1009
1010 third = __mfn_to_virt(entry->pt.base);
1011 entry = &third[third_table_offset(addr)];
1012
1013 switch ( op ) {
1014 case INSERT:
1015 case RESERVE:
1016 if ( lpae_valid(*entry) )
1017 {
1018 printk("%s: trying to replace an existing mapping addr=%lx mfn=%"PRI_mfn"\n",
1019 __func__, addr, mfn_x(mfn));
1020 return -EINVAL;
1021 }
1022 if ( op == RESERVE )
1023 break;
1024 pte = mfn_to_xen_entry(mfn, PAGE_AI_MASK(flags));
1025 pte.pt.ro = PAGE_RO_MASK(flags);
1026 pte.pt.xn = PAGE_XN_MASK(flags);
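/* Mirror the SCTLR WXN policy set in setup_pagetables(): no Xen mapping
 * may be both writable and executable. */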
1027 BUG_ON(!pte.pt.ro && !pte.pt.xn);
1028 pte.pt.table = 1;
1029 write_pte(entry, pte);
1030 break;
1031 case MODIFY:
1032 case REMOVE:
1033 if ( !lpae_valid(*entry) )
1034 {
1035 printk("%s: trying to %s a non-existing mapping addr=%lx\n",
1036 __func__, op == REMOVE ? "remove" : "modify", addr);
1037 return -EINVAL;
1038 }
1039 if ( op == REMOVE )
1040 pte.bits = 0;
1041 else
1042 {
1043 pte = *entry;
1044 pte.pt.ro = PAGE_RO_MASK(flags);
1045 pte.pt.xn = PAGE_XN_MASK(flags);
1046 if ( !pte.pt.ro && !pte.pt.xn )
1047 {
1048 printk("%s: Incorrect combination for addr=%lx\n",
1049 __func__, addr);
1050 return -EINVAL;
1051 }
1052 }
1053 write_pte(entry, pte);
1054 break;
1055 default:
1056 BUG();
1057 }
1058 }
1059 flush_xen_data_tlb_range_va(virt, PAGE_SIZE * nr_mfns);
1060
1061 rc = 0;
1062
1063 out:
1064 return rc;
1065 }
1066
1067 int map_pages_to_xen(unsigned long virt,
1068 unsigned long mfn,
1069 unsigned long nr_mfns,
1070 unsigned int flags)
1071 {
1072 return create_xen_entries(INSERT, virt, _mfn(mfn), nr_mfns, flags);
1073 }
1074
1075 int populate_pt_range(unsigned long virt, unsigned long mfn,
1076 unsigned long nr_mfns)
1077 {
1078 return create_xen_entries(RESERVE, virt, _mfn(mfn), nr_mfns, 0);
1079 }
1080
1081 int destroy_xen_mappings(unsigned long v, unsigned long e)
1082 {
1083 return create_xen_entries(REMOVE, v, INVALID_MFN, (e - v) >> PAGE_SHIFT, 0);
1084 }
1085
1086 int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int flags)
1087 {
1088 return create_xen_entries(MODIFY, s, INVALID_MFN, (e - s) >> PAGE_SHIFT,
1089 flags);
1090 }
1091
1092 enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
1093 static void set_pte_flags_on_range(const char *p, unsigned long l, enum mg mg)
1094 {
1095 lpae_t pte;
1096 int i;
1097
1098 ASSERT(is_kernel(p) && is_kernel(p + l));
1099
1100 /* Can only guard at page granularity */
1101 ASSERT(!((unsigned long) p & ~PAGE_MASK));
1102 ASSERT(!(l & ~PAGE_MASK));
1103
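/* xen_xenmap has one 4K third-level entry per page of the Xen image,
 * indexed relative to _start (it is populated in setup_pagetables()). */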
1104 for ( i = (p - _start) / PAGE_SIZE;
1105 i < (p + l - _start) / PAGE_SIZE;
1106 i++ )
1107 {
1108 pte = xen_xenmap[i];
1109 switch ( mg )
1110 {
1111 case mg_clear:
1112 pte.pt.valid = 0;
1113 break;
1114 case mg_ro:
1115 pte.pt.valid = 1;
1116 pte.pt.pxn = 1;
1117 pte.pt.xn = 1;
1118 pte.pt.ro = 1;
1119 break;
1120 case mg_rw:
1121 pte.pt.valid = 1;
1122 pte.pt.pxn = 1;
1123 pte.pt.xn = 1;
1124 pte.pt.ro = 0;
1125 break;
1126 case mg_rx:
1127 pte.pt.valid = 1;
1128 pte.pt.pxn = 0;
1129 pte.pt.xn = 0;
1130 pte.pt.ro = 1;
1131 break;
1132 }
1133 write_pte(xen_xenmap + i, pte);
1134 }
1135 flush_xen_text_tlb_local();
1136 }
1137
1138 /* Release all __init and __initdata ranges to be reused */
1139 void free_init_memory(void)
1140 {
1141 paddr_t pa = virt_to_maddr(__init_begin);
1142 unsigned long len = __init_end - __init_begin;
1143 uint32_t insn;
1144 unsigned int i, nr = len / sizeof(insn);
1145 uint32_t *p;
1146
1147 set_pte_flags_on_range(__init_begin, len, mg_rw);
1148 #ifdef CONFIG_ARM_32
1149 /* udf instruction, i.e. a permanently undefined encoding (see A8.8.247 in ARM DDI 0406C.c) */
1150 insn = 0xe7f000f0;
1151 #else
1152 insn = AARCH64_BREAK_FAULT;
1153 #endif
1154 p = (uint32_t *)__init_begin;
1155 for ( i = 0; i < nr; i++ )
1156 *(p + i) = insn;
1157
1158 set_pte_flags_on_range(__init_begin, len, mg_clear);
1159 init_domheap_pages(pa, pa + len);
1160 printk("Freed %ldkB init memory.\n", (long)(__init_end-__init_begin)>>10);
1161 }
1162
1163 void arch_dump_shared_mem_info(void)
1164 {
1165 }
1166
1167 int donate_page(struct domain *d, struct page_info *page, unsigned int memflags)
1168 {
1169 ASSERT_UNREACHABLE();
1170 return -ENOSYS;
1171 }
1172
1173 int steal_page(
1174 struct domain *d, struct page_info *page, unsigned int memflags)
1175 {
1176 return -EOPNOTSUPP;
1177 }
1178
1179 int page_is_ram_type(unsigned long mfn, unsigned long mem_type)
1180 {
1181 ASSERT_UNREACHABLE();
1182 return 0;
1183 }
1184
1185 unsigned long domain_get_maximum_gpfn(struct domain *d)
1186 {
1187 return gfn_x(d->arch.p2m.max_mapped_gfn);
1188 }
1189
1190 void share_xen_page_with_guest(struct page_info *page,
1191 struct domain *d, int readonly)
1192 {
1193 if ( page_get_owner(page) == d )
1194 return;
1195
1196 spin_lock(&d->page_alloc_lock);
1197
1198 /* The incremented type count pins as writable or read-only. */
1199 page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page) | 1;
1200
1201 page_set_owner(page, d);
1202 smp_wmb(); /* install valid domain ptr before updating refcnt. */
1203 ASSERT((page->count_info & ~PGC_xen_heap) == 0);
1204
1205 /* Only add to the allocation list if the domain isn't dying. */
1206 if ( !d->is_dying )
1207 {
1208 page->count_info |= PGC_allocated | 1;
1209 if ( unlikely(d->xenheap_pages++ == 0) )
1210 get_knownalive_domain(d);
1211 page_list_add_tail(page, &d->xenpage_list);
1212 }
1213
1214 spin_unlock(&d->page_alloc_lock);
1215 }
1216
1217 void share_xen_page_with_privileged_guests(
1218 struct page_info *page, int readonly)
1219 {
1220 share_xen_page_with_guest(page, dom_xen, readonly);
1221 }
1222
1223 int xenmem_add_to_physmap_one(
1224 struct domain *d,
1225 unsigned int space,
1226 union xen_add_to_physmap_batch_extra extra,
1227 unsigned long idx,
1228 gfn_t gfn)
1229 {
1230 mfn_t mfn = INVALID_MFN;
1231 int rc;
1232 p2m_type_t t;
1233 struct page_info *page = NULL;
1234
1235 switch ( space )
1236 {
1237 case XENMAPSPACE_grant_table:
1238 rc = gnttab_map_frame(d, idx, gfn, &mfn);
1239 if ( rc )
1240 return rc;
1241
1242 t = p2m_ram_rw;
1243
1244 break;
1245 case XENMAPSPACE_shared_info:
1246 if ( idx != 0 )
1247 return -EINVAL;
1248
1249 mfn = virt_to_mfn(d->shared_info);
1250 t = p2m_ram_rw;
1251
1252 break;
1253 case XENMAPSPACE_gmfn_foreign:
1254 {
1255 struct domain *od;
1256 p2m_type_t p2mt;
1257
1258 od = rcu_lock_domain_by_any_id(extra.foreign_domid);
1259 if ( od == NULL )
1260 return -ESRCH;
1261
1262 if ( od == d )
1263 {
1264 rcu_unlock_domain(od);
1265 return -EINVAL;
1266 }
1267
1268 rc = xsm_map_gmfn_foreign(XSM_TARGET, d, od);
1269 if ( rc )
1270 {
1271 rcu_unlock_domain(od);
1272 return rc;
1273 }
1274
1275 /* Take a reference to the foreign domain page.
1276 * The reference will be released in XENMEM_remove_from_physmap. */
1277 page = get_page_from_gfn(od, idx, &p2mt, P2M_ALLOC);
1278 if ( !page )
1279 {
1280 rcu_unlock_domain(od);
1281 return -EINVAL;
1282 }
1283
1284 if ( !p2m_is_ram(p2mt) )
1285 {
1286 put_page(page);
1287 rcu_unlock_domain(od);
1288 return -EINVAL;
1289 }
1290
1291 mfn = _mfn(page_to_mfn(page));
1292 t = p2m_map_foreign;
1293
1294 rcu_unlock_domain(od);
1295 break;
1296 }
1297 case XENMAPSPACE_dev_mmio:
1298 /* extra should be 0. Reserved for future use. */
1299 if ( extra.res0 )
1300 return -EOPNOTSUPP;
1301
1302 rc = map_dev_mmio_region(d, gfn, 1, _mfn(idx));
1303 return rc;
1304
1305 default:
1306 return -ENOSYS;
1307 }
1308
1309 /* Map at new location. */
1310 rc = guest_physmap_add_entry(d, gfn, mfn, 0, t);
1311
1312 /* If we fail to add the mapping, we need to drop the reference we
1313 * took earlier on foreign pages */
1314 if ( rc && space == XENMAPSPACE_gmfn_foreign )
1315 {
1316 ASSERT(page != NULL);
1317 put_page(page);
1318 }
1319
1320 return rc;
1321 }
1322
1323 long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg)
1324 {
1325 switch ( op )
1326 {
1327 /* XXX: memsharing not working yet */
1328 case XENMEM_get_sharing_shared_pages:
1329 case XENMEM_get_sharing_freed_pages:
1330 return 0;
1331
1332 default:
1333 return -ENOSYS;
1334 }
1335
1336 return 0;
1337 }
1338
1339 struct domain *page_get_owner_and_reference(struct page_info *page)
1340 {
1341 unsigned long x, y = page->count_info;
1342 struct domain *owner;
1343
1344 do {
1345 x = y;
1346 /*
1347 * Count == 0: Page is not allocated, so we cannot take a reference.
1348 * Count == -1: Reference count would wrap, which is invalid.
1349 */
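/* Both cases are caught by one comparison: (x + 1) & PGC_count_mask is 1
 * when the count is 0 and 0 when the count is all ones. */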
1350 if ( unlikely(((x + 1) & PGC_count_mask) <= 1) )
1351 return NULL;
1352 }
1353 while ( (y = cmpxchg(&page->count_info, x, x + 1)) != x );
1354
1355 owner = page_get_owner(page);
1356 ASSERT(owner);
1357
1358 return owner;
1359 }
1360
1361 void put_page(struct page_info *page)
1362 {
1363 unsigned long nx, x, y = page->count_info;
1364
1365 do {
1366 ASSERT((y & PGC_count_mask) != 0);
1367 x = y;
1368 nx = x - 1;
1369 }
1370 while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) );
1371
1372 if ( unlikely((nx & PGC_count_mask) == 0) )
1373 {
1374 free_domheap_page(page);
1375 }
1376 }
1377
1378 int get_page(struct page_info *page, struct domain *domain)
1379 {
1380 struct domain *owner = page_get_owner_and_reference(page);
1381
1382 if ( likely(owner == domain) )
1383 return 1;
1384
1385 if ( owner != NULL )
1386 put_page(page);
1387
1388 return 0;
1389 }
1390
1391 /* Common code requires get_page_type and put_page_type.
1392 * We don't care about typecounts so we just do the minimum to make it
1393 * happy. */
1394 int get_page_type(struct page_info *page, unsigned long type)
1395 {
1396 return 1;
1397 }
1398
1399 void put_page_type(struct page_info *page)
1400 {
1401 return;
1402 }
1403
1404 void gnttab_clear_flag(unsigned long nr, uint16_t *addr)
1405 {
1406 /*
1407 * Note that this cannot be clear_bit(), as the access must be
1408 * confined to the specified 2 bytes.
1409 */
1410 uint16_t mask = ~(1 << nr), old;
1411
1412 do {
1413 old = *addr;
1414 } while (cmpxchg(addr, old, old & mask) != old);
1415 }
1416
1417 void gnttab_mark_dirty(struct domain *d, unsigned long l)
1418 {
1419 /* XXX: mark dirty */
1420 static int warning;
1421 if (!warning) {
1422 gdprintk(XENLOG_WARNING, "gnttab_mark_dirty not implemented yet\n");
1423 warning = 1;
1424 }
1425 }
1426
1427 int create_grant_host_mapping(unsigned long addr, unsigned long frame,
1428 unsigned int flags, unsigned int cache_flags)
1429 {
1430 int rc;
1431 p2m_type_t t = p2m_grant_map_rw;
1432
1433 if ( cache_flags || (flags & ~GNTMAP_readonly) != GNTMAP_host_map )
1434 return GNTST_general_error;
1435
1436 if ( flags & GNTMAP_readonly )
1437 t = p2m_grant_map_ro;
1438
1439 rc = guest_physmap_add_entry(current->domain, _gfn(addr >> PAGE_SHIFT),
1440 _mfn(frame), 0, t);
1441
1442 if ( rc )
1443 return GNTST_general_error;
1444 else
1445 return GNTST_okay;
1446 }
1447
1448 int replace_grant_host_mapping(unsigned long addr, unsigned long mfn,
1449 unsigned long new_addr, unsigned int flags)
1450 {
1451 gfn_t gfn = _gfn(addr >> PAGE_SHIFT);
1452 struct domain *d = current->domain;
1453 int rc;
1454
1455 if ( new_addr != 0 || (flags & GNTMAP_contains_pte) )
1456 return GNTST_general_error;
1457
1458 rc = guest_physmap_remove_page(d, gfn, _mfn(mfn), 0);
1459
1460 return rc ? GNTST_general_error : GNTST_okay;
1461 }
1462
1463 bool is_iomem_page(mfn_t mfn)
1464 {
1465 return !mfn_valid(mfn);
1466 }
1467
1468 void clear_and_clean_page(struct page_info *page)
1469 {
1470 void *p = __map_domain_page(page);
1471
1472 clear_page(p);
1473 clean_dcache_va_range(p, PAGE_SIZE);
1474 unmap_domain_page(p);
1475 }
1476
1477 unsigned long get_upper_mfn_bound(void)
1478 {
1479 /* No memory hotplug yet, so current memory limit is the final one. */
1480 return max_page - 1;
1481 }
1482
1483 /*
1484 * Local variables:
1485 * mode: C
1486 * c-file-style: "BSD"
1487 * c-basic-offset: 4
1488 * indent-tabs-mode: nil
1489 * End:
1490 */
1491