// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2006
 */

#include <linux/memory_hotplug.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include <asm/cacheflush.h>
#include <asm/nospec-branch.h>
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/set_memory.h>

static DEFINE_MUTEX(vmem_mutex);

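/*
 * Allocation helpers: before the slab allocator is up, page table pages
 * come from memblock; afterwards the normal page allocator is used.
 * Boot (memblock) allocations are never freed again.
 */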
static void __ref *vmem_alloc_pages(unsigned int order)
{
	unsigned long size = PAGE_SIZE << order;

	if (slab_is_available())
		return (void *)__get_free_pages(GFP_KERNEL, order);
	return memblock_alloc(size, size);
}

static void vmem_free_pages(unsigned long addr, int order)
{
	/* We don't expect boot memory to be removed ever. */
	if (!slab_is_available() ||
	    WARN_ON_ONCE(PageReserved(virt_to_page(addr))))
		return;
	free_pages(addr, order);
}

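/*
 * Allocate a CRST (region or segment) table and initialize all of its
 * entries with the given value.
 */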
void *vmem_crst_alloc(unsigned long val)
{
	unsigned long *table;

	table = vmem_alloc_pages(CRST_ALLOC_ORDER);
	if (table)
		crst_table_init(table, val);
	return table;
}

pte_t __ref *vmem_pte_alloc(void)
{
	unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
	pte_t *pte;

	if (slab_is_available())
		pte = (pte_t *) page_table_alloc(&init_mm);
	else
		pte = (pte_t *) memblock_alloc(size, size);
	if (!pte)
		return NULL;
	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	return pte;
}

static void vmem_pte_free(unsigned long *table)
{
	/* We don't expect boot memory to be removed ever. */
	if (!slab_is_available() ||
	    WARN_ON_ONCE(PageReserved(virt_to_page(table))))
		return;
	page_table_free(&init_mm, table);
}

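/*
 * Marker byte for vmemmap memory that belongs to a populated PMD but is
 * not (yet) used. vmemmap_unuse_sub_pmd() checks for it with memchr_inv()
 * to decide whether a huge vmemmap PMD can be freed again.
 */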
#define PAGE_UNUSED 0xFD

/*
 * The unused vmemmap range, which was not yet memset(PAGE_UNUSED), ranges
 * from unused_sub_pmd_start to the next PMD_SIZE boundary.
 */
static unsigned long unused_sub_pmd_start;

static void vmemmap_flush_unused_sub_pmd(void)
{
	if (!unused_sub_pmd_start)
		return;
	memset((void *)unused_sub_pmd_start, PAGE_UNUSED,
	       ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start);
	unused_sub_pmd_start = 0;
}

static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end)
{
	/*
	 * As we expect to add in the same granularity as we remove, it's
	 * sufficient to mark only some piece used to block the memmap page
	 * from getting removed (just in case the memmap never gets
	 * initialized, e.g., because the memory block never gets onlined).
	 */
	memset((void *)start, 0, sizeof(struct page));
}

static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
{
	/*
	 * We only optimize if the new used range directly follows the
	 * previously unused range (esp., when populating consecutive sections).
	 */
	if (unused_sub_pmd_start == start) {
		unused_sub_pmd_start = end;
		if (likely(IS_ALIGNED(unused_sub_pmd_start, PMD_SIZE)))
			unused_sub_pmd_start = 0;
		return;
	}
	vmemmap_flush_unused_sub_pmd();
	vmemmap_mark_sub_pmd_used(start, end);
}

static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
{
	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);

	vmemmap_flush_unused_sub_pmd();

	/* Could be our memmap page is filled with PAGE_UNUSED already ... */
	vmemmap_mark_sub_pmd_used(start, end);

	/* Mark the unused parts of the new memmap page PAGE_UNUSED. */
	if (!IS_ALIGNED(start, PMD_SIZE))
		memset((void *)page, PAGE_UNUSED, start - page);
	/*
	 * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
	 * consecutive sections. For the last added PMD, remember the last
	 * unused range in the populated PMD.
	 */
	if (!IS_ALIGNED(end, PMD_SIZE))
		unused_sub_pmd_start = end;
}

/* Returns true if the PMD is completely unused and can be freed. */
static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
{
	unsigned long page = ALIGN_DOWN(start, PMD_SIZE);

	vmemmap_flush_unused_sub_pmd();
	memset((void *)start, PAGE_UNUSED, end - start);
	return !memchr_inv((void *)page, PAGE_UNUSED, PMD_SIZE);
}

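/*
 * The modify_*_table() helpers below each walk one level of the kernel
 * page tables and either add (add == true) or remove mappings. "direct"
 * selects between the 1:1 (identity) mapping and the vmemmap; backing
 * pages are only allocated and freed for the vmemmap, the identity
 * mapping always points at the physical address being mapped.
 */
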
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
				  unsigned long end, bool add, bool direct)
{
	unsigned long prot, pages = 0;
	int ret = -ENOMEM;
	pte_t *pte;

	prot = pgprot_val(PAGE_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_PAGE_NOEXEC;

	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (!add) {
			if (pte_none(*pte))
				continue;
			if (!direct)
				vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0);
			pte_clear(&init_mm, addr, pte);
		} else if (pte_none(*pte)) {
			if (!direct) {
				void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);

				if (!new_page)
					goto out;
				set_pte(pte, __pte(__pa(new_page) | prot));
			} else {
				set_pte(pte, __pte(__pa(addr) | prot));
			}
		} else {
			continue;
		}
		pages++;
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
	return ret;
}

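/* Free the pte table behind the pmd entry if all of its entries are empty. */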
static void try_free_pte_table(pmd_t *pmd, unsigned long start)
{
	pte_t *pte;
	int i;

	/* We can safely assume this is fully in 1:1 mapping & vmemmap area */
	pte = pte_offset_kernel(pmd, start);
	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
		if (!pte_none(*pte))
			return;
	}
	vmem_pte_free((unsigned long *) pmd_deref(*pmd));
	pmd_clear(pmd);
}

/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
				  unsigned long end, bool add, bool direct)
{
	unsigned long next, prot, pages = 0;
	int ret = -ENOMEM;
	pmd_t *pmd;
	pte_t *pte;

	prot = pgprot_val(SEGMENT_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_SEGMENT_ENTRY_NOEXEC;

	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (!add) {
			if (pmd_none(*pmd))
				continue;
			if (pmd_large(*pmd)) {
				if (IS_ALIGNED(addr, PMD_SIZE) &&
				    IS_ALIGNED(next, PMD_SIZE)) {
					if (!direct)
						vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
					pmd_clear(pmd);
					pages++;
				} else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
					vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
					pmd_clear(pmd);
				}
				continue;
			}
		} else if (pmd_none(*pmd)) {
			if (IS_ALIGNED(addr, PMD_SIZE) &&
			    IS_ALIGNED(next, PMD_SIZE) &&
			    MACHINE_HAS_EDAT1 && direct &&
			    !debug_pagealloc_enabled()) {
				set_pmd(pmd, __pmd(__pa(addr) | prot));
				pages++;
				continue;
			} else if (!direct && MACHINE_HAS_EDAT1) {
				void *new_page;

				/*
				 * Use 1MB frames for vmemmap if available. We
				 * always use large frames even if they are only
				 * partially used. Otherwise we would also need
				 * page tables, since vmemmap_populate gets
				 * called for each section separately.
				 */
				new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
				if (new_page) {
					set_pmd(pmd, __pmd(__pa(new_page) | prot));
					if (!IS_ALIGNED(addr, PMD_SIZE) ||
					    !IS_ALIGNED(next, PMD_SIZE)) {
						vmemmap_use_new_sub_pmd(addr, next);
					}
					continue;
				}
			}
			pte = vmem_pte_alloc();
			if (!pte)
				goto out;
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_large(*pmd)) {
			if (!direct)
				vmemmap_use_sub_pmd(addr, next);
			continue;
		}
		ret = modify_pte_table(pmd, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_pte_table(pmd, addr & PMD_MASK);
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
	return ret;
}

static void try_free_pmd_table(pud_t *pud, unsigned long start)
{
	const unsigned long end = start + PUD_SIZE;
	pmd_t *pmd;
	int i;

	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
	if (end > VMALLOC_START)
		return;

	pmd = pmd_offset(pud, start);
	for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
		if (!pmd_none(*pmd))
			return;
	vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
	pud_clear(pud);
}

static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
			    bool add, bool direct)
{
	unsigned long next, prot, pages = 0;
	int ret = -ENOMEM;
	pud_t *pud;
	pmd_t *pmd;

	prot = pgprot_val(REGION3_KERNEL);
	if (!MACHINE_HAS_NX)
		prot &= ~_REGION_ENTRY_NOEXEC;
	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (!add) {
			if (pud_none(*pud))
				continue;
			if (pud_large(*pud)) {
				if (IS_ALIGNED(addr, PUD_SIZE) &&
				    IS_ALIGNED(next, PUD_SIZE)) {
					pud_clear(pud);
					pages++;
				}
				continue;
			}
		} else if (pud_none(*pud)) {
			if (IS_ALIGNED(addr, PUD_SIZE) &&
			    IS_ALIGNED(next, PUD_SIZE) &&
			    MACHINE_HAS_EDAT2 && direct &&
			    !debug_pagealloc_enabled()) {
				set_pud(pud, __pud(__pa(addr) | prot));
				pages++;
				continue;
			}
			pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			if (!pmd)
				goto out;
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_large(*pud)) {
			continue;
		}
		ret = modify_pmd_table(pud, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_pmd_table(pud, addr & PUD_MASK);
	}
	ret = 0;
out:
	if (direct)
		update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
	return ret;
}

static void try_free_pud_table(p4d_t *p4d, unsigned long start)
{
	const unsigned long end = start + P4D_SIZE;
	pud_t *pud;
	int i;

	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
	if (end > VMALLOC_START)
		return;

	pud = pud_offset(p4d, start);
	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
		if (!pud_none(*pud))
			return;
	}
	vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
	p4d_clear(p4d);
}

static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
			    bool add, bool direct)
{
	unsigned long next;
	int ret = -ENOMEM;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (!add) {
			if (p4d_none(*p4d))
				continue;
		} else if (p4d_none(*p4d)) {
			pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
			if (!pud)
				goto out;
			p4d_populate(&init_mm, p4d, pud);
		}
		ret = modify_pud_table(p4d, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_pud_table(p4d, addr & P4D_MASK);
	}
	ret = 0;
out:
	return ret;
}

static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
{
	const unsigned long end = start + PGDIR_SIZE;
	p4d_t *p4d;
	int i;

	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
	if (end > VMALLOC_START)
		return;

	p4d = p4d_offset(pgd, start);
	for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
		if (!p4d_none(*p4d))
			return;
	}
	vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
	pgd_clear(pgd);
}

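/*
 * Walk the kernel page tables for [start, end) and add or remove
 * mappings, either for the identity map (direct) or for the vmemmap.
 * On removal, empty intermediate tables are freed and the TLB is
 * flushed for the whole range.
 */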
static int modify_pagetable(unsigned long start, unsigned long end, bool add,
			    bool direct)
{
	unsigned long addr, next;
	int ret = -ENOMEM;
	pgd_t *pgd;
	p4d_t *p4d;

	if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
		return -EINVAL;
	for (addr = start; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset_k(addr);

		if (!add) {
			if (pgd_none(*pgd))
				continue;
		} else if (pgd_none(*pgd)) {
			p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
			if (!p4d)
				goto out;
			pgd_populate(&init_mm, pgd, p4d);
		}
		ret = modify_p4d_table(pgd, addr, next, add, direct);
		if (ret)
			goto out;
		if (!add)
			try_free_p4d_table(pgd, addr & PGDIR_MASK);
	}
	ret = 0;
out:
	if (!add)
		flush_tlb_kernel_range(start, end);
	return ret;
}

static int add_pagetable(unsigned long start, unsigned long end, bool direct)
{
	return modify_pagetable(start, end, true, direct);
}

static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
{
	return modify_pagetable(start, end, false, direct);
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_range(unsigned long start, unsigned long size)
{
	return add_pagetable(start, start + size, true);
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
	remove_pagetable(start, start + size, true);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
			       struct vmem_altmap *altmap)
{
	int ret;

	mutex_lock(&vmem_mutex);
	/* We don't care about the node, just use NUMA_NO_NODE on allocations */
	ret = add_pagetable(start, end, false);
	if (ret)
		remove_pagetable(start, end, false);
	mutex_unlock(&vmem_mutex);
	return ret;
}

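/*
 * Tear down the vmemmap page tables for the given range again.
 */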
void vmemmap_free(unsigned long start, unsigned long end,
		  struct vmem_altmap *altmap)
{
	mutex_lock(&vmem_mutex);
	remove_pagetable(start, end, false);
	mutex_unlock(&vmem_mutex);
}

void vmem_remove_mapping(unsigned long start, unsigned long size)
{
	mutex_lock(&vmem_mutex);
	vmem_remove_range(start, size);
	mutex_unlock(&vmem_mutex);
}

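/*
 * The identity mapping is limited to [0, VMEM_MAX_PHYS); report that
 * range so hotplugged memory can be validated against it.
 */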
struct range arch_get_mappable_range(void)
{
	struct range mhp_range;

	mhp_range.start = 0;
	mhp_range.end = VMEM_MAX_PHYS - 1;
	return mhp_range;
}

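/*
 * Add a physical memory range to the 1:1 mapping after checking it
 * against the range reported by arch_get_mappable_range().
 */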
int vmem_add_mapping(unsigned long start, unsigned long size)
{
	struct range range = arch_get_mappable_range();
	int ret;

	if (start < range.start ||
	    start + size > range.end + 1 ||
	    start + size < start)
		return -ERANGE;

	mutex_lock(&vmem_mutex);
	ret = vmem_add_range(start, size);
	if (ret)
		vmem_remove_range(start, size);
	mutex_unlock(&vmem_mutex);
	return ret;
}

/*
 * Allocate new or return existing page-table entry, but do not map it
 * to any physical address. If missing, allocate the required segment-
 * and region-table entries along the way. Meeting a large segment- or
 * region-table entry while traversing is an error, since the function
 * is expected to be called against virtual regions reserved for 4KB
 * mappings only.
 */
pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
{
	pte_t *ptep = NULL;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		if (!alloc)
			goto out;
		p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
		if (!p4d)
			goto out;
		pgd_populate(&init_mm, pgd, p4d);
	}
	p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d)) {
		if (!alloc)
			goto out;
		pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
		if (!pud)
			goto out;
		p4d_populate(&init_mm, p4d, pud);
	}
	pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		if (!alloc)
			goto out;
		pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
		if (!pmd)
			goto out;
		pud_populate(&init_mm, pud, pmd);
	} else if (WARN_ON_ONCE(pud_large(*pud))) {
		goto out;
	}
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		if (!alloc)
			goto out;
		pte = vmem_pte_alloc();
		if (!pte)
			goto out;
		pmd_populate(&init_mm, pmd, pte);
	} else if (WARN_ON_ONCE(pmd_large(*pmd))) {
		goto out;
	}
	ptep = pte_offset_kernel(pmd, addr);
out:
	return ptep;
}

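/*
 * Map a single 4K page at addr to the given physical address, allocating
 * page tables on demand when alloc is true. Any previous translation for
 * addr is invalidated first. vmem_map_4k_page() below is the variant that
 * takes vmem_mutex.
 */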
int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc)
{
	pte_t *ptep, pte;

	if (!IS_ALIGNED(addr, PAGE_SIZE))
		return -EINVAL;
	ptep = vmem_get_alloc_pte(addr, alloc);
	if (!ptep)
		return -ENOMEM;
	__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	pte = mk_pte_phys(phys, prot);
	set_pte(ptep, pte);
	return 0;
}

int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot)
{
	int rc;

	mutex_lock(&vmem_mutex);
	rc = __vmem_map_4k_page(addr, phys, prot, true);
	mutex_unlock(&vmem_mutex);
	return rc;
}

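/*
 * Remove a single 4K mapping that was established with vmem_map_4k_page().
 */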
void vmem_unmap_4k_page(unsigned long addr)
{
	pte_t *ptep;

	mutex_lock(&vmem_mutex);
	ptep = virt_to_kpte(addr);
	__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	pte_clear(&init_mm, addr, ptep);
	mutex_unlock(&vmem_mutex);
}

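/*
 * sort() helpers used by vmem_map_init() to order the exclude regions
 * by base address.
 */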
static int __init memblock_region_cmp(const void *a, const void *b)
{
	const struct memblock_region *r1 = a;
	const struct memblock_region *r2 = b;

	if (r1->base < r2->base)
		return -1;
	if (r1->base > r2->base)
		return 1;
	return 0;
}

static void __init memblock_region_swap(void *a, void *b, int size)
{
	swap(*(struct memblock_region *)a, *(struct memblock_region *)b);
}

/*
 * Map the whole physical memory to virtual memory (identity mapping).
 * We reserve enough space in the vmalloc area for the vmemmap to hotplug
 * additional memory segments.
 */
void __init vmem_map_init(void)
{
	struct memblock_region memory_rwx_regions[] = {
		{
			.base = 0,
			.size = sizeof(struct lowcore),
			.flags = MEMBLOCK_NONE,
#ifdef CONFIG_NUMA
			.nid = NUMA_NO_NODE,
#endif
		},
		{
			.base = __pa(_stext),
			.size = _etext - _stext,
			.flags = MEMBLOCK_NONE,
#ifdef CONFIG_NUMA
			.nid = NUMA_NO_NODE,
#endif
		},
		{
			.base = __pa(_sinittext),
			.size = _einittext - _sinittext,
			.flags = MEMBLOCK_NONE,
#ifdef CONFIG_NUMA
			.nid = NUMA_NO_NODE,
#endif
		},
		{
			.base = __stext_amode31,
			.size = __etext_amode31 - __stext_amode31,
			.flags = MEMBLOCK_NONE,
#ifdef CONFIG_NUMA
			.nid = NUMA_NO_NODE,
#endif
		},
	};
	struct memblock_type memory_rwx = {
		.regions = memory_rwx_regions,
		.cnt = ARRAY_SIZE(memory_rwx_regions),
		.max = ARRAY_SIZE(memory_rwx_regions),
	};
	phys_addr_t base, end;
	u64 i;

	/*
	 * Set the RW+NX attribute on all memory, except for the regions
	 * enumerated in the memory_rwx exclude type. These regions need
	 * different attributes, which are enforced afterwards.
	 *
	 * __for_each_mem_range() requires that the iterated and exclude
	 * types are sorted. The relative location of _stext and _sinittext
	 * is hardcoded in the linker script. However, the location of
	 * __stext_amode31 and of the kernel image itself is chosen
	 * dynamically. Thus, sort the exclude type.
	 */
	sort(&memory_rwx_regions,
	     ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]),
	     memblock_region_cmp, memblock_region_swap);
	__for_each_mem_range(i, &memblock.memory, &memory_rwx,
			     NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) {
		__set_memory((unsigned long)__va(base),
			     (end - base) >> PAGE_SHIFT,
			     SET_MEMORY_RW | SET_MEMORY_NX);
	}

	__set_memory((unsigned long)_stext,
		     (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);
	__set_memory((unsigned long)_etext,
		     (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT,
		     SET_MEMORY_RO);
	__set_memory((unsigned long)_sinittext,
		     (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);
	__set_memory(__stext_amode31,
		     (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
		     SET_MEMORY_RO | SET_MEMORY_X);

	/* lowcore must be executable for LPSWE */
	if (static_key_enabled(&cpu_has_bear))
		set_memory_nx(0, 1);
	set_memory_nx(PAGE_SIZE, 1);

	pr_info("Write protected kernel read-only data: %luk\n",
		(unsigned long)(__end_rodata - _stext) >> 10);
}