// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_pkvm.h>
#include <asm/stage2_pgtable.h>

#include <hyp/fault.h>

#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>

#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)

struct host_mmu host_mmu;

static struct hyp_pool host_s2_pool;

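/*
 * Tracks the VM whose stage-2 lock is currently held on this CPU, so that the
 * guest_s2_* allocator callbacks below can locate the right per-VM pool
 * without threading a context pointer through the page-table code.
 */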
static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
#define current_vm (*this_cpu_ptr(&__current_vm))

static void guest_lock_component(struct pkvm_hyp_vm *vm)
{
	hyp_spin_lock(&vm->lock);
	current_vm = vm;
}

static void guest_unlock_component(struct pkvm_hyp_vm *vm)
{
	current_vm = NULL;
	hyp_spin_unlock(&vm->lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_mmu.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_mmu.lock);
}

static void hyp_lock_component(void)
{
	hyp_spin_lock(&pkvm_pgd_lock);
}

static void hyp_unlock_component(void)
{
	hyp_spin_unlock(&pkvm_pgd_lock);
}

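/* Page-table memory callbacks for the host stage-2, backed by host_s2_pool. */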
static void *host_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));

	hyp_split_page(hyp_virt_to_page(addr));

	/*
	 * The size of concatenated PGDs is always PAGE_SIZE times a power of
	 * two, so there should be no need to free any of the tail pages to
	 * make the allocation exact.
	 */
	WARN_ON(size != (PAGE_SIZE << get_order(size)));

	return addr;
}

static void *host_s2_zalloc_page(void *pool)
{
	return hyp_alloc_pages(pool, 0);
}

static void host_s2_get_page(void *addr)
{
	hyp_get_page(&host_s2_pool, addr);
}

static void host_s2_put_page(void *addr)
{
	hyp_put_page(&host_s2_pool, addr);
}

static void host_s2_free_removed_table(void *addr, u32 level)
{
	kvm_pgtable_stage2_free_removed(&host_mmu.mm_ops, addr, level);
}

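/*
 * Seed host_s2_pool with the pages reserved for the host stage-2 page-table
 * and install the corresponding allocation callbacks in host_mmu.mm_ops.
 */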
static int prepare_s2_pool(void *pgt_pool_base)
{
	unsigned long nr_pages, pfn;
	int ret;

	pfn = hyp_virt_to_pfn(pgt_pool_base);
	nr_pages = host_s2_pgtable_pages();
	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
	if (ret)
		return ret;

	host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
		.zalloc_page = host_s2_zalloc_page,
		.free_removed_table = host_s2_free_removed_table,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = host_s2_get_page,
		.put_page = host_s2_put_page,
	};

	return 0;
}

static void prepare_host_vtcr(void)
{
	u32 parange, phys_shift;

	/* The host stage 2 is id-mapped, so use parange for T0SZ */
	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);

	host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
					  id_aa64mmfr1_el1_sys_val, phys_shift);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot);

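/*
 * One-time setup of the host stage-2: compute the VTCR, seed the page-table
 * pool and create an empty page-table. The host runs with VMID 0; the
 * translation itself is only enabled later, in __pkvm_prot_finalize().
 */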
int kvm_host_prepare_stage2(void *pgt_pool_base)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	int ret;

	prepare_host_vtcr();
	hyp_spin_lock_init(&host_mmu.lock);
	mmu->arch = &host_mmu.arch;

	ret = prepare_s2_pool(pgt_pool_base);
	if (ret)
		return ret;

	ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
					&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
					host_stage2_force_pte_cb);
	if (ret)
		return ret;

	mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
	mmu->pgt = &host_mmu.pgt;
	atomic64_set(&mmu->vmid.id, 0);

	return 0;
}

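/*
 * Force page-level mappings for guest stage-2s. As for the host (see
 * host_stage2_force_pte_cb()), this plausibly avoids losing the page state
 * annotations to the block-splitting side-effects of
 * kvm_pgtable_stage2_map().
 */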
static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return true;
}

static void *guest_s2_zalloc_pages_exact(size_t size)
{
	void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));

	WARN_ON(size != (PAGE_SIZE << get_order(size)));
	hyp_split_page(hyp_virt_to_page(addr));

	return addr;
}

static void guest_s2_free_pages_exact(void *addr, unsigned long size)
{
	u8 order = get_order(size);
	unsigned int i;

	for (i = 0; i < (1 << order); i++)
		hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
}

static void *guest_s2_zalloc_page(void *mc)
{
	struct hyp_page *p;
	void *addr;

	addr = hyp_alloc_pages(&current_vm->pool, 0);
	if (addr)
		return addr;

	addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
	if (!addr)
		return addr;

	memset(addr, 0, PAGE_SIZE);
	p = hyp_virt_to_page(addr);
	memset(p, 0, sizeof(*p));
	p->refcount = 1;

	return addr;
}

static void guest_s2_get_page(void *addr)
{
	hyp_get_page(&current_vm->pool, addr);
}

static void guest_s2_put_page(void *addr)
{
	hyp_put_page(&current_vm->pool, addr);
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
	hyp_fixmap_unmap();
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
	hyp_fixmap_unmap();
}

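/*
 * Initialise the stage-2 page-table of a protected guest: the pgd pages
 * donated by the host seed the per-VM pool, which guest_s2_zalloc_page()
 * later tops up from the vCPU memcache when it runs dry.
 */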
int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
{
	struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
	unsigned long nr_pages;
	int ret;

	nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT;
	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
	if (ret)
		return ret;

	hyp_spin_lock_init(&vm->lock);
	vm->mm_ops = (struct kvm_pgtable_mm_ops) {
		.zalloc_pages_exact = guest_s2_zalloc_pages_exact,
		.free_pages_exact = guest_s2_free_pages_exact,
		.zalloc_page = guest_s2_zalloc_page,
		.phys_to_virt = hyp_phys_to_virt,
		.virt_to_phys = hyp_virt_to_phys,
		.page_count = hyp_page_count,
		.get_page = guest_s2_get_page,
		.put_page = guest_s2_put_page,
		.dcache_clean_inval_poc = clean_dcache_guest_page,
		.icache_inval_pou = invalidate_icache_guest_page,
	};

	guest_lock_component(vm);
	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0,
					guest_stage2_force_pte_cb);
	guest_unlock_component(vm);
	if (ret)
		return ret;

	vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);

	return 0;
}

void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
	void *addr;

	/* Dump all pgtable pages in the hyp_pool */
	guest_lock_component(vm);
	kvm_pgtable_stage2_destroy(&vm->pgt);
	vm->kvm.arch.mmu.pgd_phys = 0ULL;
	guest_unlock_component(vm);

	/* Drain the hyp_pool into the memcache */
	addr = hyp_alloc_pages(&vm->pool, 0);
	while (addr) {
		memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page));
		push_hyp_memcache(mc, addr, hyp_virt_to_phys);
		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
		addr = hyp_alloc_pages(&vm->pool, 0);
	}
}

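/*
 * Enable the host stage-2 translation on the calling CPU by programming
 * VTTBR/VTCR and setting HCR_EL2.VM. Fails with -EPERM if the stage-2 is
 * already enabled.
 */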
int __pkvm_prot_finalize(void)
{
	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

	if (params->hcr_el2 & HCR_VM)
		return -EPERM;

	params->vttbr = kvm_get_vttbr(mmu);
	params->vtcr = host_mmu.arch.vtcr;
	params->hcr_el2 |= HCR_VM;
	kvm_flush_dcache_to_poc(params, sizeof(*params));

	write_sysreg(params->hcr_el2, hcr_el2);
	__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);

	/*
	 * Make sure to have an ISB before the TLB maintenance below but only
	 * when __load_stage2() doesn't include one already.
	 */
	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));

	/* Invalidate stale HCR bits that may be cached in TLBs */
	__tlbi(vmalls12e1);
	dsb(nsh);
	isb();

	return 0;
}

static int host_stage2_unmap_dev_all(void)
{
	struct kvm_pgtable *pgt = &host_mmu.pgt;
	struct memblock_region *reg;
	u64 addr = 0;
	int i, ret;

	/* Unmap all non-memory regions to recycle the pages */
	for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
		reg = &hyp_memory[i];
		ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
		if (ret)
			return ret;
	}
	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

struct kvm_mem_range {
	u64 start;
	u64 end;
};

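/*
 * Binary-search hyp_memory for @addr. On a hit, set @range to the enclosing
 * memblock region and return it. On a miss, return NULL with @range set to
 * the gap between the neighbouring regions, which allows MMIO space to be
 * mapped in suitably large chunks.
 */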
static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
{
	int cur, left = 0, right = hyp_memblock_nr;
	struct memblock_region *reg;
	phys_addr_t end;

	range->start = 0;
	range->end = ULONG_MAX;

	/* The list of memblock regions is sorted, binary search it */
	while (left < right) {
		cur = (left + right) >> 1;
		reg = &hyp_memory[cur];
		end = reg->base + reg->size;
		if (addr < reg->base) {
			right = cur;
			range->end = reg->base;
		} else if (addr >= end) {
			left = cur + 1;
			range->start = end;
		} else {
			range->start = reg->base;
			range->end = end;
			return reg;
		}
	}

	return NULL;
}

bool addr_is_memory(phys_addr_t phys)
{
	struct kvm_mem_range range;

	return !!find_mem_range(phys, &range);
}

static bool addr_is_allowed_memory(phys_addr_t phys)
{
	struct memblock_region *reg;
	struct kvm_mem_range range;

	reg = find_mem_range(phys, &range);

	return reg && !(reg->flags & MEMBLOCK_NOMAP);
}

static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
{
	return range->start <= addr && addr < range->end;
}

static bool range_is_memory(u64 start, u64 end)
{
	struct kvm_mem_range r;

	if (!find_mem_range(start, &r))
		return false;

	return is_in_mem_range(end - 1, &r);
}

static inline int __host_stage2_idmap(u64 start, u64 end,
				      enum kvm_pgtable_prot prot)
{
	return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
				      prot, &host_s2_pool, 0);
}

/*
 * The pool has been provided with enough pages to cover all of memory with
 * page granularity, but it is difficult to know how much of the MMIO range
 * we will need to cover upfront, so we may need to 'recycle' the pages if we
 * run out.
 */
#define host_stage2_try(fn, ...)					\
	({								\
		int __ret;						\
		hyp_assert_lock_held(&host_mmu.lock);			\
		__ret = fn(__VA_ARGS__);				\
		if (__ret == -ENOMEM) {					\
			__ret = host_stage2_unmap_dev_all();		\
			if (!__ret)					\
				__ret = fn(__VA_ARGS__);		\
		}							\
		__ret;							\
	})

static inline bool range_included(struct kvm_mem_range *child,
				  struct kvm_mem_range *parent)
{
	return parent->start <= child->start && child->end <= parent->end;
}

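/*
 * Shrink @range to the largest granule that contains @addr, supports a block
 * mapping and is entirely covered by an empty (invalid and unannotated) part
 * of the page-table. -EAGAIN means the address was mapped concurrently and
 * the fault can simply be retried; -EPERM means the entry carries an
 * ownership annotation that forbids the host from mapping it.
 */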
static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
	struct kvm_mem_range cur;
	kvm_pte_t pte;
	u32 level;
	int ret;

	hyp_assert_lock_held(&host_mmu.lock);
	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
	if (ret)
		return ret;

	if (kvm_pte_valid(pte))
		return -EAGAIN;

	if (pte)
		return -EPERM;

	do {
		u64 granule = kvm_granule_size(level);
		cur.start = ALIGN_DOWN(addr, granule);
		cur.end = cur.start + granule;
		level++;
	} while ((level < KVM_PGTABLE_MAX_LEVELS) &&
			!(kvm_level_supports_block_mapping(level) &&
			  range_included(&cur, range)));

	*range = cur;

	return 0;
}

int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
			     enum kvm_pgtable_prot prot)
{
	return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}

int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
	return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
			       addr, size, &host_s2_pool, owner_id);
}

static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
{
	/*
	 * Block mappings must be used with care in the host stage-2 as a
	 * kvm_pgtable_stage2_map() operation targeting a page in the range of
	 * an existing block will delete the block under the assumption that
	 * mappings in the rest of the block range can always be rebuilt lazily.
	 * That assumption is correct for the host stage-2 with RWX mappings
	 * targeting memory or RW mappings targeting MMIO ranges (see
	 * host_stage2_idmap() below which implements some of the host memory
	 * abort logic). However, this is not safe for any other mapping where
	 * the host stage-2 page-table is in fact the only place that state is
	 * stored. In all those cases, it is safer to use page-level mappings,
	 * hence avoiding the loss of that state to the side-effects of
	 * kvm_pgtable_stage2_map().
	 */
	if (range_is_memory(addr, end))
		return prot != PKVM_HOST_MEM_PROT;
	else
		return prot != PKVM_HOST_MMIO_PROT;
}

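/*
 * Establish the identity mapping covering a faulting address: RWX for memory
 * (PKVM_HOST_MEM_PROT) and RW for MMIO (PKVM_HOST_MMIO_PROT), over the
 * largest range permitted by host_stage2_adjust_range().
 */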
static int host_stage2_idmap(u64 addr)
{
	struct kvm_mem_range range;
	bool is_memory = !!find_mem_range(addr, &range);
	enum kvm_pgtable_prot prot;
	int ret;

	prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT;

	host_lock_component();
	ret = host_stage2_adjust_range(addr, &range);
	if (ret)
		goto unlock;

	ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot);
unlock:
	host_unlock_component();

	return ret;
}

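/*
 * Handle a host stage-2 fault: recover the page-aligned IPA from HPFAR_EL2
 * and build the missing identity mapping on the fly. Anything other than
 * success or -EAGAIN (a benign race with another CPU faulting on the same
 * range) is fatal.
 */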
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
{
	struct kvm_vcpu_fault_info fault;
	u64 esr, addr;
	int ret = 0;

	esr = read_sysreg_el2(SYS_ESR);
	BUG_ON(!__get_fault_info(esr, &fault));

	addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
	ret = host_stage2_idmap(addr);
	BUG_ON(ret && ret != -EAGAIN);
}

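/*
 * Describes a transition of pages between two components. The initiator
 * supplies the page range in its own address space plus the corresponding
 * completer_addr; the check/initiate/ack/complete helpers below are selected
 * by the component ids.
 */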
struct pkvm_mem_transition {
	u64				nr_pages;

	struct {
		enum pkvm_component_id	id;
		/* Address in the initiator's address space */
		u64			addr;

		union {
			struct {
				/* Address in the completer's address space */
				u64	completer_addr;
			} host;
			struct {
				u64	completer_addr;
			} hyp;
		};
	} initiator;

	struct {
		enum pkvm_component_id	id;
	} completer;
};

struct pkvm_mem_share {
	const struct pkvm_mem_transition	tx;
	const enum kvm_pgtable_prot		completer_prot;
};

struct pkvm_mem_donation {
	const struct pkvm_mem_transition	tx;
};

struct check_walk_data {
	enum pkvm_page_state	desired;
	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte);
};

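/*
 * Helpers walking a page-table range to verify that every leaf holds the
 * desired pkvm_page_state, used to validate both ends of a transition before
 * any page-table is touched.
 */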
static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
				      enum kvm_pgtable_walk_flags visit)
{
	struct check_walk_data *d = ctx->arg;

	if (kvm_pte_valid(ctx->old) && !addr_is_allowed_memory(kvm_pte_to_phys(ctx->old)))
		return -EINVAL;

	return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM;
}

static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
				  struct check_walk_data *data)
{
	struct kvm_pgtable_walker walker = {
		.cb	= __check_page_state_visitor,
		.arg	= data,
		.flags	= KVM_PGTABLE_WALK_LEAF,
	};

	return kvm_pgtable_walk(pgt, addr, size, &walker);
}

static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
{
	if (!kvm_pte_valid(pte) && pte)
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}

static int __host_check_page_state_range(u64 addr, u64 size,
					 enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired	= state,
		.get_page_state	= host_get_page_state,
	};

	hyp_assert_lock_held(&host_mmu.lock);
	return check_page_state_range(&host_mmu.pgt, addr, size, &d);
}

static int __host_set_page_state_range(u64 addr, u64 size,
				       enum pkvm_page_state state)
{
	enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);

	return host_stage2_idmap_locked(addr, size, prot);
}

static int host_request_owned_transition(u64 *completer_addr,
					 const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.host.completer_addr;
	return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
}

static int host_request_unshare(u64 *completer_addr,
				const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.host.completer_addr;
	return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
}

static int host_initiate_share(u64 *completer_addr,
			       const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.host.completer_addr;
	return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED);
}

static int host_initiate_unshare(u64 *completer_addr,
				 const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.host.completer_addr;
	return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
}

static int host_initiate_donation(u64 *completer_addr,
				  const struct pkvm_mem_transition *tx)
{
	u8 owner_id = tx->completer.id;
	u64 size = tx->nr_pages * PAGE_SIZE;

	*completer_addr = tx->initiator.host.completer_addr;
	return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id);
}

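/*
 * The hypervisor's requests have already been validated against its own
 * page-table, so the host-side completer check is skipped for hyp-initiated
 * transitions unless CONFIG_NVHE_EL2_DEBUG asks for the extra checking.
 */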
static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
{
	return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
		 tx->initiator.id != PKVM_ID_HYP);
}

static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx,
				 enum pkvm_page_state state)
{
	u64 size = tx->nr_pages * PAGE_SIZE;

	if (__host_ack_skip_pgtable_check(tx))
		return 0;

	return __host_check_page_state_range(addr, size, state);
}

static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
{
	return __host_ack_transition(addr, tx, PKVM_NOPAGE);
}

static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u8 host_id = tx->completer.id;

	return host_stage2_set_owner_locked(addr, size, host_id);
}

static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
{
	if (!kvm_pte_valid(pte))
		return PKVM_NOPAGE;

	return pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
}

static int __hyp_check_page_state_range(u64 addr, u64 size,
					enum pkvm_page_state state)
{
	struct check_walk_data d = {
		.desired	= state,
		.get_page_state	= hyp_get_page_state,
	};

	hyp_assert_lock_held(&pkvm_pgd_lock);
	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
}

static int hyp_request_donation(u64 *completer_addr,
				const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	u64 addr = tx->initiator.addr;

	*completer_addr = tx->initiator.hyp.completer_addr;
	return __hyp_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
}

static int hyp_initiate_donation(u64 *completer_addr,
				 const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	int ret;

	*completer_addr = tx->initiator.hyp.completer_addr;
	ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size);
	return (ret != size) ? -EFAULT : 0;
}

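/*
 * Counterpart of __host_ack_skip_pgtable_check(): skip the hyp-side check for
 * host-initiated transitions unless CONFIG_NVHE_EL2_DEBUG is enabled,
 * presumably because the host's PKVM_PAGE_OWNED check already implies the
 * page is unknown to the hypervisor.
 */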
static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
{
	return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
		 tx->initiator.id != PKVM_ID_HOST);
}

static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx,
			 enum kvm_pgtable_prot perms)
{
	u64 size = tx->nr_pages * PAGE_SIZE;

	if (perms != PAGE_HYP)
		return -EPERM;

	if (__hyp_ack_skip_pgtable_check(tx))
		return 0;

	return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
}

static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;

	if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr))
		return -EBUSY;

	if (__hyp_ack_skip_pgtable_check(tx))
		return 0;

	return __hyp_check_page_state_range(addr, size,
					    PKVM_PAGE_SHARED_BORROWED);
}

static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;

	if (__hyp_ack_skip_pgtable_check(tx))
		return 0;

	return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
}

static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
			      enum kvm_pgtable_prot perms)
{
	void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
	enum kvm_pgtable_prot prot;

	prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED);
	return pkvm_create_mappings_locked(start, end, prot);
}

static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
{
	u64 size = tx->nr_pages * PAGE_SIZE;
	int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size);

	return (ret != size) ? -EFAULT : 0;
}

static int hyp_complete_donation(u64 addr,
				 const struct pkvm_mem_transition *tx)
{
	void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
	enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);

	return pkvm_create_mappings_locked(start, end, prot);
}

static int check_share(struct pkvm_mem_share *share)
{
	const struct pkvm_mem_transition *tx = &share->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_request_owned_transition(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HYP:
		ret = hyp_ack_share(completer_addr, tx, share->completer_prot);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

static int __do_share(struct pkvm_mem_share *share)
{
	const struct pkvm_mem_transition *tx = &share->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_initiate_share(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HYP:
		ret = hyp_complete_share(completer_addr, tx, share->completer_prot);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

/*
 * do_share():
 *
 * The page owner grants access to another component with a given set
 * of permissions.
 *
 * Initiator: OWNED	=> SHARED_OWNED
 * Completer: NOPAGE	=> SHARED_BORROWED
 */
static int do_share(struct pkvm_mem_share *share)
{
	int ret;

	ret = check_share(share);
	if (ret)
		return ret;

	return WARN_ON(__do_share(share));
}

static int check_unshare(struct pkvm_mem_share *share)
{
	const struct pkvm_mem_transition *tx = &share->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_request_unshare(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HYP:
		ret = hyp_ack_unshare(completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

static int __do_unshare(struct pkvm_mem_share *share)
{
	const struct pkvm_mem_transition *tx = &share->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_initiate_unshare(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HYP:
		ret = hyp_complete_unshare(completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

/*
 * do_unshare():
 *
 * The page owner revokes access from another component for a range of
 * pages which were previously shared using do_share().
 *
 * Initiator: SHARED_OWNED	=> OWNED
 * Completer: SHARED_BORROWED	=> NOPAGE
 */
static int do_unshare(struct pkvm_mem_share *share)
{
	int ret;

	ret = check_unshare(share);
	if (ret)
		return ret;

	return WARN_ON(__do_unshare(share));
}

static int check_donation(struct pkvm_mem_donation *donation)
{
	const struct pkvm_mem_transition *tx = &donation->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_request_owned_transition(&completer_addr, tx);
		break;
	case PKVM_ID_HYP:
		ret = hyp_request_donation(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HOST:
		ret = host_ack_donation(completer_addr, tx);
		break;
	case PKVM_ID_HYP:
		ret = hyp_ack_donation(completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

static int __do_donate(struct pkvm_mem_donation *donation)
{
	const struct pkvm_mem_transition *tx = &donation->tx;
	u64 completer_addr;
	int ret;

	switch (tx->initiator.id) {
	case PKVM_ID_HOST:
		ret = host_initiate_donation(&completer_addr, tx);
		break;
	case PKVM_ID_HYP:
		ret = hyp_initiate_donation(&completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	if (ret)
		return ret;

	switch (tx->completer.id) {
	case PKVM_ID_HOST:
		ret = host_complete_donation(completer_addr, tx);
		break;
	case PKVM_ID_HYP:
		ret = hyp_complete_donation(completer_addr, tx);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

/*
 * do_donate():
 *
 * The page owner transfers ownership to another component, losing access
 * as a consequence.
 *
 * Initiator: OWNED	=> NOPAGE
 * Completer: NOPAGE	=> OWNED
 */
static int do_donate(struct pkvm_mem_donation *donation)
{
	int ret;

	ret = check_donation(donation);
	if (ret)
		return ret;

	return WARN_ON(__do_donate(donation));
}

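/*
 * Share a single host page with the hypervisor at its linear-map hyp VA,
 * mapped PAGE_HYP. Both the host and hyp page-table locks are held across
 * do_share() so the check and apply phases cannot race with other CPUs.
 */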
int __pkvm_host_share_hyp(u64 pfn)
{
	int ret;
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_share share = {
		.tx	= {
			.nr_pages	= 1,
			.initiator	= {
				.id	= PKVM_ID_HOST,
				.addr	= host_addr,
				.host	= {
					.completer_addr = hyp_addr,
				},
			},
			.completer	= {
				.id	= PKVM_ID_HYP,
			},
		},
		.completer_prot	= PAGE_HYP,
	};

	host_lock_component();
	hyp_lock_component();

	ret = do_share(&share);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_unshare_hyp(u64 pfn)
{
	int ret;
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_share share = {
		.tx	= {
			.nr_pages	= 1,
			.initiator	= {
				.id	= PKVM_ID_HOST,
				.addr	= host_addr,
				.host	= {
					.completer_addr = hyp_addr,
				},
			},
			.completer	= {
				.id	= PKVM_ID_HYP,
			},
		},
		.completer_prot	= PAGE_HYP,
	};

	host_lock_component();
	hyp_lock_component();

	ret = do_unshare(&share);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
{
	int ret;
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_donation donation = {
		.tx	= {
			.nr_pages	= nr_pages,
			.initiator	= {
				.id	= PKVM_ID_HOST,
				.addr	= host_addr,
				.host	= {
					.completer_addr = hyp_addr,
				},
			},
			.completer	= {
				.id	= PKVM_ID_HYP,
			},
		},
	};

	host_lock_component();
	hyp_lock_component();

	ret = do_donate(&donation);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
{
	int ret;
	u64 host_addr = hyp_pfn_to_phys(pfn);
	u64 hyp_addr = (u64)__hyp_va(host_addr);
	struct pkvm_mem_donation donation = {
		.tx	= {
			.nr_pages	= nr_pages,
			.initiator	= {
				.id	= PKVM_ID_HYP,
				.addr	= hyp_addr,
				.hyp	= {
					.completer_addr = host_addr,
				},
			},
			.completer	= {
				.id	= PKVM_ID_HOST,
			},
		},
	};

	host_lock_component();
	hyp_lock_component();

	ret = do_donate(&donation);

	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

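/*
 * Pin previously shared pages by raising their hyp refcount so that the host
 * cannot unshare them while they are in use: hyp_ack_unshare() returns -EBUSY
 * while hyp_page_count() is non-zero. Must be balanced by a later call to
 * hyp_unpin_shared_mem().
 */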
int hyp_pin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);
	u64 size = end - start;
	int ret;

	host_lock_component();
	hyp_lock_component();

	ret = __host_check_page_state_range(__hyp_pa(start), size,
					    PKVM_PAGE_SHARED_OWNED);
	if (ret)
		goto unlock;

	ret = __hyp_check_page_state_range(start, size,
					   PKVM_PAGE_SHARED_BORROWED);
	if (ret)
		goto unlock;

	for (cur = start; cur < end; cur += PAGE_SIZE)
		hyp_page_ref_inc(hyp_virt_to_page(cur));

unlock:
	hyp_unlock_component();
	host_unlock_component();

	return ret;
}

void hyp_unpin_shared_mem(void *from, void *to)
{
	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
	u64 end = PAGE_ALIGN((u64)to);

	host_lock_component();
	hyp_lock_component();

	for (cur = start; cur < end; cur += PAGE_SIZE)
		hyp_page_ref_dec(hyp_virt_to_page(cur));

	hyp_unlock_component();
	host_unlock_component();
}