1 /******************************************************************************
2 * arch/x86/mm/p2m.c
3 *
4 * physical-to-machine mappings for automatically-translated domains.
5 *
6 * Parts of this code are Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
7 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
8 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
9 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
10 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; If not, see <http://www.gnu.org/licenses/>.
24 */
25
26 #include <xen/guest_access.h> /* copy_from_guest() */
27 #include <xen/iommu.h>
28 #include <xen/vm_event.h>
29 #include <xen/event.h>
30 #include <public/vm_event.h>
31 #include <asm/domain.h>
32 #include <asm/page.h>
33 #include <asm/paging.h>
34 #include <asm/p2m.h>
35 #include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
36 #include <asm/mem_sharing.h>
37 #include <asm/hvm/nestedhvm.h>
38 #include <asm/altp2m.h>
39 #include <asm/hvm/svm/amd-iommu-proto.h>
40 #include <asm/vm_event.h>
41 #include <xsm/xsm.h>
42
43 #include "mm-locks.h"
44
45 /* Turn on/off host superpage page table support for hap, default on. */
46 bool_t __initdata opt_hap_1gb = 1, __initdata opt_hap_2mb = 1;
47 boolean_param("hap_1gb", opt_hap_1gb);
48 boolean_param("hap_2mb", opt_hap_2mb);
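/*
 * Illustrative note (not code from this file): the two boolean_param()
 * hooks above expose these tunables on the Xen command line, e.g.
 *
 *   xen ... hap_1gb=0 hap_2mb=1
 *
 * which would disable 1GB HAP superpages while keeping 2MB ones; both
 * default to enabled, as initialised above.
 */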
49
50 /* Override macros from asm/page.h to make them work with mfn_t */
51 #undef mfn_to_page
52 #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
53 #undef page_to_mfn
54 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
55
56 DEFINE_PERCPU_RWLOCK_GLOBAL(p2m_percpu_rwlock);
57
58 /* Init the datastructures for later use by the p2m code */
59 static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
60 {
61 unsigned int i;
62 int ret = 0;
63
64 mm_rwlock_init(&p2m->lock);
65 mm_lock_init(&p2m->pod.lock);
66 INIT_LIST_HEAD(&p2m->np2m_list);
67 INIT_PAGE_LIST_HEAD(&p2m->pages);
68 INIT_PAGE_LIST_HEAD(&p2m->pod.super);
69 INIT_PAGE_LIST_HEAD(&p2m->pod.single);
70
71 p2m->domain = d;
72 p2m->default_access = p2m_access_rwx;
73 p2m->p2m_class = p2m_host;
74
75 p2m->np2m_base = P2M_BASE_EADDR;
76 p2m->np2m_generation = 0;
77
78 for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i )
79 p2m->pod.mrp.list[i] = gfn_x(INVALID_GFN);
80
81 if ( hap_enabled(d) && cpu_has_vmx )
82 ret = ept_p2m_init(p2m);
83 else
84 p2m_pt_init(p2m);
85
86 spin_lock_init(&p2m->ioreq.lock);
87
88 return ret;
89 }
90
91 static struct p2m_domain *p2m_init_one(struct domain *d)
92 {
93 struct p2m_domain *p2m = xzalloc(struct p2m_domain);
94
95 if ( !p2m )
96 return NULL;
97
98 if ( !zalloc_cpumask_var(&p2m->dirty_cpumask) )
99 goto free_p2m;
100
101 if ( p2m_initialise(d, p2m) )
102 goto free_cpumask;
103 return p2m;
104
105 free_cpumask:
106 free_cpumask_var(p2m->dirty_cpumask);
107 free_p2m:
108 xfree(p2m);
109 return NULL;
110 }
111
112 static void p2m_free_one(struct p2m_domain *p2m)
113 {
114 if ( hap_enabled(p2m->domain) && cpu_has_vmx )
115 ept_p2m_uninit(p2m);
116 free_cpumask_var(p2m->dirty_cpumask);
117 xfree(p2m);
118 }
119
120 static int p2m_init_hostp2m(struct domain *d)
121 {
122 struct p2m_domain *p2m = p2m_init_one(d);
123
124 if ( p2m )
125 {
126 p2m->logdirty_ranges = rangeset_new(d, "log-dirty",
127 RANGESETF_prettyprint_hex);
128 if ( p2m->logdirty_ranges )
129 {
130 d->arch.p2m = p2m;
131 return 0;
132 }
133 p2m_free_one(p2m);
134 }
135 return -ENOMEM;
136 }
137
138 static void p2m_teardown_hostp2m(struct domain *d)
139 {
140 /* Iterate over all p2m tables per domain */
141 struct p2m_domain *p2m = p2m_get_hostp2m(d);
142
143 if ( p2m )
144 {
145 rangeset_destroy(p2m->logdirty_ranges);
146 p2m_free_one(p2m);
147 d->arch.p2m = NULL;
148 }
149 }
150
151 static void p2m_teardown_nestedp2m(struct domain *d)
152 {
153 unsigned int i;
154 struct p2m_domain *p2m;
155
156 for ( i = 0; i < MAX_NESTEDP2M; i++ )
157 {
158 if ( !d->arch.nested_p2m[i] )
159 continue;
160 p2m = d->arch.nested_p2m[i];
161 list_del(&p2m->np2m_list);
162 p2m_free_one(p2m);
163 d->arch.nested_p2m[i] = NULL;
164 }
165 }
166
167 static int p2m_init_nestedp2m(struct domain *d)
168 {
169 unsigned int i;
170 struct p2m_domain *p2m;
171
172 mm_lock_init(&d->arch.nested_p2m_lock);
173 for ( i = 0; i < MAX_NESTEDP2M; i++ )
174 {
175 d->arch.nested_p2m[i] = p2m = p2m_init_one(d);
176 if ( p2m == NULL )
177 {
178 p2m_teardown_nestedp2m(d);
179 return -ENOMEM;
180 }
181 p2m->p2m_class = p2m_nested;
182 p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
183 list_add(&p2m->np2m_list, &p2m_get_hostp2m(d)->np2m_list);
184 }
185
186 return 0;
187 }
188
189 static void p2m_teardown_altp2m(struct domain *d)
190 {
191 unsigned int i;
192 struct p2m_domain *p2m;
193
194 for ( i = 0; i < MAX_ALTP2M; i++ )
195 {
196 if ( !d->arch.altp2m_p2m[i] )
197 continue;
198 p2m = d->arch.altp2m_p2m[i];
199 d->arch.altp2m_p2m[i] = NULL;
200 p2m_free_one(p2m);
201 }
202 }
203
204 static int p2m_init_altp2m(struct domain *d)
205 {
206 unsigned int i;
207 struct p2m_domain *p2m;
208
209 mm_lock_init(&d->arch.altp2m_list_lock);
210 for ( i = 0; i < MAX_ALTP2M; i++ )
211 {
212 d->arch.altp2m_p2m[i] = p2m = p2m_init_one(d);
213 if ( p2m == NULL )
214 {
215 p2m_teardown_altp2m(d);
216 return -ENOMEM;
217 }
218 p2m->p2m_class = p2m_alternate;
219 p2m->access_required = 1;
220 _atomic_set(&p2m->active_vcpus, 0);
221 }
222
223 return 0;
224 }
225
226 int p2m_init(struct domain *d)
227 {
228 int rc;
229
230 rc = p2m_init_hostp2m(d);
231 if ( rc )
232 return rc;
233
234 /* Must initialise nestedp2m unconditionally
235 * since nestedhvm_enabled(d) returns false here.
236 * (p2m_init runs too early for HVM_PARAM_* options) */
237 rc = p2m_init_nestedp2m(d);
238 if ( rc )
239 {
240 p2m_teardown_hostp2m(d);
241 return rc;
242 }
243
244 rc = p2m_init_altp2m(d);
245 if ( rc )
246 {
247 p2m_teardown_hostp2m(d);
248 p2m_teardown_nestedp2m(d);
249 }
250
251 return rc;
252 }
253
254 int p2m_is_logdirty_range(struct p2m_domain *p2m, unsigned long start,
255 unsigned long end)
256 {
257 ASSERT(p2m_is_hostp2m(p2m));
258 if ( p2m->global_logdirty ||
259 rangeset_contains_range(p2m->logdirty_ranges, start, end) )
260 return 1;
261 if ( rangeset_overlaps_range(p2m->logdirty_ranges, start, end) )
262 return -1;
263 return 0;
264 }
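/*
 * A minimal caller sketch (illustrative only, not part of this file): the
 * tri-state result above distinguishes "fully log-dirty" (1), "partially
 * log-dirty" (-1) and "not log-dirty" (0) for a gfn range.
 *
 *   int res = p2m_is_logdirty_range(p2m, start, end);
 *
 *   if ( res > 0 )
 *       ; // every gfn in [start, end] must be treated as log-dirty
 *   else if ( res < 0 )
 *       ; // mixed range: callers typically recheck per 4K page
 *   else
 *       ; // no gfn in the range is log-dirty
 */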
265
266 void p2m_change_entry_type_global(struct domain *d,
267 p2m_type_t ot, p2m_type_t nt)
268 {
269 struct p2m_domain *p2m = p2m_get_hostp2m(d);
270
271 ASSERT(ot != nt);
272 ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt));
273
274 p2m_lock(p2m);
275 p2m->change_entry_type_global(p2m, ot, nt);
276 p2m->global_logdirty = (nt == p2m_ram_logdirty);
277 p2m_unlock(p2m);
278 }
279
280 void p2m_memory_type_changed(struct domain *d)
281 {
282 struct p2m_domain *p2m = p2m_get_hostp2m(d);
283
284 if ( p2m->memory_type_changed )
285 {
286 p2m_lock(p2m);
287 p2m->memory_type_changed(p2m);
288 p2m_unlock(p2m);
289 }
290 }
291
292 int p2m_set_ioreq_server(struct domain *d,
293 unsigned int flags,
294 struct hvm_ioreq_server *s)
295 {
296 struct p2m_domain *p2m = p2m_get_hostp2m(d);
297 int rc;
298
299 /*
300 * Use lock to prevent concurrent setting attempts
301 * from multiple ioreq servers.
302 */
303 spin_lock(&p2m->ioreq.lock);
304
305 /* Unmap ioreq server from p2m type by passing flags with 0. */
306 if ( flags == 0 )
307 {
308 rc = -EINVAL;
309 if ( p2m->ioreq.server != s )
310 goto out;
311
312 p2m->ioreq.server = NULL;
313 p2m->ioreq.flags = 0;
314 }
315 else
316 {
317 rc = -EBUSY;
318 if ( p2m->ioreq.server != NULL )
319 goto out;
320
321 /*
322 * It is possible that an ioreq server has just been unmapped,
323 * releasing the spin lock, while some p2m_ioreq_server entries
324 * still remain in the p2m table. We should refuse another ioreq
325 * server mapping request in such a case.
326 */
327 if ( read_atomic(&p2m->ioreq.entry_count) )
328 goto out;
329
330 p2m->ioreq.server = s;
331 p2m->ioreq.flags = flags;
332 }
333
334 rc = 0;
335
336 out:
337 spin_unlock(&p2m->ioreq.lock);
338
339 return rc;
340 }
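/*
 * Hedged usage sketch (illustrative; the real callers live in the ioreq
 * emulation code, not in this file): a server claims the p2m_ioreq_server
 * type by passing non-zero flags, and must later release it by passing
 * flags == 0 with the same server pointer.
 *
 *   rc = p2m_set_ioreq_server(d, XEN_DMOP_IOREQ_MEM_ACCESS_WRITE, s);
 *   ...                                 // handle p2m_ioreq_server faults
 *   rc = p2m_set_ioreq_server(d, 0, s); // unmap again when done
 *
 * XEN_DMOP_IOREQ_MEM_ACCESS_WRITE is named here only as an assumed example
 * flag; this function itself only distinguishes zero from non-zero flags.
 */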
341
342 struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d,
343 unsigned int *flags)
344 {
345 struct p2m_domain *p2m = p2m_get_hostp2m(d);
346 struct hvm_ioreq_server *s;
347
348 spin_lock(&p2m->ioreq.lock);
349
350 s = p2m->ioreq.server;
351 *flags = p2m->ioreq.flags;
352
353 spin_unlock(&p2m->ioreq.lock);
354 return s;
355 }
356
357 void p2m_enable_hardware_log_dirty(struct domain *d)
358 {
359 struct p2m_domain *p2m = p2m_get_hostp2m(d);
360
361 if ( p2m->enable_hardware_log_dirty )
362 {
363 p2m_lock(p2m);
364 p2m->enable_hardware_log_dirty(p2m);
365 p2m_unlock(p2m);
366 }
367 }
368
369 void p2m_disable_hardware_log_dirty(struct domain *d)
370 {
371 struct p2m_domain *p2m = p2m_get_hostp2m(d);
372
373 if ( p2m->disable_hardware_log_dirty )
374 {
375 p2m_lock(p2m);
376 p2m->disable_hardware_log_dirty(p2m);
377 p2m_unlock(p2m);
378 }
379 }
380
381 void p2m_flush_hardware_cached_dirty(struct domain *d)
382 {
383 struct p2m_domain *p2m = p2m_get_hostp2m(d);
384
385 if ( p2m->flush_hardware_cached_dirty )
386 {
387 p2m_lock(p2m);
388 p2m->flush_hardware_cached_dirty(p2m);
389 p2m_unlock(p2m);
390 }
391 }
392
393 /*
394 * Force a synchronous P2M TLB flush if a deferred flush is pending.
395 *
396 * Must be called with the p2m lock held.
397 */
398 void p2m_tlb_flush_sync(struct p2m_domain *p2m)
399 {
400 if ( p2m->need_flush ) {
401 p2m->need_flush = 0;
402 p2m->tlb_flush(p2m);
403 }
404 }
405
406 /*
407 * Unlock the p2m lock and do a P2M TLB flush if needed.
408 */
409 void p2m_unlock_and_tlb_flush(struct p2m_domain *p2m)
410 {
411 if ( p2m->need_flush ) {
412 p2m->need_flush = 0;
413 mm_write_unlock(&p2m->lock);
414 p2m->tlb_flush(p2m);
415 } else
416 mm_write_unlock(&p2m->lock);
417 }
418
419 mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn_l,
420 p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
421 unsigned int *page_order, bool_t locked)
422 {
423 mfn_t mfn;
424 gfn_t gfn = _gfn(gfn_l);
425
426 /* Unshare makes no sense without populate. */
427 if ( q & P2M_UNSHARE )
428 q |= P2M_ALLOC;
429
430 if ( !p2m || !paging_mode_translate(p2m->domain) )
431 {
432 /* Not necessarily true, but for non-translated guests, we claim
433 * it's the most generic kind of memory */
434 *t = p2m_ram_rw;
435 return _mfn(gfn_l);
436 }
437
438 if ( locked )
439 /* Grab the lock here, don't release until put_gfn */
440 gfn_lock(p2m, gfn, 0);
441
442 mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
443
444 if ( (q & P2M_UNSHARE) && p2m_is_shared(*t) )
445 {
446 ASSERT(p2m_is_hostp2m(p2m));
447 /* Try to unshare. If we fail, communicate ENOMEM without
448 * sleeping. */
449 if ( mem_sharing_unshare_page(p2m->domain, gfn_l, 0) < 0 )
450 (void)mem_sharing_notify_enomem(p2m->domain, gfn_l, 0);
451 mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
452 }
453
454 if (unlikely((p2m_is_broken(*t))))
455 {
456 /* Return invalid_mfn to avoid caller's access */
457 mfn = INVALID_MFN;
458 if ( q & P2M_ALLOC )
459 domain_crash(p2m->domain);
460 }
461
462 return mfn;
463 }
464
465 void __put_gfn(struct p2m_domain *p2m, unsigned long gfn)
466 {
467 if ( !p2m || !paging_mode_translate(p2m->domain) )
468 /* Nothing to do in this case */
469 return;
470
471 ASSERT(gfn_locked_by_me(p2m, gfn));
472
473 gfn_unlock(p2m, gfn, 0);
474 }
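/*
 * Illustrative lock-discipline sketch (an assumed caller pattern, not code
 * from this file): a locked gfn lookup must be paired with put_gfn() on
 * every path, exactly as the slow path of p2m_get_page_from_gfn() below
 * does with get_gfn_type_access()/put_gfn().
 *
 *   p2m_type_t t;
 *   p2m_access_t a;
 *   mfn_t mfn = get_gfn_type_access(p2m, gfn, &t, &a, P2M_ALLOC, NULL);
 *
 *   ...use mfn and t while the gfn lock is held...
 *
 *   put_gfn(p2m->domain, gfn);
 */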
475
476 /* Atomically look up a GFN and take a reference count on the backing page. */
477 struct page_info *p2m_get_page_from_gfn(
478 struct p2m_domain *p2m, gfn_t gfn,
479 p2m_type_t *t, p2m_access_t *a, p2m_query_t q)
480 {
481 struct page_info *page = NULL;
482 p2m_access_t _a;
483 p2m_type_t _t;
484 mfn_t mfn;
485
486 /* Allow t or a to be NULL */
487 t = t ?: &_t;
488 a = a ?: &_a;
489
490 if ( likely(!p2m_locked_by_me(p2m)) )
491 {
492 /* Fast path: look up and get out */
493 p2m_read_lock(p2m);
494 mfn = __get_gfn_type_access(p2m, gfn_x(gfn), t, a, 0, NULL, 0);
495 if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
496 && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
497 {
498 page = mfn_to_page(mfn);
499 if ( unlikely(p2m_is_foreign(*t)) )
500 {
501 struct domain *fdom = page_get_owner_and_reference(page);
502
503 ASSERT(fdom != p2m->domain);
504 if ( fdom == NULL )
505 page = NULL;
506 }
507 else if ( !get_page(page, p2m->domain) &&
508 /* Page could be shared */
509 (!p2m_is_shared(*t) || !get_page(page, dom_cow)) )
510 page = NULL;
511 }
512 p2m_read_unlock(p2m);
513
514 if ( page )
515 return page;
516
517 /* Error path: not a suitable GFN at all */
518 if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) )
519 return NULL;
520 }
521
522 /* Slow path: take the write lock and do fixups */
523 mfn = get_gfn_type_access(p2m, gfn_x(gfn), t, a, q, NULL);
524 if ( p2m_is_ram(*t) && mfn_valid(mfn) )
525 {
526 page = mfn_to_page(mfn);
527 if ( !get_page(page, p2m->domain) )
528 page = NULL;
529 }
530 put_gfn(p2m->domain, gfn_x(gfn));
531
532 return page;
533 }
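/*
 * Minimal usage sketch (illustrative): the reference taken here must be
 * dropped with put_page() once the caller is done with the frame.
 *
 *   p2m_type_t t;
 *   struct page_info *pg = p2m_get_page_from_gfn(p2m, gfn, &t, NULL,
 *                                                P2M_ALLOC);
 *   if ( pg )
 *   {
 *       ...access the page...
 *       put_page(pg);
 *   }
 */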
534
535 /* Returns: 0 for success, -errno for failure */
536 int p2m_set_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
537 unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
538 {
539 struct domain *d = p2m->domain;
540 unsigned long todo = 1ul << page_order;
541 unsigned int order;
542 int set_rc, rc = 0;
543
544 ASSERT(gfn_locked_by_me(p2m, gfn));
545
546 while ( todo )
547 {
548 if ( hap_enabled(d) )
549 {
550 unsigned long fn_mask = !mfn_eq(mfn, INVALID_MFN) ? mfn_x(mfn) : 0;
551
552 fn_mask |= gfn_x(gfn) | todo;
553
554 order = (!(fn_mask & ((1ul << PAGE_ORDER_1G) - 1)) &&
555 hap_has_1gb) ? PAGE_ORDER_1G :
556 (!(fn_mask & ((1ul << PAGE_ORDER_2M) - 1)) &&
557 hap_has_2mb) ? PAGE_ORDER_2M : PAGE_ORDER_4K;
558 }
559 else
560 order = 0;
561
562 set_rc = p2m->set_entry(p2m, gfn, mfn, order, p2mt, p2ma, -1);
563 if ( set_rc )
564 rc = set_rc;
565
566 gfn = gfn_add(gfn, 1ul << order);
567 if ( !mfn_eq(mfn, INVALID_MFN) )
568 mfn = mfn_add(mfn, 1ul << order);
569 todo -= 1ul << order;
570 }
571
572 return rc;
573 }
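/*
 * Worked example for the order selection above (illustrative): with HAP
 * enabled, hap_has_2mb set, and a request for gfn 0x200, mfn 0x1200,
 * page_order PAGE_ORDER_2M, fn_mask = 0x1200 | 0x200 | 0x200 has its low
 * 9 bits clear but not its low 18, so a single 2MB (PAGE_ORDER_2M) entry
 * is written.  Were the gfn or mfn not 2MB-aligned, the loop would instead
 * fall back to 512 separate 4K (PAGE_ORDER_4K) entries.
 */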
574
575 mfn_t p2m_alloc_ptp(struct p2m_domain *p2m, unsigned int level)
576 {
577 struct page_info *pg;
578
579 ASSERT(p2m);
580 ASSERT(p2m->domain);
581 ASSERT(p2m->domain->arch.paging.alloc_page);
582 pg = p2m->domain->arch.paging.alloc_page(p2m->domain);
583 if ( !pg )
584 return INVALID_MFN;
585
586 page_list_add_tail(pg, &p2m->pages);
587 BUILD_BUG_ON(PGT_l1_page_table * 2 != PGT_l2_page_table);
588 BUILD_BUG_ON(PGT_l1_page_table * 3 != PGT_l3_page_table);
589 BUILD_BUG_ON(PGT_l1_page_table * 4 != PGT_l4_page_table);
590 pg->u.inuse.type_info = (PGT_l1_page_table * level) | 1 | PGT_validated;
591
592 return page_to_mfn(pg);
593 }
594
595 void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg)
596 {
597 ASSERT(pg);
598 ASSERT(p2m);
599 ASSERT(p2m->domain);
600 ASSERT(p2m->domain->arch.paging.free_page);
601
602 page_list_del(pg, &p2m->pages);
603 p2m->domain->arch.paging.free_page(p2m->domain, pg);
604
605 return;
606 }
607
608 /*
609 * Allocate a new p2m table for a domain.
610 *
611 * The structure of the p2m table is that of a pagetable for xen (i.e. it is
612 * controlled by CONFIG_PAGING_LEVELS).
613 *
614 * Returns 0 for success, -errno for failure.
615 */
616 int p2m_alloc_table(struct p2m_domain *p2m)
617 {
618 mfn_t top_mfn;
619 struct domain *d = p2m->domain;
620 int rc = 0;
621
622 p2m_lock(p2m);
623
624 if ( p2m_is_hostp2m(p2m)
625 && !page_list_empty(&d->page_list) )
626 {
627 P2M_ERROR("dom %d already has memory allocated\n", d->domain_id);
628 p2m_unlock(p2m);
629 return -EINVAL;
630 }
631
632 if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
633 {
634 P2M_ERROR("p2m already allocated for this domain\n");
635 p2m_unlock(p2m);
636 return -EINVAL;
637 }
638
639 P2M_PRINTK("allocating p2m table\n");
640
641 top_mfn = p2m_alloc_ptp(p2m, 4);
642 if ( mfn_eq(top_mfn, INVALID_MFN) )
643 {
644 p2m_unlock(p2m);
645 return -ENOMEM;
646 }
647
648 p2m->phys_table = pagetable_from_mfn(top_mfn);
649
650 if ( hap_enabled(d) )
651 iommu_share_p2m_table(d);
652
653 P2M_PRINTK("populating p2m table\n");
654
655 /* Initialise physmap tables for slot zero. Other code assumes this. */
656 p2m->defer_nested_flush = 1;
657 rc = p2m_set_entry(p2m, _gfn(0), INVALID_MFN, PAGE_ORDER_4K,
658 p2m_invalid, p2m->default_access);
659 p2m->defer_nested_flush = 0;
660 p2m_unlock(p2m);
661 if ( !rc )
662 P2M_PRINTK("p2m table initialised for slot zero\n");
663 else
664 P2M_PRINTK("failed to initialise p2m table for slot zero (%d)\n", rc);
665 return rc;
666 }
667
668 /*
669 * hvm fixme: when adding support for pvh non-hardware domains, this path must
670 * cleanup any foreign p2m types (release refcnts on them).
671 */
672 void p2m_teardown(struct p2m_domain *p2m)
673 /* Return all the p2m pages to Xen.
674 * We know we don't have any extra mappings to these pages */
675 {
676 struct page_info *pg;
677 struct domain *d;
678
679 if (p2m == NULL)
680 return;
681
682 d = p2m->domain;
683
684 p2m_lock(p2m);
685 ASSERT(atomic_read(&d->shr_pages) == 0);
686 p2m->phys_table = pagetable_null();
687
688 while ( (pg = page_list_remove_head(&p2m->pages)) )
689 d->arch.paging.free_page(d, pg);
690 p2m_unlock(p2m);
691 }
692
693 void p2m_final_teardown(struct domain *d)
694 {
695 /*
696 * We must teardown both of them unconditionally because
697 * we initialise them unconditionally.
698 */
699 p2m_teardown_altp2m(d);
700 p2m_teardown_nestedp2m(d);
701
702 /* Iterate over all p2m tables per domain */
703 p2m_teardown_hostp2m(d);
704 }
705
706
707 static int
708 p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn_l, unsigned long mfn,
709 unsigned int page_order)
710 {
711 unsigned long i;
712 gfn_t gfn = _gfn(gfn_l);
713 mfn_t mfn_return;
714 p2m_type_t t;
715 p2m_access_t a;
716
717 if ( !paging_mode_translate(p2m->domain) )
718 {
719 int rc = 0;
720
721 if ( need_iommu(p2m->domain) )
722 {
723 for ( i = 0; i < (1 << page_order); i++ )
724 {
725 int ret = iommu_unmap_page(p2m->domain, mfn + i);
726
727 if ( !rc )
728 rc = ret;
729 }
730 }
731
732 return rc;
733 }
734
735 ASSERT(gfn_locked_by_me(p2m, gfn));
736 P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn_l, mfn);
737
738 if ( mfn_valid(_mfn(mfn)) )
739 {
740 for ( i = 0; i < (1UL << page_order); i++ )
741 {
742 mfn_return = p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0,
743 NULL, NULL);
744 if ( !p2m_is_grant(t) && !p2m_is_shared(t) && !p2m_is_foreign(t) )
745 set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
746 ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );
747 }
748 }
749 return p2m_set_entry(p2m, gfn, INVALID_MFN, page_order, p2m_invalid,
750 p2m->default_access);
751 }
752
753 int
754 guest_physmap_remove_page(struct domain *d, gfn_t gfn,
755 mfn_t mfn, unsigned int page_order)
756 {
757 struct p2m_domain *p2m = p2m_get_hostp2m(d);
758 int rc;
759 gfn_lock(p2m, gfn, page_order);
760 rc = p2m_remove_page(p2m, gfn_x(gfn), mfn_x(mfn), page_order);
761 gfn_unlock(p2m, gfn, page_order);
762 return rc;
763 }
764
765 int
766 guest_physmap_add_entry(struct domain *d, gfn_t gfn, mfn_t mfn,
767 unsigned int page_order, p2m_type_t t)
768 {
769 struct p2m_domain *p2m = p2m_get_hostp2m(d);
770 unsigned long i;
771 gfn_t ogfn;
772 p2m_type_t ot;
773 p2m_access_t a;
774 mfn_t omfn;
775 int pod_count = 0;
776 int rc = 0;
777
778 if ( !paging_mode_translate(d) )
779 {
780 if ( need_iommu(d) && t == p2m_ram_rw )
781 {
782 for ( i = 0; i < (1 << page_order); i++ )
783 {
784 rc = iommu_map_page(d, mfn_x(mfn_add(mfn, i)),
785 mfn_x(mfn_add(mfn, i)),
786 IOMMUF_readable|IOMMUF_writable);
787 if ( rc != 0 )
788 {
789 while ( i-- > 0 )
790 /* If statement to satisfy __must_check. */
791 if ( iommu_unmap_page(d, mfn_x(mfn_add(mfn, i))) )
792 continue;
793
794 return rc;
795 }
796 }
797 }
798 return 0;
799 }
800
801 /* Foreign pages are added through p2m_add_foreign. */
802 if ( p2m_is_foreign(t) )
803 return -EINVAL;
804
805 p2m_lock(p2m);
806
807 P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn_x(gfn), mfn_x(mfn));
808
809 /* First, remove m->p mappings for existing p->m mappings */
810 for ( i = 0; i < (1UL << page_order); i++ )
811 {
812 omfn = p2m->get_entry(p2m, gfn_add(gfn, i), &ot,
813 &a, 0, NULL, NULL);
814 if ( p2m_is_shared(ot) )
815 {
816 /* Do an unshare to cleanly take care of all corner
817 * cases. */
818 int rc;
819 rc = mem_sharing_unshare_page(p2m->domain,
820 gfn_x(gfn_add(gfn, i)), 0);
821 if ( rc )
822 {
823 p2m_unlock(p2m);
824 /* NOTE: Should a guest domain bring this upon itself,
825 * there is not a whole lot we can do. We are buried
826 * deep in locks from most code paths by now. So, fail
827 * the call and don't try to sleep on a wait queue
828 * while placing the mem event.
829 *
830 * However, all current (changeset 3432abcf9380) code
831 * paths avoid this unsavoury situation. For now.
832 *
833 * Foreign domains are okay to place an event as they
834 * won't go to sleep. */
835 (void)mem_sharing_notify_enomem(p2m->domain,
836 gfn_x(gfn_add(gfn, i)),
837 0);
838 return rc;
839 }
840 omfn = p2m->get_entry(p2m, gfn_add(gfn, i),
841 &ot, &a, 0, NULL, NULL);
842 ASSERT(!p2m_is_shared(ot));
843 }
844 if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
845 {
846 /* Really shouldn't be unmapping grant/foreign maps this way */
847 domain_crash(d);
848 p2m_unlock(p2m);
849
850 return -EINVAL;
851 }
852 else if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
853 {
854 ASSERT(mfn_valid(omfn));
855 set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
856 }
857 else if ( ot == p2m_populate_on_demand )
858 {
859 /* Count how many PoD entries we'll be replacing if successful */
860 pod_count++;
861 }
862 else if ( p2m_is_paging(ot) && (ot != p2m_ram_paging_out) )
863 {
864 /* We're plugging a hole in the physmap where a paged out page was */
865 atomic_dec(&d->paged_pages);
866 }
867 }
868
869 /* Then, look for m->p mappings for this range and deal with them */
870 for ( i = 0; i < (1UL << page_order); i++ )
871 {
872 if ( page_get_owner(mfn_to_page(mfn_add(mfn, i))) == dom_cow )
873 {
874 /* This is no way to add a shared page to your physmap! */
875 gdprintk(XENLOG_ERR, "Adding shared mfn %lx directly to dom%d physmap not allowed.\n",
876 mfn_x(mfn_add(mfn, i)), d->domain_id);
877 p2m_unlock(p2m);
878 return -EINVAL;
879 }
880 if ( page_get_owner(mfn_to_page(mfn_add(mfn, i))) != d )
881 continue;
882 ogfn = _gfn(mfn_to_gfn(d, mfn_add(mfn, i)));
883 if ( !gfn_eq(ogfn, _gfn(INVALID_M2P_ENTRY)) &&
884 !gfn_eq(ogfn, gfn_add(gfn, i)) )
885 {
886 /* This machine frame is already mapped at another physical
887 * address */
888 P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
889 mfn_x(mfn_add(mfn, i)), gfn_x(ogfn),
890 gfn_x(gfn_add(gfn, i)));
891 omfn = p2m->get_entry(p2m, ogfn, &ot, &a, 0, NULL, NULL);
892 if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
893 {
894 ASSERT(mfn_valid(omfn));
895 P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
896 gfn_x(ogfn) , mfn_x(omfn));
897 if ( mfn_eq(omfn, mfn_add(mfn, i)) )
898 p2m_remove_page(p2m, gfn_x(ogfn), mfn_x(mfn_add(mfn, i)),
899 0);
900 }
901 }
902 }
903
904 /* Now, actually do the two-way mapping */
905 if ( mfn_valid(mfn) )
906 {
907 rc = p2m_set_entry(p2m, gfn, mfn, page_order, t,
908 p2m->default_access);
909 if ( rc )
910 goto out; /* Failed to update p2m, bail without updating m2p. */
911
912 if ( !p2m_is_grant(t) )
913 {
914 for ( i = 0; i < (1UL << page_order); i++ )
915 set_gpfn_from_mfn(mfn_x(mfn_add(mfn, i)),
916 gfn_x(gfn_add(gfn, i)));
917 }
918 }
919 else
920 {
921 gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
922 gfn_x(gfn), mfn_x(mfn));
923 rc = p2m_set_entry(p2m, gfn, INVALID_MFN, page_order,
924 p2m_invalid, p2m->default_access);
925 if ( rc == 0 )
926 {
927 pod_lock(p2m);
928 p2m->pod.entry_count -= pod_count;
929 BUG_ON(p2m->pod.entry_count < 0);
930 pod_unlock(p2m);
931 }
932 }
933
934 out:
935 p2m_unlock(p2m);
936
937 return rc;
938 }
939
940
941 /*
942 * Modify the p2m type of a single gfn from ot to nt.
943 * Returns: 0 for success, -errno for failure.
944 * Resets the access permissions.
945 */
946 int p2m_change_type_one(struct domain *d, unsigned long gfn_l,
947 p2m_type_t ot, p2m_type_t nt)
948 {
949 p2m_access_t a;
950 p2m_type_t pt;
951 gfn_t gfn = _gfn(gfn_l);
952 mfn_t mfn;
953 struct p2m_domain *p2m = p2m_get_hostp2m(d);
954 int rc;
955
956 BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
957 BUG_ON(p2m_is_foreign(ot) || p2m_is_foreign(nt));
958
959 gfn_lock(p2m, gfn, 0);
960
961 mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, NULL, NULL);
962 rc = likely(pt == ot)
963 ? p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt,
964 p2m->default_access)
965 : -EBUSY;
966
967 gfn_unlock(p2m, gfn, 0);
968
969 return rc;
970 }
971
972 /* Modify the p2m type of a range of gfns from ot to nt. */
973 void p2m_change_type_range(struct domain *d,
974 unsigned long start, unsigned long end,
975 p2m_type_t ot, p2m_type_t nt)
976 {
977 unsigned long gfn = start;
978 struct p2m_domain *p2m = p2m_get_hostp2m(d);
979 int rc = 0;
980
981 ASSERT(ot != nt);
982 ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt));
983
984 p2m_lock(p2m);
985 p2m->defer_nested_flush = 1;
986
987 if ( unlikely(end > p2m->max_mapped_pfn) )
988 {
989 if ( !gfn )
990 {
991 p2m->change_entry_type_global(p2m, ot, nt);
992 gfn = end;
993 }
994 end = p2m->max_mapped_pfn + 1;
995 }
996 if ( gfn < end )
997 rc = p2m->change_entry_type_range(p2m, ot, nt, gfn, end - 1);
998 if ( rc )
999 {
1000 printk(XENLOG_G_ERR "Error %d changing Dom%d GFNs [%lx,%lx] from %d to %d\n",
1001 rc, d->domain_id, start, end - 1, ot, nt);
1002 domain_crash(d);
1003 }
1004
1005 switch ( nt )
1006 {
1007 case p2m_ram_rw:
1008 if ( ot == p2m_ram_logdirty )
1009 rc = rangeset_remove_range(p2m->logdirty_ranges, start, end - 1);
1010 break;
1011 case p2m_ram_logdirty:
1012 if ( ot == p2m_ram_rw )
1013 rc = rangeset_add_range(p2m->logdirty_ranges, start, end - 1);
1014 break;
1015 default:
1016 break;
1017 }
1018 if ( rc )
1019 {
1020 printk(XENLOG_G_ERR "Error %d manipulating Dom%d's log-dirty ranges\n",
1021 rc, d->domain_id);
1022 domain_crash(d);
1023 }
1024
1025 p2m->defer_nested_flush = 0;
1026 if ( nestedhvm_enabled(d) )
1027 p2m_flush_nestedp2m(d);
1028 p2m_unlock(p2m);
1029 }
1030
1031 /*
1032 * Finish p2m type change for gfns which are marked as need_recalc in a range.
1033 * Returns: 0/1 for success, negative for failure
1034 */
1035 int p2m_finish_type_change(struct domain *d,
1036 gfn_t first_gfn, unsigned long max_nr)
1037 {
1038 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1039 unsigned long gfn = gfn_x(first_gfn);
1040 unsigned long last_gfn = gfn + max_nr - 1;
1041 int rc = 0;
1042
1043 p2m_lock(p2m);
1044
1045 last_gfn = min(last_gfn, p2m->max_mapped_pfn);
1046 while ( gfn <= last_gfn )
1047 {
1048 rc = p2m->recalc(p2m, gfn);
1049 /*
1050 * ept->recalc could return 0/1/-ENOMEM. pt->recalc could return
1051 * 0/-ENOMEM/-ENOENT, -ENOENT isn't an error as we are looping
1052 * gfn here.
1053 */
1054 if ( rc == -ENOENT )
1055 rc = 0;
1056 else if ( rc < 0 )
1057 {
1058 gdprintk(XENLOG_ERR, "p2m->recalc failed! Dom%d gfn=%lx\n",
1059 d->domain_id, gfn);
1060 break;
1061 }
1062
1063 gfn++;
1064 }
1065
1066 p2m_unlock(p2m);
1067
1068 return rc;
1069 }
1070
1071 /*
1072 * Returns:
1073 * 0 for success
1074 * -errno for failure
1075 * 1 + new order for caller to retry with smaller order (guaranteed
1076 * to be smaller than order passed in)
1077 */
1078 static int set_typed_p2m_entry(struct domain *d, unsigned long gfn_l,
1079 mfn_t mfn, unsigned int order,
1080 p2m_type_t gfn_p2mt, p2m_access_t access)
1081 {
1082 int rc = 0;
1083 p2m_access_t a;
1084 p2m_type_t ot;
1085 mfn_t omfn;
1086 gfn_t gfn = _gfn(gfn_l);
1087 unsigned int cur_order = 0;
1088 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1089
1090 if ( !paging_mode_translate(d) )
1091 return -EIO;
1092
1093 gfn_lock(p2m, gfn, order);
1094 omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
1095 if ( cur_order < order )
1096 {
1097 gfn_unlock(p2m, gfn, order);
1098 return cur_order + 1;
1099 }
1100 if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
1101 {
1102 gfn_unlock(p2m, gfn, order);
1103 domain_crash(d);
1104 return -ENOENT;
1105 }
1106 else if ( p2m_is_ram(ot) )
1107 {
1108 unsigned long i;
1109
1110 for ( i = 0; i < (1UL << order); ++i )
1111 {
1112 ASSERT(mfn_valid(_mfn(mfn_x(omfn) + i)));
1113 set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
1114 }
1115 }
1116
1117 P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn_l, mfn_x(mfn));
1118 rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access);
1119 if ( rc )
1120 gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (0x%"PRI_mfn")\n",
1121 gfn_l, order, rc, mfn_x(mfn));
1122 else if ( p2m_is_pod(ot) )
1123 {
1124 pod_lock(p2m);
1125 p2m->pod.entry_count -= 1UL << order;
1126 BUG_ON(p2m->pod.entry_count < 0);
1127 pod_unlock(p2m);
1128 }
1129 gfn_unlock(p2m, gfn, order);
1130
1131 return rc;
1132 }
1133
1134 /* Set foreign mfn in the given guest's p2m table. */
1135 static int set_foreign_p2m_entry(struct domain *d, unsigned long gfn,
1136 mfn_t mfn)
1137 {
1138 return set_typed_p2m_entry(d, gfn, mfn, PAGE_ORDER_4K, p2m_map_foreign,
1139 p2m_get_hostp2m(d)->default_access);
1140 }
1141
1142 int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
1143 unsigned int order, p2m_access_t access)
1144 {
1145 if ( order > PAGE_ORDER_4K &&
1146 rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
1147 mfn_x(mfn) + (1UL << order) - 1) )
1148 return PAGE_ORDER_4K + 1;
1149
1150 return set_typed_p2m_entry(d, gfn, mfn, order, p2m_mmio_direct, access);
1151 }
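/*
 * Hedged caller sketch for the "1 + new order" retry protocol used by
 * set_typed_p2m_entry()/set_mmio_p2m_entry()/clear_mmio_p2m_entry()
 * (illustrative; the actual callers live elsewhere):
 *
 *   while ( nr )
 *   {
 *       unsigned int order = largest_suitable_order(gfn, mfn, nr);
 *       int rc = set_mmio_p2m_entry(d, gfn, mfn, order, access);
 *
 *       if ( rc > 0 )          // retry the same range with order rc - 1
 *           continue;
 *       if ( rc < 0 )
 *           return rc;         // genuine error
 *       gfn += 1UL << order;   // success: advance past the mapped chunk
 *       mfn = mfn_add(mfn, 1UL << order);
 *       nr -= 1UL << order;
 *   }
 *
 * largest_suitable_order() is a placeholder, not a real helper; only the
 * rc > 0 retry convention comes from the functions in this file.
 */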
1152
1153 int set_identity_p2m_entry(struct domain *d, unsigned long gfn_l,
1154 p2m_access_t p2ma, unsigned int flag)
1155 {
1156 p2m_type_t p2mt;
1157 p2m_access_t a;
1158 gfn_t gfn = _gfn(gfn_l);
1159 mfn_t mfn;
1160 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1161 int ret;
1162
1163 if ( !paging_mode_translate(p2m->domain) )
1164 {
1165 if ( !need_iommu(d) )
1166 return 0;
1167 return iommu_map_page(d, gfn_l, gfn_l, IOMMUF_readable|IOMMUF_writable);
1168 }
1169
1170 gfn_lock(p2m, gfn, 0);
1171
1172 mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1173
1174 if ( p2mt == p2m_invalid || p2mt == p2m_mmio_dm )
1175 ret = p2m_set_entry(p2m, gfn, _mfn(gfn_l), PAGE_ORDER_4K,
1176 p2m_mmio_direct, p2ma);
1177 else if ( mfn_x(mfn) == gfn_l && p2mt == p2m_mmio_direct && a == p2ma )
1178 ret = 0;
1179 else
1180 {
1181 if ( flag & XEN_DOMCTL_DEV_RDM_RELAXED )
1182 ret = 0;
1183 else
1184 ret = -EBUSY;
1185 printk(XENLOG_G_WARNING
1186 "Cannot setup identity map d%d:%lx,"
1187 " gfn already mapped to %lx.\n",
1188 d->domain_id, gfn_l, mfn_x(mfn));
1189 }
1190
1191 gfn_unlock(p2m, gfn, 0);
1192 return ret;
1193 }
1194
1195 /*
1196 * Returns:
1197 * 0 for success
1198 * -errno for failure
1199 * order+1 for caller to retry with order (guaranteed smaller than
1200 * the order value passed in)
1201 */
1202 int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn_l, mfn_t mfn,
1203 unsigned int order)
1204 {
1205 int rc = -EINVAL;
1206 gfn_t gfn = _gfn(gfn_l);
1207 mfn_t actual_mfn;
1208 p2m_access_t a;
1209 p2m_type_t t;
1210 unsigned int cur_order = 0;
1211 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1212
1213 if ( !paging_mode_translate(d) )
1214 return -EIO;
1215
1216 gfn_lock(p2m, gfn, order);
1217 actual_mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, &cur_order, NULL);
1218 if ( cur_order < order )
1219 {
1220 rc = cur_order + 1;
1221 goto out;
1222 }
1223
1224 /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
1225 if ( mfn_eq(actual_mfn, INVALID_MFN) || (t != p2m_mmio_direct) )
1226 {
1227 gdprintk(XENLOG_ERR,
1228 "gfn_to_mfn failed! gfn=%08lx type:%d\n", gfn_l, t);
1229 goto out;
1230 }
1231 if ( mfn_x(mfn) != mfn_x(actual_mfn) )
1232 gdprintk(XENLOG_WARNING,
1233 "no mapping between mfn %08lx and gfn %08lx\n",
1234 mfn_x(mfn), gfn_l);
1235 rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order, p2m_invalid,
1236 p2m->default_access);
1237
1238 out:
1239 gfn_unlock(p2m, gfn, order);
1240
1241 return rc;
1242 }
1243
1244 int clear_identity_p2m_entry(struct domain *d, unsigned long gfn_l)
1245 {
1246 p2m_type_t p2mt;
1247 p2m_access_t a;
1248 gfn_t gfn = _gfn(gfn_l);
1249 mfn_t mfn;
1250 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1251 int ret;
1252
1253 if ( !paging_mode_translate(d) )
1254 {
1255 if ( !need_iommu(d) )
1256 return 0;
1257 return iommu_unmap_page(d, gfn_l);
1258 }
1259
1260 gfn_lock(p2m, gfn, 0);
1261
1262 mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1263 if ( p2mt == p2m_mmio_direct && mfn_x(mfn) == gfn_l )
1264 {
1265 ret = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
1266 p2m_invalid, p2m->default_access);
1267 gfn_unlock(p2m, gfn, 0);
1268 }
1269 else
1270 {
1271 gfn_unlock(p2m, gfn, 0);
1272 printk(XENLOG_G_WARNING
1273 "non-identity map d%d:%lx not cleared (mapped to %lx)\n",
1274 d->domain_id, gfn_l, mfn_x(mfn));
1275 ret = 0;
1276 }
1277
1278 return ret;
1279 }
1280
1281 /* Returns: 0 for success, -errno for failure */
1282 int set_shared_p2m_entry(struct domain *d, unsigned long gfn_l, mfn_t mfn)
1283 {
1284 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1285 int rc = 0;
1286 gfn_t gfn = _gfn(gfn_l);
1287 p2m_access_t a;
1288 p2m_type_t ot;
1289 mfn_t omfn;
1290 unsigned long pg_type;
1291
1292 if ( !paging_mode_translate(p2m->domain) )
1293 return -EIO;
1294
1295 gfn_lock(p2m, gfn, 0);
1296 omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
1297 /* At the moment we only allow p2m change if gfn has already been made
1298 * sharable first */
1299 ASSERT(p2m_is_shared(ot));
1300 ASSERT(mfn_valid(omfn));
1301 /* Set the m2p entry to invalid only if there are no further type
1302 * refs to this page as shared */
1303 pg_type = read_atomic(&(mfn_to_page(omfn)->u.inuse.type_info));
1304 if ( (pg_type & PGT_count_mask) == 0
1305 || (pg_type & PGT_type_mask) != PGT_shared_page )
1306 set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
1307
1308 P2M_DEBUG("set shared %lx %lx\n", gfn_l, mfn_x(mfn));
1309 rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_shared,
1310 p2m->default_access);
1311 gfn_unlock(p2m, gfn, 0);
1312 if ( rc )
1313 gdprintk(XENLOG_ERR,
1314 "p2m_set_entry failed! mfn=%08lx rc:%d\n",
1315 mfn_x(get_gfn_query_unlocked(p2m->domain, gfn_l, &ot)), rc);
1316 return rc;
1317 }
1318
1319 /**
1320 * p2m_mem_paging_nominate - Mark a guest page as to-be-paged-out
1321 * @d: guest domain
1322 * @gfn: guest page to nominate
1323 *
1324 * Returns 0 for success or negative errno values if gfn is not pageable.
1325 *
1326 * p2m_mem_paging_nominate() is called by the pager and checks if a guest page
1327 * can be paged out. If the following conditions are met the p2mt will be
1328 * changed:
1329 * - the gfn is backed by a mfn
1330 * - the p2mt of the gfn is pageable
1331 * - the mfn is not used for IO
1332 * - the mfn has exactly one user and has no special meaning
1333 *
1334 * Once the p2mt is changed the page is readonly for the guest. On success the
1335 * pager can write the page contents to disk and later evict the page.
1336 */
1337 int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn_l)
1338 {
1339 struct page_info *page;
1340 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1341 p2m_type_t p2mt;
1342 p2m_access_t a;
1343 gfn_t gfn = _gfn(gfn_l);
1344 mfn_t mfn;
1345 int ret = -EBUSY;
1346
1347 gfn_lock(p2m, gfn, 0);
1348
1349 mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1350
1351 /* Check if mfn is valid */
1352 if ( !mfn_valid(mfn) )
1353 goto out;
1354
1355 /* Check p2m type */
1356 if ( !p2m_is_pageable(p2mt) )
1357 goto out;
1358
1359 /* Check for io memory page */
1360 if ( is_iomem_page(mfn) )
1361 goto out;
1362
1363 /* Check page count and type */
1364 page = mfn_to_page(mfn);
1365 if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
1366 (1 | PGC_allocated) )
1367 goto out;
1368
1369 if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
1370 goto out;
1371
1372 /* Fix p2m entry */
1373 ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_out, a);
1374
1375 out:
1376 gfn_unlock(p2m, gfn, 0);
1377 return ret;
1378 }
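/*
 * Illustrative pager-side sequence built from the functions in this file
 * (a sketch of the intended flow, not a literal in-tree caller): the pager
 * first nominates a gfn, saves its contents, then tries to evict it; a
 * later guest access triggers p2m_mem_paging_populate(), after which the
 * pager calls p2m_mem_paging_prep() with the saved data and finally
 * p2m_mem_paging_resume() via the vm_event response.
 *
 *   if ( p2m_mem_paging_nominate(d, gfn) == 0 &&
 *        p2m_mem_paging_evict(d, gfn) == 0 )
 *       ; // page is now paged out; its contents live only in the pager
 */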
1379
1380 /**
1381 * p2m_mem_paging_evict - Mark a guest page as paged-out
1382 * @d: guest domain
1383 * @gfn: guest page to evict
1384 *
1385 * Returns 0 for success or negative errno values if eviction is not possible.
1386 *
1387 * p2m_mem_paging_evict() is called by the pager and will free a guest page and
1388 * release it back to Xen. If the following conditions are met the page can be
1389 * freed:
1390 * - the gfn is backed by a mfn
1391 * - the gfn was nominated
1392 * - the mfn has still exactly one user and has no special meaning
1393 *
1394 * After successful nomination some other process could have mapped the page. In
1395 * this case eviction can not be done. If the gfn was populated before the pager
1396 * could evict it, eviction can not be done either. In this case the gfn is
1397 * still backed by a mfn.
1398 */
1399 int p2m_mem_paging_evict(struct domain *d, unsigned long gfn_l)
1400 {
1401 struct page_info *page;
1402 p2m_type_t p2mt;
1403 p2m_access_t a;
1404 gfn_t gfn = _gfn(gfn_l);
1405 mfn_t mfn;
1406 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1407 int ret = -EBUSY;
1408
1409 gfn_lock(p2m, gfn, 0);
1410
1411 /* Get mfn */
1412 mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1413 if ( unlikely(!mfn_valid(mfn)) )
1414 goto out;
1415
1416 /* Allow only nominated pages */
1417 if ( p2mt != p2m_ram_paging_out )
1418 goto out;
1419
1420 /* Get the page so it doesn't get modified under Xen's feet */
1421 page = mfn_to_page(mfn);
1422 if ( unlikely(!get_page(page, d)) )
1423 goto out;
1424
1425 /* Check page count and type once more */
1426 if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
1427 (2 | PGC_allocated) )
1428 goto out_put;
1429
1430 if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
1431 goto out_put;
1432
1433 /* Decrement guest domain's ref count of the page */
1434 if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
1435 put_page(page);
1436
1437 /* Remove mapping from p2m table */
1438 ret = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
1439 p2m_ram_paged, a);
1440
1441 /* Clear content before returning the page to Xen */
1442 scrub_one_page(page);
1443
1444 /* Track number of paged gfns */
1445 atomic_inc(&d->paged_pages);
1446
1447 out_put:
1448 /* Put the page back so it gets freed */
1449 put_page(page);
1450
1451 out:
1452 gfn_unlock(p2m, gfn, 0);
1453 return ret;
1454 }
1455
1456 /**
1457 * p2m_mem_paging_drop_page - Tell pager to drop its reference to a paged page
1458 * @d: guest domain
1459 * @gfn: guest page to drop
1460 *
1461 * p2m_mem_paging_drop_page() will notify the pager that a paged-out gfn was
1462 * released by the guest. The pager is supposed to drop its reference of the
1463 * gfn.
1464 */
1465 void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn,
1466 p2m_type_t p2mt)
1467 {
1468 vm_event_request_t req = {
1469 .reason = VM_EVENT_REASON_MEM_PAGING,
1470 .u.mem_paging.gfn = gfn
1471 };
1472
1473 /* We allow no ring in this unique case, because it won't affect
1474 * correctness of the guest execution at this point. If this is the only
1475 * page that happens to be paged-out, we'll be okay.. but it's likely the
1476 * guest will crash shortly anyways. */
1477 int rc = vm_event_claim_slot(d, d->vm_event_paging);
1478 if ( rc < 0 )
1479 return;
1480
1481 /* Send release notification to pager */
1482 req.u.mem_paging.flags = MEM_PAGING_DROP_PAGE;
1483
1484 /* Update stats unless the page hasn't yet been evicted */
1485 if ( p2mt != p2m_ram_paging_out )
1486 atomic_dec(&d->paged_pages);
1487 else
1488 /* Evict will fail now, tag this request for pager */
1489 req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL;
1490
1491 vm_event_put_request(d, d->vm_event_paging, &req);
1492 }
1493
1494 /**
1495 * p2m_mem_paging_populate - Tell pager to populate a paged page
1496 * @d: guest domain
1497 * @gfn: guest page in paging state
1498 *
1499 * p2m_mem_paging_populate() will notify the pager that a page in any of the
1500 * paging states needs to be written back into the guest.
1501 * This function needs to be called whenever gfn_to_mfn() returns any of the p2m
1502 * paging types because the gfn may not be backed by a mfn.
1503 *
1504 * The gfn can be in any of the paging states, but the pager needs only be
1505 * notified when the gfn is in the paging-out path (paging_out or paged). This
1506 * function may be called more than once from several vcpus. If the vcpu belongs
1507 * to the guest, the vcpu must be stopped and the pager notified that the vcpu
1508 * was stopped. The pager needs to handle several requests for the same gfn.
1509 *
1510 * If the gfn is not in the paging-out path and the vcpu does not belong to the
1511 * guest, nothing needs to be done and the function assumes that a request was
1512 * already sent to the pager. In this case the caller has to try again until the
1513 * gfn is fully paged in again.
1514 */
1515 void p2m_mem_paging_populate(struct domain *d, unsigned long gfn_l)
1516 {
1517 struct vcpu *v = current;
1518 vm_event_request_t req = {
1519 .reason = VM_EVENT_REASON_MEM_PAGING,
1520 .u.mem_paging.gfn = gfn_l
1521 };
1522 p2m_type_t p2mt;
1523 p2m_access_t a;
1524 gfn_t gfn = _gfn(gfn_l);
1525 mfn_t mfn;
1526 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1527
1528 /* We're paging. There should be a ring */
1529 int rc = vm_event_claim_slot(d, d->vm_event_paging);
1530 if ( rc == -ENOSYS )
1531 {
1532 gdprintk(XENLOG_ERR, "Domain %hu paging gfn %lx yet no ring "
1533 "in place\n", d->domain_id, gfn_l);
1534 /* Prevent the vcpu from faulting repeatedly on the same gfn */
1535 if ( v->domain == d )
1536 vcpu_pause_nosync(v);
1537 domain_crash(d);
1538 return;
1539 }
1540 else if ( rc < 0 )
1541 return;
1542
1543 /* Fix p2m mapping */
1544 gfn_lock(p2m, gfn, 0);
1545 mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1546 /* Allow only nominated or evicted pages to enter page-in path */
1547 if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
1548 {
1549 /* Evict will fail now, tag this request for pager */
1550 if ( p2mt == p2m_ram_paging_out )
1551 req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL;
1552
1553 p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in, a);
1554 }
1555 gfn_unlock(p2m, gfn, 0);
1556
1557 /* Pause domain if request came from guest and gfn has paging type */
1558 if ( p2m_is_paging(p2mt) && v->domain == d )
1559 {
1560 vm_event_vcpu_pause(v);
1561 req.flags |= VM_EVENT_FLAG_VCPU_PAUSED;
1562 }
1563 /* No need to inform pager if the gfn is not in the page-out path */
1564 else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
1565 {
1566 /* gfn is already on its way back and vcpu is not paused */
1567 vm_event_cancel_slot(d, d->vm_event_paging);
1568 return;
1569 }
1570
1571 /* Send request to pager */
1572 req.u.mem_paging.p2mt = p2mt;
1573 req.vcpu_id = v->vcpu_id;
1574
1575 vm_event_put_request(d, d->vm_event_paging, &req);
1576 }
1577
1578 /**
1579 * p2m_mem_paging_prep - Allocate a new page for the guest
1580 * @d: guest domain
1581 * @gfn: guest page in paging state
1582 *
1583 * p2m_mem_paging_prep() will allocate a new page for the guest if the gfn is
1584 * not backed by a mfn. It is called by the pager.
1585 * It is required that the gfn was already populated. The gfn may already have a
1586 * mfn if populate was called for gfn which was nominated but not evicted. In
1587 * this case only the p2mt needs to be forwarded.
1588 */
1589 int p2m_mem_paging_prep(struct domain *d, unsigned long gfn_l, uint64_t buffer)
1590 {
1591 struct page_info *page;
1592 p2m_type_t p2mt;
1593 p2m_access_t a;
1594 gfn_t gfn = _gfn(gfn_l);
1595 mfn_t mfn;
1596 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1597 int ret, page_extant = 1;
1598 const void *user_ptr = (const void *) buffer;
1599
1600 if ( user_ptr )
1601 /* Sanity check the buffer and bail out early if trouble */
1602 if ( (buffer & (PAGE_SIZE - 1)) ||
1603 (!access_ok(user_ptr, PAGE_SIZE)) )
1604 return -EINVAL;
1605
1606 gfn_lock(p2m, gfn, 0);
1607
1608 mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1609
1610 ret = -ENOENT;
1611 /* Allow missing pages */
1612 if ( (p2mt != p2m_ram_paging_in) && (p2mt != p2m_ram_paged) )
1613 goto out;
1614
1615 /* Allocate a page if the gfn does not have one yet */
1616 if ( !mfn_valid(mfn) )
1617 {
1618 /* If the user did not provide a buffer, we disallow */
1619 ret = -EINVAL;
1620 if ( unlikely(user_ptr == NULL) )
1621 goto out;
1622 /* Get a free page */
1623 ret = -ENOMEM;
1624 page = alloc_domheap_page(p2m->domain, 0);
1625 if ( unlikely(page == NULL) )
1626 goto out;
1627 mfn = page_to_mfn(page);
1628 page_extant = 0;
1629 }
1630
1631 /* If we were given a buffer, now is the time to use it */
1632 if ( !page_extant && user_ptr )
1633 {
1634 void *guest_map;
1635 int rc;
1636
1637 ASSERT( mfn_valid(mfn) );
1638 guest_map = map_domain_page(mfn);
1639 rc = copy_from_user(guest_map, user_ptr, PAGE_SIZE);
1640 unmap_domain_page(guest_map);
1641 if ( rc )
1642 {
1643 gdprintk(XENLOG_ERR, "Failed to load paging-in gfn %lx domain %u "
1644 "bytes left %d\n", gfn_l, d->domain_id, rc);
1645 ret = -EFAULT;
1646 put_page(page); /* Don't leak pages */
1647 goto out;
1648 }
1649 }
1650
1651 /* Make the page already guest-accessible. If the pager still has a
1652 * pending resume operation, it will be idempotent p2m entry-wise,
1653 * but will unpause the vcpu */
1654 ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
1655 paging_mode_log_dirty(d) ? p2m_ram_logdirty
1656 : p2m_ram_rw, a);
1657 set_gpfn_from_mfn(mfn_x(mfn), gfn_l);
1658
1659 if ( !page_extant )
1660 atomic_dec(&d->paged_pages);
1661
1662 out:
1663 gfn_unlock(p2m, gfn, 0);
1664 return ret;
1665 }
1666
1667 /**
1668 * p2m_mem_paging_resume - Resume guest gfn
1669 * @d: guest domain
1670 * @rsp: vm_event response received
1671 *
1672 * p2m_mem_paging_resume() will forward the p2mt of a gfn to ram_rw. It is
1673 * called by the pager.
1674 *
1675 * The gfn was previously either evicted and populated, or nominated and
1676 * populated. If the page was evicted the p2mt will be p2m_ram_paging_in. If
1677 * the page was just nominated the p2mt will be p2m_ram_paging_in_start because
1678 * the pager did not call p2m_mem_paging_prep().
1679 *
1680 * If the gfn was dropped the vcpu needs to be unpaused.
1681 */
1682
1683 void p2m_mem_paging_resume(struct domain *d, vm_event_response_t *rsp)
1684 {
1685 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1686 p2m_type_t p2mt;
1687 p2m_access_t a;
1688 mfn_t mfn;
1689
1690 /* Fix p2m entry if the page was not dropped */
1691 if ( !(rsp->u.mem_paging.flags & MEM_PAGING_DROP_PAGE) )
1692 {
1693 gfn_t gfn = _gfn(rsp->u.mem_access.gfn);
1694
1695 gfn_lock(p2m, gfn, 0);
1696 mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1697 /*
1698 * Allow only pages which were prepared properly, or pages which
1699 * were nominated but not evicted.
1700 */
1701 if ( mfn_valid(mfn) && (p2mt == p2m_ram_paging_in) )
1702 {
1703 p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
1704 paging_mode_log_dirty(d) ? p2m_ram_logdirty :
1705 p2m_ram_rw, a);
1706 set_gpfn_from_mfn(mfn_x(mfn), gfn_x(gfn));
1707 }
1708 gfn_unlock(p2m, gfn, 0);
1709 }
1710 }
1711
1712 void p2m_altp2m_check(struct vcpu *v, uint16_t idx)
1713 {
1714 if ( altp2m_active(v->domain) )
1715 p2m_switch_vcpu_altp2m_by_id(v, idx);
1716 }
1717
1718 static struct p2m_domain *
1719 p2m_getlru_nestedp2m(struct domain *d, struct p2m_domain *p2m)
1720 {
1721 struct list_head *lru_list = &p2m_get_hostp2m(d)->np2m_list;
1722
1723 ASSERT(!list_empty(lru_list));
1724
1725 if ( p2m == NULL )
1726 p2m = list_entry(lru_list->prev, struct p2m_domain, np2m_list);
1727
1728 list_move(&p2m->np2m_list, lru_list);
1729
1730 return p2m;
1731 }
1732
1733 static void
1734 p2m_flush_table_locked(struct p2m_domain *p2m)
1735 {
1736 struct page_info *top, *pg;
1737 struct domain *d = p2m->domain;
1738 mfn_t mfn;
1739
1740 ASSERT(p2m_locked_by_me(p2m));
1741
1742 /*
1743 * "Host" p2m tables can have shared entries &c that need a bit more care
1744 * when discarding them.
1745 */
1746 ASSERT(!p2m_is_hostp2m(p2m));
1747 /* Nested p2m's do not do pod, hence the asserts (and no pod lock)*/
1748 ASSERT(page_list_empty(&p2m->pod.super));
1749 ASSERT(page_list_empty(&p2m->pod.single));
1750
1751 /* No need to flush if it's already empty */
1752 if ( p2m_is_nestedp2m(p2m) && p2m->np2m_base == P2M_BASE_EADDR )
1753 return;
1754
1755 /* This is no longer a valid nested p2m for any address space */
1756 p2m->np2m_base = P2M_BASE_EADDR;
1757 p2m->np2m_generation++;
1758
1759 /* Make sure nobody else is using this p2m table */
1760 nestedhvm_vmcx_flushtlb(p2m);
1761
1762 /* Zap the top level of the trie */
1763 mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
1764 clear_domain_page(mfn);
1765
1766 /* Free the rest of the trie pages back to the paging pool */
1767 top = mfn_to_page(mfn);
1768 while ( (pg = page_list_remove_head(&p2m->pages)) )
1769 {
1770 if ( pg != top )
1771 d->arch.paging.free_page(d, pg);
1772 }
1773 page_list_add(top, &p2m->pages);
1774 }
1775
1776 /* Reset this p2m table to be empty */
1777 static void
1778 p2m_flush_table(struct p2m_domain *p2m)
1779 {
1780 p2m_lock(p2m);
1781 p2m_flush_table_locked(p2m);
1782 p2m_unlock(p2m);
1783 }
1784
1785 void
1786 p2m_flush(struct vcpu *v, struct p2m_domain *p2m)
1787 {
1788 ASSERT(v->domain == p2m->domain);
1789 vcpu_nestedhvm(v).nv_p2m = NULL;
1790 p2m_flush_table(p2m);
1791 hvm_asid_flush_vcpu(v);
1792 }
1793
1794 void
1795 p2m_flush_nestedp2m(struct domain *d)
1796 {
1797 int i;
1798 for ( i = 0; i < MAX_NESTEDP2M; i++ )
1799 p2m_flush_table(d->arch.nested_p2m[i]);
1800 }
1801
1802 void np2m_flush_base(struct vcpu *v, unsigned long np2m_base)
1803 {
1804 struct domain *d = v->domain;
1805 struct p2m_domain *p2m;
1806 unsigned int i;
1807
1808 np2m_base &= ~(0xfffull);
1809
1810 nestedp2m_lock(d);
1811 for ( i = 0; i < MAX_NESTEDP2M; i++ )
1812 {
1813 p2m = d->arch.nested_p2m[i];
1814 p2m_lock(p2m);
1815 if ( p2m->np2m_base == np2m_base )
1816 {
1817 p2m_flush_table_locked(p2m);
1818 p2m_unlock(p2m);
1819 break;
1820 }
1821 p2m_unlock(p2m);
1822 }
1823 nestedp2m_unlock(d);
1824 }
1825
1826 static void assign_np2m(struct vcpu *v, struct p2m_domain *p2m)
1827 {
1828 struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1829 struct domain *d = v->domain;
1830
1831 /* Bring this np2m to the top of the LRU list */
1832 p2m_getlru_nestedp2m(d, p2m);
1833
1834 nv->nv_flushp2m = 0;
1835 nv->nv_p2m = p2m;
1836 nv->np2m_generation = p2m->np2m_generation;
1837 cpumask_set_cpu(v->processor, p2m->dirty_cpumask);
1838 }
1839
1840 static void nvcpu_flush(struct vcpu *v)
1841 {
1842 hvm_asid_flush_vcpu(v);
1843 vcpu_nestedhvm(v).stale_np2m = true;
1844 }
1845
1846 struct p2m_domain *
1847 p2m_get_nestedp2m_locked(struct vcpu *v)
1848 {
1849 struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1850 struct domain *d = v->domain;
1851 struct p2m_domain *p2m;
1852 uint64_t np2m_base = nhvm_vcpu_p2m_base(v);
1853 unsigned int i;
1854 bool needs_flush = true;
1855
1856 /* Mask out low bits; this avoids collisions with P2M_BASE_EADDR */
1857 np2m_base &= ~(0xfffull);
1858
1859 if (nv->nv_flushp2m && nv->nv_p2m) {
1860 nv->nv_p2m = NULL;
1861 }
1862
1863 nestedp2m_lock(d);
1864 p2m = nv->nv_p2m;
1865 if ( p2m )
1866 {
1867 p2m_lock(p2m);
1868 if ( p2m->np2m_base == np2m_base )
1869 {
1870 /* Check if np2m was flushed just before the lock */
1871 if ( nv->np2m_generation == p2m->np2m_generation )
1872 needs_flush = false;
1873 /* np2m is up-to-date */
1874 goto found;
1875 }
1876 else if ( p2m->np2m_base != P2M_BASE_EADDR )
1877 {
1878 /* vCPU is switching from some other valid np2m */
1879 cpumask_clear_cpu(v->processor, p2m->dirty_cpumask);
1880 }
1881 p2m_unlock(p2m);
1882 }
1883
1884 /* Share a np2m if possible */
1885 for ( i = 0; i < MAX_NESTEDP2M; i++ )
1886 {
1887 p2m = d->arch.nested_p2m[i];
1888 p2m_lock(p2m);
1889
1890 if ( p2m->np2m_base == np2m_base )
1891 goto found;
1892
1893 p2m_unlock(p2m);
1894 }
1895
1896 /* All p2m's are or were in use. Take the least recent used one,
1897 * flush it and reuse. */
1898 p2m = p2m_getlru_nestedp2m(d, NULL);
1899 p2m_flush_table(p2m);
1900 p2m_lock(p2m);
1901
1902 found:
1903 if ( needs_flush )
1904 nvcpu_flush(v);
1905 p2m->np2m_base = np2m_base;
1906 assign_np2m(v, p2m);
1907 nestedp2m_unlock(d);
1908
1909 return p2m;
1910 }
1911
1912 struct p2m_domain *p2m_get_nestedp2m(struct vcpu *v)
1913 {
1914 struct p2m_domain *p2m = p2m_get_nestedp2m_locked(v);
1915 p2m_unlock(p2m);
1916
1917 return p2m;
1918 }
1919
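/*
 * Return the p2m the vCPU is currently using for translations: the nested
 * p2m while running in L2 guest mode, the host p2m otherwise.
 */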
struct p2m_domain *
p2m_get_p2m(struct vcpu *v)
{
    if ( !nestedhvm_is_n2(v) )
        return p2m_get_hostp2m(v->domain);

    return p2m_get_nestedp2m(v);
}

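/*
 * Keep the np2m's dirty_cpumask in sync across context switches: on
 * schedule-out of a vCPU whose np2m is still valid, clear this pCPU (no
 * flush IPI is needed while the vCPU is not running); on schedule-in,
 * either re-set the pCPU or, if the np2m was flushed in the meantime,
 * drop the stale reference and flush the core's ASIDs.
 */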
void np2m_schedule(int dir)
{
    struct vcpu *curr = current;
    struct nestedvcpu *nv = &vcpu_nestedhvm(curr);
    struct p2m_domain *p2m;

    ASSERT(dir == NP2M_SCHEDLE_IN || dir == NP2M_SCHEDLE_OUT);

    if ( !nestedhvm_enabled(curr->domain) ||
         !nestedhvm_vcpu_in_guestmode(curr) ||
         !nestedhvm_paging_mode_hap(curr) )
        return;

    p2m = nv->nv_p2m;
    if ( p2m )
    {
        bool np2m_valid;

        p2m_lock(p2m);
        np2m_valid = p2m->np2m_base == nhvm_vcpu_p2m_base(curr) &&
                     nv->np2m_generation == p2m->np2m_generation;
        if ( dir == NP2M_SCHEDLE_OUT && np2m_valid )
        {
            /*
             * The np2m is up to date but this vCPU will no longer use it,
             * which means there are no reasons to send a flush IPI.
             */
            cpumask_clear_cpu(curr->processor, p2m->dirty_cpumask);
        }
        else if ( dir == NP2M_SCHEDLE_IN )
        {
            if ( !np2m_valid )
            {
                /* This vCPU's np2m was flushed while it was not runnable */
                hvm_asid_flush_core();
                vcpu_nestedhvm(curr).nv_p2m = NULL;
            }
            else
                cpumask_set_cpu(curr->processor, p2m->dirty_cpumask);
        }
        p2m_unlock(p2m);
    }
}

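/*
 * Translate a guest virtual address to a gfn.  For a vCPU in nested (L2)
 * mode this is a two-step walk: L2 va -> L2 gfn via the nested paging mode,
 * then L2 gfn -> L1 gfn via the L1 p2m; otherwise the host paging mode is
 * used directly.
 */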
unsigned long paging_gva_to_gfn(struct vcpu *v,
                                unsigned long va,
                                uint32_t *pfec)
{
    struct p2m_domain *hostp2m = p2m_get_hostp2m(v->domain);
    const struct paging_mode *hostmode = paging_get_hostmode(v);

    if ( is_hvm_vcpu(v) && paging_mode_hap(v->domain) && nestedhvm_is_n2(v) )
    {
        unsigned long l2_gfn, l1_gfn;
        struct p2m_domain *p2m;
        const struct paging_mode *mode;
        uint8_t l1_p2ma;
        unsigned int l1_page_order;
        int rv;

        /* translate l2 guest va into l2 guest gfn */
        p2m = p2m_get_nestedp2m(v);
        mode = paging_get_nestedmode(v);
        l2_gfn = mode->gva_to_gfn(v, p2m, va, pfec);

        if ( l2_gfn == gfn_x(INVALID_GFN) )
            return gfn_x(INVALID_GFN);

        /* translate l2 guest gfn into l1 guest gfn */
        rv = nestedhap_walk_L1_p2m(v, l2_gfn, &l1_gfn, &l1_page_order, &l1_p2ma,
                                   1,
                                   !!(*pfec & PFEC_write_access),
                                   !!(*pfec & PFEC_insn_fetch));

        if ( rv != NESTEDHVM_PAGEFAULT_DONE )
            return gfn_x(INVALID_GFN);

        /*
         * Sanity check that l1_gfn can be used properly as a 4K mapping, even
         * if it is mapped by a nested superpage.
         */
        ASSERT((l2_gfn & ((1ul << l1_page_order) - 1)) ==
               (l1_gfn & ((1ul << l1_page_order) - 1)));

        return l1_gfn;
    }

    return hostmode->gva_to_gfn(v, hostp2m, va, pfec);
}

/*
 * If the map is non-NULL, we leave this function having acquired an extra ref
 * on mfn_to_page(*mfn). In all cases, *pfec contains appropriate
 * synthetic/structure PFEC_* bits.
 */
void *map_domain_gfn(struct p2m_domain *p2m, gfn_t gfn, mfn_t *mfn,
                     p2m_type_t *p2mt, p2m_query_t q, uint32_t *pfec)
{
    struct page_info *page;

    if ( !gfn_valid(p2m->domain, gfn) )
    {
        *pfec = PFEC_reserved_bit | PFEC_page_present;
        return NULL;
    }

    /* Translate the gfn, unsharing if shared. */
    page = p2m_get_page_from_gfn(p2m, gfn, p2mt, NULL, q);
    if ( p2m_is_paging(*p2mt) )
    {
        ASSERT(p2m_is_hostp2m(p2m));
        if ( page )
            put_page(page);
        p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
        *pfec = PFEC_page_paged;
        return NULL;
    }
    if ( p2m_is_shared(*p2mt) )
    {
        if ( page )
            put_page(page);
        *pfec = PFEC_page_shared;
        return NULL;
    }
    if ( !page )
    {
        *pfec = 0;
        return NULL;
    }

    *pfec = PFEC_page_present;
    *mfn = page_to_mfn(page);
    ASSERT(mfn_valid(*mfn));

    return map_domain_page(*mfn);
}

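/*
 * Pick the largest page order usable for an MMIO mapping of nr frames
 * starting at start_fn, falling back to 4k whenever superpages cannot be
 * used safely (see the constraints documented below).
 */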
static unsigned int mmio_order(const struct domain *d,
                               unsigned long start_fn, unsigned long nr)
{
    /*
     * Note that the !iommu_use_hap_pt() here has three effects:
     * - cover iommu_{,un}map_page() not having an "order" input yet,
     * - exclude shadow mode (which doesn't support large MMIO mappings),
     * - exclude PV guests, should execution reach this code for such.
     * So be careful when altering this.
     */
    if ( !need_iommu(d) || !iommu_use_hap_pt(d) ||
         (start_fn & ((1UL << PAGE_ORDER_2M) - 1)) || !(nr >> PAGE_ORDER_2M) )
        return PAGE_ORDER_4K;

    if ( 0 /*
            * Don't use 1Gb pages, to limit the iteration count in
            * set_typed_p2m_entry() when it needs to zap M2P entries
            * for a RAM range.
            */ &&
         !(start_fn & ((1UL << PAGE_ORDER_1G) - 1)) && (nr >> PAGE_ORDER_1G) &&
         hap_has_1gb )
        return PAGE_ORDER_1G;

    if ( hap_has_2mb )
        return PAGE_ORDER_2M;

    return PAGE_ORDER_4K;
}

#define MAP_MMIO_MAX_ITER 64 /* pretty arbitrary */

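/*
 * Map nr MMIO frames starting at mfn into the physmap at start_gfn, using
 * the largest order mmio_order() permits for each chunk.  Returns 0 when
 * the whole range was handled, the number of frames processed if the
 * MAP_MMIO_MAX_ITER budget ran out first (the caller is expected to retry
 * with the remainder), or a negative error code.  unmap_mmio_regions()
 * below follows the same convention.
 */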
int map_mmio_regions(struct domain *d,
                     gfn_t start_gfn,
                     unsigned long nr,
                     mfn_t mfn)
{
    int ret = 0;
    unsigned long i;
    unsigned int iter, order;

    if ( !paging_mode_translate(d) )
        return 0;

    for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
          i += 1UL << order, ++iter )
    {
        /* OR'ing gfn and mfn values will return an order suitable to both. */
        for ( order = mmio_order(d, (gfn_x(start_gfn) + i) | (mfn_x(mfn) + i), nr - i); ;
              order = ret - 1 )
        {
            ret = set_mmio_p2m_entry(d, gfn_x(start_gfn) + i,
                                     mfn_add(mfn, i), order,
                                     p2m_get_hostp2m(d)->default_access);
            if ( ret <= 0 )
                break;
            ASSERT(ret <= order);
        }
        if ( ret < 0 )
            break;
    }

    return i == nr ? 0 : i ?: ret;
}

int unmap_mmio_regions(struct domain *d,
                       gfn_t start_gfn,
                       unsigned long nr,
                       mfn_t mfn)
{
    int ret = 0;
    unsigned long i;
    unsigned int iter, order;

    if ( !paging_mode_translate(d) )
        return 0;

    for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
          i += 1UL << order, ++iter )
    {
        /* OR'ing gfn and mfn values will return an order suitable to both. */
        for ( order = mmio_order(d, (gfn_x(start_gfn) + i) | (mfn_x(mfn) + i), nr - i); ;
              order = ret - 1 )
        {
            ret = clear_mmio_p2m_entry(d, gfn_x(start_gfn) + i,
                                       mfn_add(mfn, i), order);
            if ( ret <= 0 )
                break;
            ASSERT(ret <= order);
        }
        if ( ret < 0 )
            break;
    }

    return i == nr ? 0 : i ?: ret;
}

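/*
 * Switch vCPU v to the altp2m view with the given index.  Returns 1 if the
 * view exists (even when v was already using it), 0 otherwise.
 */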
bool_t p2m_switch_vcpu_altp2m_by_id(struct vcpu *v, unsigned int idx)
{
    struct domain *d = v->domain;
    bool_t rc = 0;

    if ( idx >= MAX_ALTP2M )
        return rc;

    altp2m_list_lock(d);

    if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) )
    {
        if ( idx != vcpu_altp2m(v).p2midx )
        {
            atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
            vcpu_altp2m(v).p2midx = idx;
            atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
            altp2m_vcpu_update_p2m(v);
        }
        rc = 1;
    }

    altp2m_list_unlock(d);
    return rc;
}

/*
 * If the fault is for a not present entry:
 *     if the entry in the host p2m has a valid mfn, copy it and retry
 *     else indicate that outer handler should handle fault
 *
 * If the fault is for a present entry:
 *     indicate that outer handler should handle fault
 */
bool_t p2m_altp2m_lazy_copy(struct vcpu *v, paddr_t gpa,
                            unsigned long gla, struct npfec npfec,
                            struct p2m_domain **ap2m)
{
    struct p2m_domain *hp2m = p2m_get_hostp2m(v->domain);
    p2m_type_t p2mt;
    p2m_access_t p2ma;
    unsigned int page_order;
    gfn_t gfn = _gfn(paddr_to_pfn(gpa));
    unsigned long mask;
    mfn_t mfn;
    int rv;

    *ap2m = p2m_get_altp2m(v);

    mfn = get_gfn_type_access(*ap2m, gfn_x(gfn), &p2mt, &p2ma,
                              0, &page_order);
    __put_gfn(*ap2m, gfn_x(gfn));

    if ( !mfn_eq(mfn, INVALID_MFN) )
        return 0;

    mfn = get_gfn_type_access(hp2m, gfn_x(gfn), &p2mt, &p2ma,
                              P2M_ALLOC, &page_order);
    __put_gfn(hp2m, gfn_x(gfn));

    if ( mfn_eq(mfn, INVALID_MFN) )
        return 0;

    p2m_lock(*ap2m);

    /*
     * If this is a superpage mapping, round down both frame numbers
     * to the start of the superpage.
     */
    mask = ~((1UL << page_order) - 1);
    mfn = _mfn(mfn_x(mfn) & mask);
    gfn = _gfn(gfn_x(gfn) & mask);

    rv = p2m_set_entry(*ap2m, gfn, mfn, page_order, p2mt, p2ma);
    p2m_unlock(*ap2m);

    if ( rv )
    {
        gdprintk(XENLOG_ERR,
                 "failed to set entry for %#"PRIx64" -> %#"PRIx64" p2m %#"PRIx64"\n",
                 gfn_x(gfn), mfn_x(mfn), (unsigned long)*ap2m);
        domain_crash(hp2m->domain);
    }

    return 1;
}

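/* Flush and disable every altp2m view of the domain. */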
void p2m_flush_altp2m(struct domain *d)
{
    unsigned int i;

    altp2m_list_lock(d);

    for ( i = 0; i < MAX_ALTP2M; i++ )
    {
        p2m_flush_table(d->arch.altp2m_p2m[i]);
        /* Uninit and reinit ept to force TLB shootdown */
        ept_p2m_uninit(d->arch.altp2m_p2m[i]);
        ept_p2m_init(d->arch.altp2m_p2m[i]);
        d->arch.altp2m_eptp[i] = mfn_x(INVALID_MFN);
    }

    altp2m_list_unlock(d);
}

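/* Initialise the altp2m view with the given index, if it is not yet in use. */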
int p2m_init_altp2m_by_id(struct domain *d, unsigned int idx)
{
    int rc = -EINVAL;

    if ( idx >= MAX_ALTP2M )
        return rc;

    altp2m_list_lock(d);

    if ( d->arch.altp2m_eptp[idx] == mfn_x(INVALID_MFN) )
    {
        p2m_init_altp2m_ept(d, idx);
        rc = 0;
    }

    altp2m_list_unlock(d);
    return rc;
}

int p2m_init_next_altp2m(struct domain *d, uint16_t *idx)
{
    int rc = -EINVAL;
    unsigned int i;

    altp2m_list_lock(d);

    for ( i = 0; i < MAX_ALTP2M; i++ )
    {
        if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
            continue;

        p2m_init_altp2m_ept(d, i);
        *idx = i;
        rc = 0;

        break;
    }

    altp2m_list_unlock(d);
    return rc;
}

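/*
 * Tear down the altp2m view with the given index.  View 0 cannot be
 * destroyed, and a view that still has active vCPUs is left alone; -EBUSY
 * is returned in those cases.
 */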
int p2m_destroy_altp2m_by_id(struct domain *d, unsigned int idx)
{
    struct p2m_domain *p2m;
    int rc = -EBUSY;

    if ( !idx || idx >= MAX_ALTP2M )
        return rc;

    domain_pause_except_self(d);

    altp2m_list_lock(d);

    if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) )
    {
        p2m = d->arch.altp2m_p2m[idx];

        if ( !_atomic_read(p2m->active_vcpus) )
        {
            p2m_flush_table(d->arch.altp2m_p2m[idx]);
            /* Uninit and reinit ept to force TLB shootdown */
            ept_p2m_uninit(d->arch.altp2m_p2m[idx]);
            ept_p2m_init(d->arch.altp2m_p2m[idx]);
            d->arch.altp2m_eptp[idx] = mfn_x(INVALID_MFN);
            rc = 0;
        }
    }

    altp2m_list_unlock(d);

    domain_unpause_except_self(d);

    return rc;
}

int p2m_switch_domain_altp2m_by_id(struct domain *d, unsigned int idx)
{
    struct vcpu *v;
    int rc = -EINVAL;

    if ( idx >= MAX_ALTP2M )
        return rc;

    domain_pause_except_self(d);

    altp2m_list_lock(d);

    if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) )
    {
        for_each_vcpu( d, v )
            if ( idx != vcpu_altp2m(v).p2midx )
            {
                atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
                vcpu_altp2m(v).p2midx = idx;
                atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
                altp2m_vcpu_update_p2m(v);
            }

        rc = 0;
    }

    altp2m_list_unlock(d);

    domain_unpause_except_self(d);

    return rc;
}

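/*
 * Remap old_gfn in the given altp2m view to the mfn that currently backs
 * new_gfn (looked up first in the view, then in the host p2m).  Passing
 * INVALID_GFN as new_gfn instead removes the view's entry for old_gfn.
 */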
int p2m_change_altp2m_gfn(struct domain *d, unsigned int idx,
                          gfn_t old_gfn, gfn_t new_gfn)
{
    struct p2m_domain *hp2m, *ap2m;
    p2m_access_t a;
    p2m_type_t t;
    mfn_t mfn;
    unsigned int page_order;
    int rc = -EINVAL;

    if ( idx >= MAX_ALTP2M || d->arch.altp2m_eptp[idx] == mfn_x(INVALID_MFN) )
        return rc;

    hp2m = p2m_get_hostp2m(d);
    ap2m = d->arch.altp2m_p2m[idx];

    p2m_lock(hp2m);
    p2m_lock(ap2m);

    mfn = ap2m->get_entry(ap2m, old_gfn, &t, &a, 0, NULL, NULL);

    if ( gfn_eq(new_gfn, INVALID_GFN) )
    {
        if ( mfn_valid(mfn) )
            p2m_remove_page(ap2m, gfn_x(old_gfn), mfn_x(mfn), PAGE_ORDER_4K);
        rc = 0;
        goto out;
    }

    /* Check host p2m if no valid entry in alternate */
    if ( !mfn_valid(mfn) )
    {
        mfn = __get_gfn_type_access(hp2m, gfn_x(old_gfn), &t, &a,
                                    P2M_ALLOC, &page_order, 0);

        if ( !mfn_valid(mfn) || t != p2m_ram_rw )
            goto out;

        /* If this is a superpage, copy that first */
        if ( page_order != PAGE_ORDER_4K )
        {
            gfn_t gfn;
            unsigned long mask;

            mask = ~((1UL << page_order) - 1);
            gfn = _gfn(gfn_x(old_gfn) & mask);
            mfn = _mfn(mfn_x(mfn) & mask);

            if ( ap2m->set_entry(ap2m, gfn, mfn, page_order, t, a, 1) )
                goto out;
        }
    }

    mfn = ap2m->get_entry(ap2m, new_gfn, &t, &a, 0, NULL, NULL);

    if ( !mfn_valid(mfn) )
        mfn = hp2m->get_entry(hp2m, new_gfn, &t, &a, 0, NULL, NULL);

    /* Note: currently it is not safe to remap to a shared entry */
    if ( !mfn_valid(mfn) || (t != p2m_ram_rw) )
        goto out;

    if ( !ap2m->set_entry(ap2m, old_gfn, mfn, PAGE_ORDER_4K, t, a,
                          (current->domain != d)) )
    {
        rc = 0;

        if ( gfn_x(new_gfn) < ap2m->min_remapped_gfn )
            ap2m->min_remapped_gfn = gfn_x(new_gfn);
        if ( gfn_x(new_gfn) > ap2m->max_remapped_gfn )
            ap2m->max_remapped_gfn = gfn_x(new_gfn);
    }

 out:
    p2m_unlock(ap2m);
    p2m_unlock(hp2m);
    return rc;
}

static void p2m_reset_altp2m(struct p2m_domain *p2m)
{
    p2m_flush_table(p2m);
    /* Uninit and reinit ept to force TLB shootdown */
    ept_p2m_uninit(p2m);
    ept_p2m_init(p2m);
    p2m->min_remapped_gfn = gfn_x(INVALID_GFN);
    p2m->max_remapped_gfn = 0;
}

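/*
 * Propagate a host p2m change to every active altp2m view.  Views that
 * already populated the affected gfn receive the new mapping; if a page is
 * dropped inside a view's remapped range, that view is reset, and if a
 * second view is affected as well, all remaining views are reset too.
 */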
void p2m_altp2m_propagate_change(struct domain *d, gfn_t gfn,
                                 mfn_t mfn, unsigned int page_order,
                                 p2m_type_t p2mt, p2m_access_t p2ma)
{
    struct p2m_domain *p2m;
    p2m_access_t a;
    p2m_type_t t;
    mfn_t m;
    unsigned int i;
    unsigned int reset_count = 0;
    unsigned int last_reset_idx = ~0;

    if ( !altp2m_active(d) )
        return;

    altp2m_list_lock(d);

    for ( i = 0; i < MAX_ALTP2M; i++ )
    {
        if ( d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
            continue;

        p2m = d->arch.altp2m_p2m[i];
        m = get_gfn_type_access(p2m, gfn_x(gfn), &t, &a, 0, NULL);

        /* Check for a dropped page that may impact this altp2m */
        if ( mfn_eq(mfn, INVALID_MFN) &&
             gfn_x(gfn) >= p2m->min_remapped_gfn &&
             gfn_x(gfn) <= p2m->max_remapped_gfn )
        {
            if ( !reset_count++ )
            {
                p2m_reset_altp2m(p2m);
                last_reset_idx = i;
            }
            else
            {
                /* At least 2 altp2m's impacted, so reset everything */
                __put_gfn(p2m, gfn_x(gfn));

                for ( i = 0; i < MAX_ALTP2M; i++ )
                {
                    if ( i == last_reset_idx ||
                         d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
                        continue;

                    p2m = d->arch.altp2m_p2m[i];
                    p2m_lock(p2m);
                    p2m_reset_altp2m(p2m);
                    p2m_unlock(p2m);
                }

                goto out;
            }
        }
        else if ( !mfn_eq(m, INVALID_MFN) )
            p2m_set_entry(p2m, gfn, mfn, page_order, p2mt, p2ma);

        __put_gfn(p2m, gfn_x(gfn));
    }

 out:
    altp2m_list_unlock(d);
}

/*** Audit ***/

#if P2M_AUDIT
void audit_p2m(struct domain *d,
               uint64_t *orphans,
               uint64_t *m2p_bad,
               uint64_t *p2m_bad)
{
    struct page_info *page;
    struct domain *od;
    unsigned long mfn, gfn;
    mfn_t p2mfn;
    unsigned long orphans_count = 0, mpbad = 0, pmbad = 0;
    p2m_access_t p2ma;
    p2m_type_t type;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    if ( !paging_mode_translate(d) )
        goto out_p2m_audit;

    P2M_PRINTK("p2m audit starts\n");

    p2m_lock(p2m);
    pod_lock(p2m);

    if ( p2m->audit_p2m )
        pmbad = p2m->audit_p2m(p2m);

    /* Audit part two: walk the domain's page allocation list, checking
     * the m2p entries. */
    spin_lock(&d->page_alloc_lock);
    page_list_for_each ( page, &d->page_list )
    {
        mfn = mfn_x(page_to_mfn(page));

        P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn);

        od = page_get_owner(page);

        if ( od != d )
        {
            P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
                       mfn, od, (od ? od->domain_id : -1), d, d->domain_id);
            continue;
        }

        gfn = get_gpfn_from_mfn(mfn);
        if ( gfn == INVALID_M2P_ENTRY )
        {
            orphans_count++;
            P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
                       mfn);
            continue;
        }

        if ( gfn == SHARED_M2P_ENTRY )
        {
            P2M_PRINTK("shared mfn (%lx) on domain page list!\n",
                       mfn);
            continue;
        }

        p2mfn = get_gfn_type_access(p2m, gfn, &type, &p2ma, 0, NULL);
        if ( mfn_x(p2mfn) != mfn )
        {
            mpbad++;
            P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
                       " (-> gfn %#lx)\n",
                       mfn, gfn, mfn_x(p2mfn),
                       (mfn_valid(p2mfn)
                        ? get_gpfn_from_mfn(mfn_x(p2mfn))
                        : -1u));
            /* This m2p entry is stale: the domain has another frame in
             * this physical slot. No great disaster, but for neatness,
             * blow away the m2p entry. */
            set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
        }
        __put_gfn(p2m, gfn);

        P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx\n",
                   mfn, gfn, mfn_x(p2mfn));
    }
    spin_unlock(&d->page_alloc_lock);

    pod_unlock(p2m);
    p2m_unlock(p2m);

    P2M_PRINTK("p2m audit complete\n");
    if ( orphans_count | mpbad | pmbad )
        P2M_PRINTK("p2m audit found %lu orphans\n", orphans_count);
    if ( mpbad | pmbad )
    {
        P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
                   pmbad, mpbad);
        WARN();
    }

 out_p2m_audit:
    *orphans = (uint64_t) orphans_count;
    *m2p_bad = (uint64_t) mpbad;
    *p2m_bad = (uint64_t) pmbad;
}
#endif /* P2M_AUDIT */

/*
 * Add a frame from a foreign domain to the target domain's physmap. Similar
 * to XENMAPSPACE_gmfn, but the frame is foreign, is mapped into the current
 * domain, and is not removed from the foreign domain.
 *
 * Usage: - libxl on pvh dom0 creating a guest and doing privcmd_ioctl_mmap.
 *        - xentrace running on dom0 mapping xenheap pages. foreigndom would
 *          be DOMID_XEN in such a case.
 *        etc..
 *
 * Side Effect: the mfn for fgfn will be refcounted in lower level routines
 *              so it is not lost while mapped here. The refcnt is released
 *              via the XENMEM_remove_from_physmap path.
 *
 * Returns: 0 ==> success
 */
int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
                    unsigned long gpfn, domid_t foreigndom)
{
    p2m_type_t p2mt, p2mt_prev;
    mfn_t prev_mfn, mfn;
    struct page_info *page;
    int rc;
    struct domain *fdom;

    ASSERT(tdom);
    if ( foreigndom == DOMID_SELF )
        return -EINVAL;
    /*
     * hvm fixme: until support is added to p2m teardown code to cleanup any
     * foreign entries, limit this to hardware domain only.
     */
    if ( !is_hardware_domain(tdom) )
        return -EPERM;

    if ( foreigndom == DOMID_XEN )
        fdom = rcu_lock_domain(dom_xen);
    else
        fdom = rcu_lock_domain_by_id(foreigndom);
    if ( fdom == NULL )
        return -ESRCH;

    rc = -EINVAL;
    if ( tdom == fdom )
        goto out;

    rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
    if ( rc )
        goto out;

    /*
     * Take a refcnt on the mfn. NB: following supported for foreign mapping:
     * ram_rw | ram_logdirty | ram_ro | paging_out.
     */
    page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
    if ( !page ||
         !p2m_is_ram(p2mt) || p2m_is_shared(p2mt) || p2m_is_hole(p2mt) )
    {
        if ( page )
            put_page(page);
        rc = -EINVAL;
        goto out;
    }
    mfn = page_to_mfn(page);

    /* Remove previously mapped page if it is present. */
    prev_mfn = get_gfn(tdom, gpfn, &p2mt_prev);
    if ( mfn_valid(prev_mfn) )
    {
        if ( is_xen_heap_mfn(mfn_x(prev_mfn)) )
            /* Xen heap frames are simply unhooked from this phys slot */
            rc = guest_physmap_remove_page(tdom, _gfn(gpfn), prev_mfn, 0);
        else
            /* Normal domain memory is freed, to avoid leaking memory. */
            rc = guest_remove_page(tdom, gpfn);
        if ( rc )
            goto put_both;
    }
    /*
     * Create the new mapping. Can't use guest_physmap_add_page() because it
     * will update the m2p table which will result in mfn -> gpfn of dom0
     * and not fgfn of domU.
     */
    rc = set_foreign_p2m_entry(tdom, gpfn, mfn);
    if ( rc )
        gdprintk(XENLOG_WARNING, "set_foreign_p2m_entry failed. "
                 "gpfn:%lx mfn:%lx fgfn:%lx td:%d fd:%d\n",
                 gpfn, mfn_x(mfn), fgfn, tdom->domain_id, fdom->domain_id);

 put_both:
    put_page(page);

    /*
     * This put_gfn is for the above get_gfn of prev_mfn. We must do this
     * after set_foreign_p2m_entry so another cpu doesn't populate the gpfn
     * before us.
     */
    put_gfn(tdom, gpfn);

 out:
    if ( fdom )
        rcu_unlock_domain(fdom);
    return rc;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */