/******************************************************************************
 * arch/x86/mm/p2m.c
 *
 * physical-to-machine mappings for automatically-translated domains.
 *
 * Parts of this code are Copyright (c) 2009 by Citrix Systems, Inc. (Patrick Colp)
 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/guest_access.h> /* copy_from_guest() */
#include <xen/iommu.h>
#include <xen/vm_event.h>
#include <xen/event.h>
#include <public/vm_event.h>
#include <asm/domain.h>
#include <asm/page.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/hvm/vmx/vmx.h> /* ept_p2m_init() */
#include <asm/mem_sharing.h>
#include <asm/hvm/nestedhvm.h>
#include <asm/altp2m.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
#include <asm/vm_event.h>
#include <xsm/xsm.h>

#include "mm-locks.h"

/* Turn on/off host superpage page table support for hap, default on. */
bool_t __initdata opt_hap_1gb = 1, __initdata opt_hap_2mb = 1;
boolean_param("hap_1gb", opt_hap_1gb);
boolean_param("hap_2mb", opt_hap_2mb);

/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef page_to_mfn
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))

DEFINE_PERCPU_RWLOCK_GLOBAL(p2m_percpu_rwlock);

/* Init the datastructures for later use by the p2m code */
static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
{
    unsigned int i;
    int ret = 0;

    mm_rwlock_init(&p2m->lock);
    mm_lock_init(&p2m->pod.lock);
    INIT_LIST_HEAD(&p2m->np2m_list);
    INIT_PAGE_LIST_HEAD(&p2m->pages);
    INIT_PAGE_LIST_HEAD(&p2m->pod.super);
    INIT_PAGE_LIST_HEAD(&p2m->pod.single);

    p2m->domain = d;
    p2m->default_access = p2m_access_rwx;
    p2m->p2m_class = p2m_host;

    p2m->np2m_base = P2M_BASE_EADDR;
    p2m->np2m_generation = 0;

    for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i )
        p2m->pod.mrp.list[i] = gfn_x(INVALID_GFN);

    if ( hap_enabled(d) && cpu_has_vmx )
        ret = ept_p2m_init(p2m);
    else
        p2m_pt_init(p2m);

    spin_lock_init(&p2m->ioreq.lock);

    return ret;
}

static struct p2m_domain *p2m_init_one(struct domain *d)
{
    struct p2m_domain *p2m = xzalloc(struct p2m_domain);

    if ( !p2m )
        return NULL;

    if ( !zalloc_cpumask_var(&p2m->dirty_cpumask) )
        goto free_p2m;

    if ( p2m_initialise(d, p2m) )
        goto free_cpumask;
    return p2m;

free_cpumask:
    free_cpumask_var(p2m->dirty_cpumask);
free_p2m:
    xfree(p2m);
    return NULL;
}

static void p2m_free_one(struct p2m_domain *p2m)
{
    if ( hap_enabled(p2m->domain) && cpu_has_vmx )
        ept_p2m_uninit(p2m);
    free_cpumask_var(p2m->dirty_cpumask);
    xfree(p2m);
}

static int p2m_init_hostp2m(struct domain *d)
{
    struct p2m_domain *p2m = p2m_init_one(d);

    if ( p2m )
    {
        p2m->logdirty_ranges = rangeset_new(d, "log-dirty",
                                            RANGESETF_prettyprint_hex);
        if ( p2m->logdirty_ranges )
        {
            d->arch.p2m = p2m;
            return 0;
        }
        p2m_free_one(p2m);
    }
    return -ENOMEM;
}

static void p2m_teardown_hostp2m(struct domain *d)
{
    /* Iterate over all p2m tables per domain */
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    if ( p2m )
    {
        rangeset_destroy(p2m->logdirty_ranges);
        p2m_free_one(p2m);
        d->arch.p2m = NULL;
    }
}

static void p2m_teardown_nestedp2m(struct domain *d)
{
    unsigned int i;
    struct p2m_domain *p2m;

    for ( i = 0; i < MAX_NESTEDP2M; i++ )
    {
        if ( !d->arch.nested_p2m[i] )
            continue;
        p2m = d->arch.nested_p2m[i];
        list_del(&p2m->np2m_list);
        p2m_free_one(p2m);
        d->arch.nested_p2m[i] = NULL;
    }
}

static int p2m_init_nestedp2m(struct domain *d)
{
    unsigned int i;
    struct p2m_domain *p2m;

    mm_lock_init(&d->arch.nested_p2m_lock);
    for ( i = 0; i < MAX_NESTEDP2M; i++ )
    {
        d->arch.nested_p2m[i] = p2m = p2m_init_one(d);
        if ( p2m == NULL )
        {
            p2m_teardown_nestedp2m(d);
            return -ENOMEM;
        }
        p2m->p2m_class = p2m_nested;
        p2m->write_p2m_entry = nestedp2m_write_p2m_entry;
        list_add(&p2m->np2m_list, &p2m_get_hostp2m(d)->np2m_list);
    }

    return 0;
}

static void p2m_teardown_altp2m(struct domain *d)
{
    unsigned int i;
    struct p2m_domain *p2m;

    for ( i = 0; i < MAX_ALTP2M; i++ )
    {
        if ( !d->arch.altp2m_p2m[i] )
            continue;
        p2m = d->arch.altp2m_p2m[i];
        d->arch.altp2m_p2m[i] = NULL;
        p2m_free_one(p2m);
    }
}

static int p2m_init_altp2m(struct domain *d)
{
    unsigned int i;
    struct p2m_domain *p2m;

    mm_lock_init(&d->arch.altp2m_list_lock);
    for ( i = 0; i < MAX_ALTP2M; i++ )
    {
        d->arch.altp2m_p2m[i] = p2m = p2m_init_one(d);
        if ( p2m == NULL )
        {
            p2m_teardown_altp2m(d);
            return -ENOMEM;
        }
        p2m->p2m_class = p2m_alternate;
        p2m->access_required = 1;
        _atomic_set(&p2m->active_vcpus, 0);
    }

    return 0;
}

int p2m_init(struct domain *d)
{
    int rc;

    rc = p2m_init_hostp2m(d);
    if ( rc )
        return rc;

    /* Must initialise nestedp2m unconditionally
     * since nestedhvm_enabled(d) returns false here.
     * (p2m_init runs too early for HVM_PARAM_* options) */
    rc = p2m_init_nestedp2m(d);
    if ( rc )
    {
        p2m_teardown_hostp2m(d);
        return rc;
    }

    rc = p2m_init_altp2m(d);
    if ( rc )
    {
        p2m_teardown_hostp2m(d);
        p2m_teardown_nestedp2m(d);
    }

    return rc;
}

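/*
 * Tri-state helper: returns 1 if the whole [start, end] range is subject to
 * log-dirty tracking (global log-dirty enabled, or the range fully contained
 * in logdirty_ranges), 0 if none of it is, and -1 if the range only partially
 * overlaps a log-dirty range, so the caller must decide on a per-gfn basis.
 */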
int p2m_is_logdirty_range(struct p2m_domain *p2m, unsigned long start,
                          unsigned long end)
{
    ASSERT(p2m_is_hostp2m(p2m));
    if ( p2m->global_logdirty ||
         rangeset_contains_range(p2m->logdirty_ranges, start, end) )
        return 1;
    if ( rangeset_overlaps_range(p2m->logdirty_ranges, start, end) )
        return -1;
    return 0;
}

void p2m_change_entry_type_global(struct domain *d,
                                  p2m_type_t ot, p2m_type_t nt)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    ASSERT(ot != nt);
    ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt));

    p2m_lock(p2m);
    p2m->change_entry_type_global(p2m, ot, nt);
    p2m->global_logdirty = (nt == p2m_ram_logdirty);
    p2m_unlock(p2m);
}

void p2m_memory_type_changed(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    if ( p2m->memory_type_changed )
    {
        p2m_lock(p2m);
        p2m->memory_type_changed(p2m);
        p2m_unlock(p2m);
    }
}

int p2m_set_ioreq_server(struct domain *d,
                         unsigned int flags,
                         struct hvm_ioreq_server *s)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc;

    /*
     * Use lock to prevent concurrent setting attempts
     * from multiple ioreq servers.
     */
    spin_lock(&p2m->ioreq.lock);

    /* Unmap ioreq server from p2m type by passing flags with 0. */
    if ( flags == 0 )
    {
        rc = -EINVAL;
        if ( p2m->ioreq.server != s )
            goto out;

        p2m->ioreq.server = NULL;
        p2m->ioreq.flags = 0;
    }
    else
    {
        rc = -EBUSY;
        if ( p2m->ioreq.server != NULL )
            goto out;

        /*
         * It is possible that an ioreq server has just been unmapped,
         * released the spin lock, with some p2m_ioreq_server entries
         * in p2m table remained. We shall refuse another ioreq server
         * mapping request in such case.
         */
        if ( read_atomic(&p2m->ioreq.entry_count) )
            goto out;

        p2m->ioreq.server = s;
        p2m->ioreq.flags = flags;
    }

    rc = 0;

 out:
    spin_unlock(&p2m->ioreq.lock);

    return rc;
}

struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d,
                                              unsigned int *flags)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    struct hvm_ioreq_server *s;

    spin_lock(&p2m->ioreq.lock);

    s = p2m->ioreq.server;
    *flags = p2m->ioreq.flags;

    spin_unlock(&p2m->ioreq.lock);
    return s;
}

void p2m_enable_hardware_log_dirty(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    if ( p2m->enable_hardware_log_dirty )
    {
        p2m_lock(p2m);
        p2m->enable_hardware_log_dirty(p2m);
        p2m_unlock(p2m);
    }
}

void p2m_disable_hardware_log_dirty(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    if ( p2m->disable_hardware_log_dirty )
    {
        p2m_lock(p2m);
        p2m->disable_hardware_log_dirty(p2m);
        p2m_unlock(p2m);
    }
}

void p2m_flush_hardware_cached_dirty(struct domain *d)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    if ( p2m->flush_hardware_cached_dirty )
    {
        p2m_lock(p2m);
        p2m->flush_hardware_cached_dirty(p2m);
        p2m_unlock(p2m);
    }
}

/*
 * Force a synchronous P2M TLB flush if a deferred flush is pending.
 *
 * Must be called with the p2m lock held.
 */
void p2m_tlb_flush_sync(struct p2m_domain *p2m)
{
    if ( p2m->need_flush ) {
        p2m->need_flush = 0;
        p2m->tlb_flush(p2m);
    }
}

/*
 * Unlock the p2m lock and do a P2M TLB flush if needed.
 */
void p2m_unlock_and_tlb_flush(struct p2m_domain *p2m)
{
    if ( p2m->need_flush ) {
        p2m->need_flush = 0;
        mm_write_unlock(&p2m->lock);
        p2m->tlb_flush(p2m);
    } else
        mm_write_unlock(&p2m->lock);
}

mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn_l,
                    p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
                    unsigned int *page_order, bool_t locked)
{
    mfn_t mfn;
    gfn_t gfn = _gfn(gfn_l);

    /* Unshare makes no sense without populate. */
    if ( q & P2M_UNSHARE )
        q |= P2M_ALLOC;

    if ( !p2m || !paging_mode_translate(p2m->domain) )
    {
        /* Not necessarily true, but for non-translated guests, we claim
         * it's the most generic kind of memory */
        *t = p2m_ram_rw;
        return _mfn(gfn_l);
    }

    if ( locked )
        /* Grab the lock here, don't release until put_gfn */
        gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);

    if ( (q & P2M_UNSHARE) && p2m_is_shared(*t) )
    {
        ASSERT(p2m_is_hostp2m(p2m));
        /* Try to unshare. If we fail, communicate ENOMEM without
         * sleeping. */
        if ( mem_sharing_unshare_page(p2m->domain, gfn_l, 0) < 0 )
            (void)mem_sharing_notify_enomem(p2m->domain, gfn_l, 0);
        mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL);
    }

    if (unlikely((p2m_is_broken(*t))))
    {
        /* Return invalid_mfn to avoid caller's access */
        mfn = INVALID_MFN;
        if ( q & P2M_ALLOC )
            domain_crash(p2m->domain);
    }

    return mfn;
}

void __put_gfn(struct p2m_domain *p2m, unsigned long gfn)
{
    if ( !p2m || !paging_mode_translate(p2m->domain) )
        /* Nothing to do in this case */
        return;

    ASSERT(gfn_locked_by_me(p2m, gfn));

    gfn_unlock(p2m, gfn, 0);
}

/* Atomically look up a GFN and take a reference count on the backing page. */
struct page_info *p2m_get_page_from_gfn(
    struct p2m_domain *p2m, gfn_t gfn,
    p2m_type_t *t, p2m_access_t *a, p2m_query_t q)
{
    struct page_info *page = NULL;
    p2m_access_t _a;
    p2m_type_t _t;
    mfn_t mfn;

    /* Allow t or a to be NULL */
    t = t ?: &_t;
    a = a ?: &_a;

    if ( likely(!p2m_locked_by_me(p2m)) )
    {
        /* Fast path: look up and get out */
        p2m_read_lock(p2m);
        mfn = __get_gfn_type_access(p2m, gfn_x(gfn), t, a, 0, NULL, 0);
        if ( p2m_is_any_ram(*t) && mfn_valid(mfn)
             && !((q & P2M_UNSHARE) && p2m_is_shared(*t)) )
        {
            page = mfn_to_page(mfn);
            if ( unlikely(p2m_is_foreign(*t)) )
            {
                struct domain *fdom = page_get_owner_and_reference(page);

                ASSERT(fdom != p2m->domain);
                if ( fdom == NULL )
                    page = NULL;
            }
            else if ( !get_page(page, p2m->domain) &&
                      /* Page could be shared */
                      (!p2m_is_shared(*t) || !get_page(page, dom_cow)) )
                page = NULL;
        }
        p2m_read_unlock(p2m);

        if ( page )
            return page;

        /* Error path: not a suitable GFN at all */
        if ( !p2m_is_ram(*t) && !p2m_is_paging(*t) && !p2m_is_pod(*t) )
            return NULL;
    }

    /* Slow path: take the write lock and do fixups */
    mfn = get_gfn_type_access(p2m, gfn_x(gfn), t, a, q, NULL);
    if ( p2m_is_ram(*t) && mfn_valid(mfn) )
    {
        page = mfn_to_page(mfn);
        if ( !get_page(page, p2m->domain) )
            page = NULL;
    }
    put_gfn(p2m->domain, gfn_x(gfn));

    return page;
}

/* Returns: 0 for success, -errno for failure */
int p2m_set_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
                  unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
{
    struct domain *d = p2m->domain;
    unsigned long todo = 1ul << page_order;
    unsigned int order;
    int set_rc, rc = 0;

    ASSERT(gfn_locked_by_me(p2m, gfn));

    while ( todo )
    {
        if ( hap_enabled(d) )
        {
            unsigned long fn_mask = !mfn_eq(mfn, INVALID_MFN) ? mfn_x(mfn) : 0;

            fn_mask |= gfn_x(gfn) | todo;

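            /*
             * Pick the largest mapping order the alignment and hardware
             * allow: fn_mask has a bit set wherever the mfn, the gfn or the
             * remaining count breaks alignment.  For example (assuming the
             * usual x86 values PAGE_ORDER_1G = 18 and PAGE_ORDER_2M = 9), a
             * 1G mapping is only chosen when the low 18 bits of all three
             * are clear and hap_has_1gb is set.
             */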
            order = (!(fn_mask & ((1ul << PAGE_ORDER_1G) - 1)) &&
                     hap_has_1gb) ? PAGE_ORDER_1G :
                    (!(fn_mask & ((1ul << PAGE_ORDER_2M) - 1)) &&
                     hap_has_2mb) ? PAGE_ORDER_2M : PAGE_ORDER_4K;
        }
        else
            order = 0;

        set_rc = p2m->set_entry(p2m, gfn, mfn, order, p2mt, p2ma, -1);
        if ( set_rc )
            rc = set_rc;

        gfn = gfn_add(gfn, 1ul << order);
        if ( !mfn_eq(mfn, INVALID_MFN) )
            mfn = mfn_add(mfn, 1ul << order);
        todo -= 1ul << order;
    }

    return rc;
}

mfn_t p2m_alloc_ptp(struct p2m_domain *p2m, unsigned int level)
{
    struct page_info *pg;

    ASSERT(p2m);
    ASSERT(p2m->domain);
    ASSERT(p2m->domain->arch.paging.alloc_page);
    pg = p2m->domain->arch.paging.alloc_page(p2m->domain);
    if ( !pg )
        return INVALID_MFN;

    page_list_add_tail(pg, &p2m->pages);
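    /*
     * Encode the paging level in the type bits: the BUILD_BUG_ONs below
     * check that PGT_lN_page_table == PGT_l1_page_table * N, which is what
     * makes "PGT_l1_page_table * level" produce the right type for each
     * level.
     */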
    BUILD_BUG_ON(PGT_l1_page_table * 2 != PGT_l2_page_table);
    BUILD_BUG_ON(PGT_l1_page_table * 3 != PGT_l3_page_table);
    BUILD_BUG_ON(PGT_l1_page_table * 4 != PGT_l4_page_table);
    pg->u.inuse.type_info = (PGT_l1_page_table * level) | 1 | PGT_validated;

    return page_to_mfn(pg);
}

void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg)
{
    ASSERT(pg);
    ASSERT(p2m);
    ASSERT(p2m->domain);
    ASSERT(p2m->domain->arch.paging.free_page);

    page_list_del(pg, &p2m->pages);
    p2m->domain->arch.paging.free_page(p2m->domain, pg);

    return;
}

/*
 * Allocate a new p2m table for a domain.
 *
 * The structure of the p2m table is that of a pagetable for xen (i.e. it is
 * controlled by CONFIG_PAGING_LEVELS).
 *
 * Returns 0 for success, -errno for failure.
 */
int p2m_alloc_table(struct p2m_domain *p2m)
{
    mfn_t top_mfn;
    struct domain *d = p2m->domain;
    int rc = 0;

    p2m_lock(p2m);

    if ( p2m_is_hostp2m(p2m)
         && !page_list_empty(&d->page_list) )
    {
        P2M_ERROR("dom %d already has memory allocated\n", d->domain_id);
        p2m_unlock(p2m);
        return -EINVAL;
    }

    if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
    {
        P2M_ERROR("p2m already allocated for this domain\n");
        p2m_unlock(p2m);
        return -EINVAL;
    }

    P2M_PRINTK("allocating p2m table\n");

    top_mfn = p2m_alloc_ptp(p2m, 4);
    if ( mfn_eq(top_mfn, INVALID_MFN) )
    {
        p2m_unlock(p2m);
        return -ENOMEM;
    }

    p2m->phys_table = pagetable_from_mfn(top_mfn);

    if ( hap_enabled(d) )
        iommu_share_p2m_table(d);

    P2M_PRINTK("populating p2m table\n");

    /* Initialise physmap tables for slot zero. Other code assumes this. */
    p2m->defer_nested_flush = 1;
    rc = p2m_set_entry(p2m, _gfn(0), INVALID_MFN, PAGE_ORDER_4K,
                       p2m_invalid, p2m->default_access);
    p2m->defer_nested_flush = 0;
    p2m_unlock(p2m);
    if ( !rc )
        P2M_PRINTK("p2m table initialised for slot zero\n");
    else
        P2M_PRINTK("failed to initialise p2m table for slot zero (%d)\n", rc);
    return rc;
}

/*
 * hvm fixme: when adding support for pvh non-hardware domains, this path must
 * cleanup any foreign p2m types (release refcnts on them).
 */
void p2m_teardown(struct p2m_domain *p2m)
/* Return all the p2m pages to Xen.
 * We know we don't have any extra mappings to these pages */
{
    struct page_info *pg;
    struct domain *d;

    if (p2m == NULL)
        return;

    d = p2m->domain;

    p2m_lock(p2m);
    ASSERT(atomic_read(&d->shr_pages) == 0);
    p2m->phys_table = pagetable_null();

    while ( (pg = page_list_remove_head(&p2m->pages)) )
        d->arch.paging.free_page(d, pg);
    p2m_unlock(p2m);
}

void p2m_final_teardown(struct domain *d)
{
    /*
     * We must teardown both of them unconditionally because
     * we initialise them unconditionally.
     */
    p2m_teardown_altp2m(d);
    p2m_teardown_nestedp2m(d);

    /* Iterate over all p2m tables per domain */
    p2m_teardown_hostp2m(d);
}


static int
p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn_l, unsigned long mfn,
                unsigned int page_order)
{
    unsigned long i;
    gfn_t gfn = _gfn(gfn_l);
    mfn_t mfn_return;
    p2m_type_t t;
    p2m_access_t a;

    if ( !paging_mode_translate(p2m->domain) )
    {
        int rc = 0;

        if ( need_iommu(p2m->domain) )
        {
            for ( i = 0; i < (1 << page_order); i++ )
            {
                int ret = iommu_unmap_page(p2m->domain, mfn + i);

                if ( !rc )
                    rc = ret;
            }
        }

        return rc;
    }

    ASSERT(gfn_locked_by_me(p2m, gfn));
    P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn_l, mfn);

    if ( mfn_valid(_mfn(mfn)) )
    {
        for ( i = 0; i < (1UL << page_order); i++ )
        {
            mfn_return = p2m->get_entry(p2m, gfn_add(gfn, i), &t, &a, 0,
                                        NULL, NULL);
            if ( !p2m_is_grant(t) && !p2m_is_shared(t) && !p2m_is_foreign(t) )
                set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
            ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );
        }
    }
    return p2m_set_entry(p2m, gfn, INVALID_MFN, page_order, p2m_invalid,
                         p2m->default_access);
}

int
guest_physmap_remove_page(struct domain *d, gfn_t gfn,
                          mfn_t mfn, unsigned int page_order)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc;
    gfn_lock(p2m, gfn, page_order);
    rc = p2m_remove_page(p2m, gfn_x(gfn), mfn_x(mfn), page_order);
    gfn_unlock(p2m, gfn, page_order);
    return rc;
}

int
guest_physmap_add_entry(struct domain *d, gfn_t gfn, mfn_t mfn,
                        unsigned int page_order, p2m_type_t t)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    unsigned long i;
    gfn_t ogfn;
    p2m_type_t ot;
    p2m_access_t a;
    mfn_t omfn;
    int pod_count = 0;
    int rc = 0;

    if ( !paging_mode_translate(d) )
    {
        if ( need_iommu(d) && t == p2m_ram_rw )
        {
            for ( i = 0; i < (1 << page_order); i++ )
            {
                rc = iommu_map_page(d, mfn_x(mfn_add(mfn, i)),
                                    mfn_x(mfn_add(mfn, i)),
                                    IOMMUF_readable|IOMMUF_writable);
                if ( rc != 0 )
                {
                    while ( i-- > 0 )
                        /* If statement to satisfy __must_check. */
                        if ( iommu_unmap_page(d, mfn_x(mfn_add(mfn, i))) )
                            continue;

                    return rc;
                }
            }
        }
        return 0;
    }

    /* foreign pages are added thru p2m_add_foreign */
    if ( p2m_is_foreign(t) )
        return -EINVAL;

    p2m_lock(p2m);

    P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn_x(gfn), mfn_x(mfn));

    /* First, remove m->p mappings for existing p->m mappings */
    for ( i = 0; i < (1UL << page_order); i++ )
    {
        omfn = p2m->get_entry(p2m, gfn_add(gfn, i), &ot,
                              &a, 0, NULL, NULL);
        if ( p2m_is_shared(ot) )
        {
            /* Do an unshare to cleanly take care of all corner
             * cases. */
            int rc;
            rc = mem_sharing_unshare_page(p2m->domain,
                                          gfn_x(gfn_add(gfn, i)), 0);
            if ( rc )
            {
                p2m_unlock(p2m);
                /* NOTE: Should a guest domain bring this upon itself,
                 * there is not a whole lot we can do. We are buried
                 * deep in locks from most code paths by now. So, fail
                 * the call and don't try to sleep on a wait queue
                 * while placing the mem event.
                 *
                 * However, all current (changeset 3432abcf9380) code
                 * paths avoid this unsavoury situation. For now.
                 *
                 * Foreign domains are okay to place an event as they
                 * won't go to sleep. */
                (void)mem_sharing_notify_enomem(p2m->domain,
                                                gfn_x(gfn_add(gfn, i)),
                                                0);
                return rc;
            }
            omfn = p2m->get_entry(p2m, gfn_add(gfn, i),
                                  &ot, &a, 0, NULL, NULL);
            ASSERT(!p2m_is_shared(ot));
        }
        if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
        {
            /* Really shouldn't be unmapping grant/foreign maps this way */
            domain_crash(d);
            p2m_unlock(p2m);

            return -EINVAL;
        }
        else if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
        {
            ASSERT(mfn_valid(omfn));
            set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
        }
        else if ( ot == p2m_populate_on_demand )
        {
            /* Count how many PoD entries we'll be replacing if successful */
            pod_count++;
        }
        else if ( p2m_is_paging(ot) && (ot != p2m_ram_paging_out) )
        {
            /* We're plugging a hole in the physmap where a paged out page was */
            atomic_dec(&d->paged_pages);
        }
    }

    /* Then, look for m->p mappings for this range and deal with them */
    for ( i = 0; i < (1UL << page_order); i++ )
    {
        if ( page_get_owner(mfn_to_page(mfn_add(mfn, i))) == dom_cow )
        {
            /* This is no way to add a shared page to your physmap! */
            gdprintk(XENLOG_ERR, "Adding shared mfn %lx directly to dom%d physmap not allowed.\n",
                     mfn_x(mfn_add(mfn, i)), d->domain_id);
            p2m_unlock(p2m);
            return -EINVAL;
        }
        if ( page_get_owner(mfn_to_page(mfn_add(mfn, i))) != d )
            continue;
        ogfn = _gfn(mfn_to_gfn(d, mfn_add(mfn, i)));
        if ( !gfn_eq(ogfn, _gfn(INVALID_M2P_ENTRY)) &&
             !gfn_eq(ogfn, gfn_add(gfn, i)) )
        {
            /* This machine frame is already mapped at another physical
             * address */
            P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                      mfn_x(mfn_add(mfn, i)), gfn_x(ogfn),
                      gfn_x(gfn_add(gfn, i)));
            omfn = p2m->get_entry(p2m, ogfn, &ot, &a, 0, NULL, NULL);
            if ( p2m_is_ram(ot) && !p2m_is_paged(ot) )
            {
                ASSERT(mfn_valid(omfn));
                P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
                          gfn_x(ogfn) , mfn_x(omfn));
                if ( mfn_eq(omfn, mfn_add(mfn, i)) )
                    p2m_remove_page(p2m, gfn_x(ogfn), mfn_x(mfn_add(mfn, i)),
                                    0);
            }
        }
    }

    /* Now, actually do the two-way mapping */
    if ( mfn_valid(mfn) )
    {
        rc = p2m_set_entry(p2m, gfn, mfn, page_order, t,
                           p2m->default_access);
        if ( rc )
            goto out; /* Failed to update p2m, bail without updating m2p. */

        if ( !p2m_is_grant(t) )
        {
            for ( i = 0; i < (1UL << page_order); i++ )
                set_gpfn_from_mfn(mfn_x(mfn_add(mfn, i)),
                                  gfn_x(gfn_add(gfn, i)));
        }
    }
    else
    {
        gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
                 gfn_x(gfn), mfn_x(mfn));
        rc = p2m_set_entry(p2m, gfn, INVALID_MFN, page_order,
                           p2m_invalid, p2m->default_access);
        if ( rc == 0 )
        {
            pod_lock(p2m);
            p2m->pod.entry_count -= pod_count;
            BUG_ON(p2m->pod.entry_count < 0);
            pod_unlock(p2m);
        }
    }

out:
    p2m_unlock(p2m);

    return rc;
}


/*
 * Modify the p2m type of a single gfn from ot to nt.
 * Returns: 0 for success, -errno for failure.
 * Resets the access permissions.
 */
int p2m_change_type_one(struct domain *d, unsigned long gfn_l,
                       p2m_type_t ot, p2m_type_t nt)
{
    p2m_access_t a;
    p2m_type_t pt;
    gfn_t gfn = _gfn(gfn_l);
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc;

    BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
    BUG_ON(p2m_is_foreign(ot) || p2m_is_foreign(nt));

    gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, &pt, &a, 0, NULL, NULL);
    rc = likely(pt == ot)
         ? p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, nt,
                         p2m->default_access)
         : -EBUSY;

    gfn_unlock(p2m, gfn, 0);

    return rc;
}

/* Modify the p2m type of a range of gfns from ot to nt. */
void p2m_change_type_range(struct domain *d,
                           unsigned long start, unsigned long end,
                           p2m_type_t ot, p2m_type_t nt)
{
    unsigned long gfn = start;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc = 0;

    ASSERT(ot != nt);
    ASSERT(p2m_is_changeable(ot) && p2m_is_changeable(nt));

    p2m_lock(p2m);
    p2m->defer_nested_flush = 1;

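    /*
     * Part of the range may lie above the highest mapped pfn.  A range that
     * starts at 0 and extends past max_mapped_pfn is handled as a global
     * type change rather than an entry-by-entry walk; otherwise the range
     * is simply clipped to the mapped part of the p2m.
     */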
    if ( unlikely(end > p2m->max_mapped_pfn) )
    {
        if ( !gfn )
        {
            p2m->change_entry_type_global(p2m, ot, nt);
            gfn = end;
        }
        end = p2m->max_mapped_pfn + 1;
    }
    if ( gfn < end )
        rc = p2m->change_entry_type_range(p2m, ot, nt, gfn, end - 1);
    if ( rc )
    {
        printk(XENLOG_G_ERR "Error %d changing Dom%d GFNs [%lx,%lx] from %d to %d\n",
               rc, d->domain_id, start, end - 1, ot, nt);
        domain_crash(d);
    }

    switch ( nt )
    {
    case p2m_ram_rw:
        if ( ot == p2m_ram_logdirty )
            rc = rangeset_remove_range(p2m->logdirty_ranges, start, end - 1);
        break;
    case p2m_ram_logdirty:
        if ( ot == p2m_ram_rw )
            rc = rangeset_add_range(p2m->logdirty_ranges, start, end - 1);
        break;
    default:
        break;
    }
    if ( rc )
    {
        printk(XENLOG_G_ERR "Error %d manipulating Dom%d's log-dirty ranges\n",
               rc, d->domain_id);
        domain_crash(d);
    }

    p2m->defer_nested_flush = 0;
    if ( nestedhvm_enabled(d) )
        p2m_flush_nestedp2m(d);
    p2m_unlock(p2m);
}

/*
 * Finish p2m type change for gfns which are marked as need_recalc in a range.
 * Returns: 0/1 for success, negative for failure
 */
int p2m_finish_type_change(struct domain *d,
                           gfn_t first_gfn, unsigned long max_nr)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    unsigned long gfn = gfn_x(first_gfn);
    unsigned long last_gfn = gfn + max_nr - 1;
    int rc = 0;

    p2m_lock(p2m);

    last_gfn = min(last_gfn, p2m->max_mapped_pfn);
    while ( gfn <= last_gfn )
    {
        rc = p2m->recalc(p2m, gfn);
        /*
         * ept->recalc could return 0/1/-ENOMEM. pt->recalc could return
         * 0/-ENOMEM/-ENOENT; -ENOENT isn't an error as we are looping
         * over gfns here.
         */
        if ( rc == -ENOENT )
            rc = 0;
        else if ( rc < 0 )
        {
            gdprintk(XENLOG_ERR, "p2m->recalc failed! Dom%d gfn=%lx\n",
                     d->domain_id, gfn);
            break;
        }

        gfn++;
    }

    p2m_unlock(p2m);

    return rc;
}

/*
 * Returns:
 *    0              for success
 *    -errno         for failure
 *    1 + new order  for caller to retry with smaller order (guaranteed
 *                   to be smaller than order passed in)
 */
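/*
 * Illustrative retry pattern for the "1 + new order" contract above (a
 * sketch only, not lifted from any particular caller): on a positive return
 * value the operation is retried with the smaller order, e.g.
 *
 *     while ( (rc = set_typed_p2m_entry(d, gfn, mfn, order, t, a)) > 0 )
 *         order = rc - 1;   // guaranteed to shrink, so this terminates
 *
 * with rc <= 0 then holding the usual success / -errno result.
 */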
static int set_typed_p2m_entry(struct domain *d, unsigned long gfn_l,
                               mfn_t mfn, unsigned int order,
                               p2m_type_t gfn_p2mt, p2m_access_t access)
{
    int rc = 0;
    p2m_access_t a;
    p2m_type_t ot;
    mfn_t omfn;
    gfn_t gfn = _gfn(gfn_l);
    unsigned int cur_order = 0;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    if ( !paging_mode_translate(d) )
        return -EIO;

    gfn_lock(p2m, gfn, order);
    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, &cur_order, NULL);
    if ( cur_order < order )
    {
        gfn_unlock(p2m, gfn, order);
        return cur_order + 1;
    }
    if ( p2m_is_grant(ot) || p2m_is_foreign(ot) )
    {
        gfn_unlock(p2m, gfn, order);
        domain_crash(d);
        return -ENOENT;
    }
    else if ( p2m_is_ram(ot) )
    {
        unsigned long i;

        for ( i = 0; i < (1UL << order); ++i )
        {
            ASSERT(mfn_valid(_mfn(mfn_x(omfn) + i)));
            set_gpfn_from_mfn(mfn_x(omfn) + i, INVALID_M2P_ENTRY);
        }
    }

    P2M_DEBUG("set %d %lx %lx\n", gfn_p2mt, gfn_l, mfn_x(mfn));
    rc = p2m_set_entry(p2m, gfn, mfn, order, gfn_p2mt, access);
    if ( rc )
        gdprintk(XENLOG_ERR, "p2m_set_entry: %#lx:%u -> %d (0x%"PRI_mfn")\n",
                 gfn_l, order, rc, mfn_x(mfn));
    else if ( p2m_is_pod(ot) )
    {
        pod_lock(p2m);
        p2m->pod.entry_count -= 1UL << order;
        BUG_ON(p2m->pod.entry_count < 0);
        pod_unlock(p2m);
    }
    gfn_unlock(p2m, gfn, order);

    return rc;
}

/* Set foreign mfn in the given guest's p2m table. */
static int set_foreign_p2m_entry(struct domain *d, unsigned long gfn,
                                 mfn_t mfn)
{
    return set_typed_p2m_entry(d, gfn, mfn, PAGE_ORDER_4K, p2m_map_foreign,
                               p2m_get_hostp2m(d)->default_access);
}

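/*
 * MMIO mappings overlapping mmio_ro_ranges must not be created with a
 * superpage: such requests are bounced back as PAGE_ORDER_4K + 1 so that
 * the caller retries with 4k pages and the read-only ranges can be
 * honoured on a per-page basis.
 */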
int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
                       unsigned int order, p2m_access_t access)
{
    if ( order > PAGE_ORDER_4K &&
         rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn),
                                 mfn_x(mfn) + (1UL << order) - 1) )
        return PAGE_ORDER_4K + 1;

    return set_typed_p2m_entry(d, gfn, mfn, order, p2m_mmio_direct, access);
}

int set_identity_p2m_entry(struct domain *d, unsigned long gfn_l,
                           p2m_access_t p2ma, unsigned int flag)
{
    p2m_type_t p2mt;
    p2m_access_t a;
    gfn_t gfn = _gfn(gfn_l);
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int ret;

    if ( !paging_mode_translate(p2m->domain) )
    {
        if ( !need_iommu(d) )
            return 0;
        return iommu_map_page(d, gfn_l, gfn_l, IOMMUF_readable|IOMMUF_writable);
    }

    gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);

    if ( p2mt == p2m_invalid || p2mt == p2m_mmio_dm )
        ret = p2m_set_entry(p2m, gfn, _mfn(gfn_l), PAGE_ORDER_4K,
                            p2m_mmio_direct, p2ma);
    else if ( mfn_x(mfn) == gfn_l && p2mt == p2m_mmio_direct && a == p2ma )
        ret = 0;
    else
    {
        if ( flag & XEN_DOMCTL_DEV_RDM_RELAXED )
            ret = 0;
        else
            ret = -EBUSY;
        printk(XENLOG_G_WARNING
               "Cannot setup identity map d%d:%lx,"
               " gfn already mapped to %lx.\n",
               d->domain_id, gfn_l, mfn_x(mfn));
    }

    gfn_unlock(p2m, gfn, 0);
    return ret;
}

/*
 * Returns:
 *    0        for success
 *    -errno   for failure
 *    order+1  for caller to retry with order (guaranteed smaller than
 *             the order value passed in)
 */
int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn_l, mfn_t mfn,
                         unsigned int order)
{
    int rc = -EINVAL;
    gfn_t gfn = _gfn(gfn_l);
    mfn_t actual_mfn;
    p2m_access_t a;
    p2m_type_t t;
    unsigned int cur_order = 0;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    if ( !paging_mode_translate(d) )
        return -EIO;

    gfn_lock(p2m, gfn, order);
    actual_mfn = p2m->get_entry(p2m, gfn, &t, &a, 0, &cur_order, NULL);
    if ( cur_order < order )
    {
        rc = cur_order + 1;
        goto out;
    }

    /* Do not use mfn_valid() here as it will usually fail for MMIO pages. */
    if ( mfn_eq(actual_mfn, INVALID_MFN) || (t != p2m_mmio_direct) )
    {
        gdprintk(XENLOG_ERR,
                 "gfn_to_mfn failed! gfn=%08lx type:%d\n", gfn_l, t);
        goto out;
    }
    if ( mfn_x(mfn) != mfn_x(actual_mfn) )
        gdprintk(XENLOG_WARNING,
                 "no mapping between mfn %08lx and gfn %08lx\n",
                 mfn_x(mfn), gfn_l);
    rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order, p2m_invalid,
                       p2m->default_access);

 out:
    gfn_unlock(p2m, gfn, order);

    return rc;
}

int clear_identity_p2m_entry(struct domain *d, unsigned long gfn_l)
{
    p2m_type_t p2mt;
    p2m_access_t a;
    gfn_t gfn = _gfn(gfn_l);
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int ret;

    if ( !paging_mode_translate(d) )
    {
        if ( !need_iommu(d) )
            return 0;
        return iommu_unmap_page(d, gfn_l);
    }

    gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
    if ( p2mt == p2m_mmio_direct && mfn_x(mfn) == gfn_l )
    {
        ret = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
                            p2m_invalid, p2m->default_access);
        gfn_unlock(p2m, gfn, 0);
    }
    else
    {
        gfn_unlock(p2m, gfn, 0);
        printk(XENLOG_G_WARNING
               "non-identity map d%d:%lx not cleared (mapped to %lx)\n",
               d->domain_id, gfn_l, mfn_x(mfn));
        ret = 0;
    }

    return ret;
}

/* Returns: 0 for success, -errno for failure */
int set_shared_p2m_entry(struct domain *d, unsigned long gfn_l, mfn_t mfn)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc = 0;
    gfn_t gfn = _gfn(gfn_l);
    p2m_access_t a;
    p2m_type_t ot;
    mfn_t omfn;
    unsigned long pg_type;

    if ( !paging_mode_translate(p2m->domain) )
        return -EIO;

    gfn_lock(p2m, gfn, 0);
    omfn = p2m->get_entry(p2m, gfn, &ot, &a, 0, NULL, NULL);
    /* At the moment we only allow p2m change if gfn has already been made
     * sharable first */
    ASSERT(p2m_is_shared(ot));
    ASSERT(mfn_valid(omfn));
    /* Set the m2p entry to invalid only if there are no further type
     * refs to this page as shared */
    pg_type = read_atomic(&(mfn_to_page(omfn)->u.inuse.type_info));
    if ( (pg_type & PGT_count_mask) == 0
         || (pg_type & PGT_type_mask) != PGT_shared_page )
        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);

    P2M_DEBUG("set shared %lx %lx\n", gfn_l, mfn_x(mfn));
    rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_shared,
                       p2m->default_access);
    gfn_unlock(p2m, gfn, 0);
    if ( rc )
        gdprintk(XENLOG_ERR,
                 "p2m_set_entry failed! mfn=%08lx rc:%d\n",
                 mfn_x(get_gfn_query_unlocked(p2m->domain, gfn_l, &ot)), rc);
    return rc;
}

/**
 * p2m_mem_paging_nominate - Mark a guest page as to-be-paged-out
 * @d: guest domain
 * @gfn: guest page to nominate
 *
 * Returns 0 for success or negative errno values if gfn is not pageable.
 *
 * p2m_mem_paging_nominate() is called by the pager and checks if a guest page
 * can be paged out. If the following conditions are met the p2mt will be
 * changed:
 * - the gfn is backed by a mfn
 * - the p2mt of the gfn is pageable
 * - the mfn is not used for IO
 * - the mfn has exactly one user and has no special meaning
 *
 * Once the p2mt is changed the page is readonly for the guest.  On success the
 * pager can write the page contents to disk and later evict the page.
 */
int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn_l)
{
    struct page_info *page;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    p2m_type_t p2mt;
    p2m_access_t a;
    gfn_t gfn = _gfn(gfn_l);
    mfn_t mfn;
    int ret = -EBUSY;

    gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);

    /* Check if mfn is valid */
    if ( !mfn_valid(mfn) )
        goto out;

    /* Check p2m type */
    if ( !p2m_is_pageable(p2mt) )
        goto out;

    /* Check for io memory page */
    if ( is_iomem_page(mfn) )
        goto out;

    /* Check page count and type */
    page = mfn_to_page(mfn);
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
         (1 | PGC_allocated) )
        goto out;

    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
        goto out;

    /* Fix p2m entry */
    ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_out, a);

 out:
    gfn_unlock(p2m, gfn, 0);
    return ret;
}

/**
 * p2m_mem_paging_evict - Mark a guest page as paged-out
 * @d: guest domain
 * @gfn: guest page to evict
 *
 * Returns 0 for success or negative errno values if eviction is not possible.
 *
 * p2m_mem_paging_evict() is called by the pager and will free a guest page and
 * release it back to Xen. If the following conditions are met the page can be
 * freed:
 * - the gfn is backed by a mfn
 * - the gfn was nominated
 * - the mfn has still exactly one user and has no special meaning
 *
 * After successful nomination some other process could have mapped the page. In
 * this case eviction can not be done. If the gfn was populated before the pager
 * could evict it, eviction can not be done either. In this case the gfn is
 * still backed by a mfn.
 */
int p2m_mem_paging_evict(struct domain *d, unsigned long gfn_l)
{
    struct page_info *page;
    p2m_type_t p2mt;
    p2m_access_t a;
    gfn_t gfn = _gfn(gfn_l);
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int ret = -EBUSY;

    gfn_lock(p2m, gfn, 0);

    /* Get mfn */
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
    if ( unlikely(!mfn_valid(mfn)) )
        goto out;

    /* Allow only nominated pages */
    if ( p2mt != p2m_ram_paging_out )
        goto out;

    /* Get the page so it doesn't get modified under Xen's feet */
    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
        goto out;

    /* Check page count and type once more */
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
         (2 | PGC_allocated) )
        goto out_put;

    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
        goto out_put;

    /* Decrement guest domain's ref count of the page */
    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    /* Remove mapping from p2m table */
    ret = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
                        p2m_ram_paged, a);

    /* Clear content before returning the page to Xen */
    scrub_one_page(page);

    /* Track number of paged gfns */
    atomic_inc(&d->paged_pages);

 out_put:
    /* Put the page back so it gets freed */
    put_page(page);

 out:
    gfn_unlock(p2m, gfn, 0);
    return ret;
}

/**
 * p2m_mem_paging_drop_page - Tell pager to drop its reference to a paged page
 * @d: guest domain
 * @gfn: guest page to drop
 *
 * p2m_mem_paging_drop_page() will notify the pager that a paged-out gfn was
 * released by the guest. The pager is supposed to drop its reference of the
 * gfn.
 */
void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn,
                                p2m_type_t p2mt)
{
    vm_event_request_t req = {
        .reason = VM_EVENT_REASON_MEM_PAGING,
        .u.mem_paging.gfn = gfn
    };

    /* We allow no ring in this unique case, because it won't affect
     * correctness of the guest execution at this point.  If this is the only
     * page that happens to be paged-out, we'll be okay, but it's likely the
     * guest will crash shortly anyways. */
    int rc = vm_event_claim_slot(d, d->vm_event_paging);
    if ( rc < 0 )
        return;

    /* Send release notification to pager */
    req.u.mem_paging.flags = MEM_PAGING_DROP_PAGE;

    /* Update stats unless the page hasn't yet been evicted */
    if ( p2mt != p2m_ram_paging_out )
        atomic_dec(&d->paged_pages);
    else
        /* Evict will fail now, tag this request for pager */
        req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL;

    vm_event_put_request(d, d->vm_event_paging, &req);
}

1494 /**
1495  * p2m_mem_paging_populate - Tell pager to populate a paged page
1496  * @d: guest domain
1497  * @gfn: guest page in paging state
1498  *
1499  * p2m_mem_paging_populate() will notify the pager that a page in any of the
1500  * paging states needs to be written back into the guest.
1501  * This function needs to be called whenever gfn_to_mfn() returns any of the p2m
1502  * paging types because the gfn may not be backed by a mfn.
1503  *
1504  * The gfn can be in any of the paging states, but the pager needs only be
1505  * notified when the gfn is in the paging-out path (paging_out or paged).  This
1506  * function may be called more than once from several vcpus. If the vcpu belongs
1507  * to the guest, the vcpu must be stopped and the pager notified that the vcpu
1508  * was stopped. The pager needs to handle several requests for the same gfn.
1509  *
1510  * If the gfn is not in the paging-out path and the vcpu does not belong to the
1511  * guest, nothing needs to be done and the function assumes that a request was
1512  * already sent to the pager. In this case the caller has to try again until the
1513  * gfn is fully paged in again.
1514  */
p2m_mem_paging_populate(struct domain * d,unsigned long gfn_l)1515 void p2m_mem_paging_populate(struct domain *d, unsigned long gfn_l)
1516 {
1517     struct vcpu *v = current;
1518     vm_event_request_t req = {
1519         .reason = VM_EVENT_REASON_MEM_PAGING,
1520         .u.mem_paging.gfn = gfn_l
1521     };
1522     p2m_type_t p2mt;
1523     p2m_access_t a;
1524     gfn_t gfn = _gfn(gfn_l);
1525     mfn_t mfn;
1526     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1527 
1528     /* We're paging. There should be a ring */
1529     int rc = vm_event_claim_slot(d, d->vm_event_paging);
1530     if ( rc == -ENOSYS )
1531     {
1532         gdprintk(XENLOG_ERR, "Domain %hu paging gfn %lx yet no ring "
1533                              "in place\n", d->domain_id, gfn_l);
1534         /* Prevent the vcpu from faulting repeatedly on the same gfn */
1535         if ( v->domain == d )
1536             vcpu_pause_nosync(v);
1537         domain_crash(d);
1538         return;
1539     }
1540     else if ( rc < 0 )
1541         return;
1542 
1543     /* Fix p2m mapping */
1544     gfn_lock(p2m, gfn, 0);
1545     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1546     /* Allow only nominated or evicted pages to enter page-in path */
1547     if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
1548     {
1549         /* Evict will fail now, tag this request for pager */
1550         if ( p2mt == p2m_ram_paging_out )
1551             req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL;
1552 
1553         p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in, a);
1554     }
1555     gfn_unlock(p2m, gfn, 0);
1556 
1557     /* Pause domain if request came from guest and gfn has paging type */
1558     if ( p2m_is_paging(p2mt) && v->domain == d )
1559     {
1560         vm_event_vcpu_pause(v);
1561         req.flags |= VM_EVENT_FLAG_VCPU_PAUSED;
1562     }
1563     /* No need to inform pager if the gfn is not in the page-out path */
1564     else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
1565     {
1566         /* gfn is already on its way back and vcpu is not paused */
1567         vm_event_cancel_slot(d, d->vm_event_paging);
1568         return;
1569     }
1570 
1571     /* Send request to pager */
1572     req.u.mem_paging.p2mt = p2mt;
1573     req.vcpu_id = v->vcpu_id;
1574 
1575     vm_event_put_request(d, d->vm_event_paging, &req);
1576 }
1577 
1578 /**
1579  * p2m_mem_paging_prep - Allocate a new page for the guest
1580  * @d: guest domain
1581  * @gfn: guest page in paging state
1582  *
1583  * p2m_mem_paging_prep() will allocate a new page for the guest if the gfn is
1584  * not backed by a mfn. It is called by the pager.
1585  * It is required that the gfn was already populated. The gfn may already have a
1586  * mfn if populate was called for  gfn which was nominated but not evicted. In
1587  * this case only the p2mt needs to be forwarded.
1588  */
p2m_mem_paging_prep(struct domain * d,unsigned long gfn_l,uint64_t buffer)1589 int p2m_mem_paging_prep(struct domain *d, unsigned long gfn_l, uint64_t buffer)
1590 {
1591     struct page_info *page;
1592     p2m_type_t p2mt;
1593     p2m_access_t a;
1594     gfn_t gfn = _gfn(gfn_l);
1595     mfn_t mfn;
1596     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1597     int ret, page_extant = 1;
1598     const void *user_ptr = (const void *) buffer;
1599 
1600     if ( user_ptr )
1601         /* Sanity check the buffer and bail out early if trouble */
1602         if ( (buffer & (PAGE_SIZE - 1)) ||
1603              (!access_ok(user_ptr, PAGE_SIZE)) )
1604             return -EINVAL;
1605 
1606     gfn_lock(p2m, gfn, 0);
1607 
1608     mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1609 
1610     ret = -ENOENT;
1611     /* Allow missing pages */
1612     if ( (p2mt != p2m_ram_paging_in) && (p2mt != p2m_ram_paged) )
1613         goto out;
1614 
1615     /* Allocate a page if the gfn does not have one yet */
1616     if ( !mfn_valid(mfn) )
1617     {
1618         /* If the user did not provide a buffer, we disallow */
1619         ret = -EINVAL;
1620         if ( unlikely(user_ptr == NULL) )
1621             goto out;
1622         /* Get a free page */
1623         ret = -ENOMEM;
1624         page = alloc_domheap_page(p2m->domain, 0);
1625         if ( unlikely(page == NULL) )
1626             goto out;
1627         mfn = page_to_mfn(page);
1628         page_extant = 0;
1629     }
1630 
1631     /* If we were given a buffer, now is the time to use it */
1632     if ( !page_extant && user_ptr )
1633     {
1634         void *guest_map;
1635         int rc;
1636 
1637         ASSERT( mfn_valid(mfn) );
1638         guest_map = map_domain_page(mfn);
1639         rc = copy_from_user(guest_map, user_ptr, PAGE_SIZE);
1640         unmap_domain_page(guest_map);
1641         if ( rc )
1642         {
1643             gdprintk(XENLOG_ERR, "Failed to load paging-in gfn %lx domain %u "
1644                                  "bytes left %d\n", gfn_l, d->domain_id, rc);
1645             ret = -EFAULT;
1646             put_page(page); /* Don't leak pages */
1647             goto out;
1648         }
1649     }
1650 
1651     /* Make the page guest-accessible now. If the pager still has a pending
1652      * resume operation, it will be idempotent as far as the p2m entry is
1653      * concerned, but it will still unpause the vcpu. */
1654     ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
1655                         paging_mode_log_dirty(d) ? p2m_ram_logdirty
1656                                                  : p2m_ram_rw, a);
1657     set_gpfn_from_mfn(mfn_x(mfn), gfn_l);
1658 
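         /*
          * Only pages which actually lost their backing frame were counted in
          * paged_pages, so only account for the page-in if we allocated one.
          */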
1659     if ( !page_extant )
1660         atomic_dec(&d->paged_pages);
1661 
1662  out:
1663     gfn_unlock(p2m, gfn, 0);
1664     return ret;
1665 }
1666 
1667 /**
1668  * p2m_mem_paging_resume - Resume guest gfn
1669  * @d: guest domain
1670  * @rsp: vm_event response received
1671  *
1672  * p2m_mem_paging_resume() will set the p2mt of a gfn back to ram_rw. It is
1673  * called by the pager.
1674  *
1675  * The gfn was previously either evicted and populated, or nominated and
1676  * populated. In both cases the p2mt is p2m_ram_paging_in by now; for a page
1677  * which was only nominated the pager did not need to call
1678  * p2m_mem_paging_prep(), because the gfn never lost its backing mfn.
1679  *
1680  * If the gfn was dropped the vcpu needs to be unpaused.
1681  */
1682 
1683 void p2m_mem_paging_resume(struct domain *d, vm_event_response_t *rsp)
1684 {
1685     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1686     p2m_type_t p2mt;
1687     p2m_access_t a;
1688     mfn_t mfn;
1689 
1690     /* Fix p2m entry if the page was not dropped */
1691     if ( !(rsp->u.mem_paging.flags & MEM_PAGING_DROP_PAGE) )
1692     {
1693         gfn_t gfn = _gfn(rsp->u.mem_access.gfn);
1694 
1695         gfn_lock(p2m, gfn, 0);
1696         mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
1697         /*
1698          * Allow only pages which were prepared properly, or pages which
1699          * were nominated but not evicted.
1700          */
1701         if ( mfn_valid(mfn) && (p2mt == p2m_ram_paging_in) )
1702         {
1703             p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
1704                           paging_mode_log_dirty(d) ? p2m_ram_logdirty :
1705                           p2m_ram_rw, a);
1706             set_gpfn_from_mfn(mfn_x(mfn), gfn_x(gfn));
1707         }
1708         gfn_unlock(p2m, gfn, 0);
1709     }
1710 }
1711 
1712 void p2m_altp2m_check(struct vcpu *v, uint16_t idx)
1713 {
1714     if ( altp2m_active(v->domain) )
1715         p2m_switch_vcpu_altp2m_by_id(v, idx);
1716 }
1717 
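     /*
      * The nested p2ms hang off a simple LRU list rooted at the host p2m
      * (np2m_list).  Moving an entry to the head marks it most recently used,
      * so when no candidate is given the victim picked below is the tail.
      */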
1718 static struct p2m_domain *
1719 p2m_getlru_nestedp2m(struct domain *d, struct p2m_domain *p2m)
1720 {
1721     struct list_head *lru_list = &p2m_get_hostp2m(d)->np2m_list;
1722 
1723     ASSERT(!list_empty(lru_list));
1724 
1725     if ( p2m == NULL )
1726         p2m = list_entry(lru_list->prev, struct p2m_domain, np2m_list);
1727 
1728     list_move(&p2m->np2m_list, lru_list);
1729 
1730     return p2m;
1731 }
1732 
1733 static void
1734 p2m_flush_table_locked(struct p2m_domain *p2m)
1735 {
1736     struct page_info *top, *pg;
1737     struct domain *d = p2m->domain;
1738     mfn_t mfn;
1739 
1740     ASSERT(p2m_locked_by_me(p2m));
1741 
1742     /*
1743      * "Host" p2m tables can have shared entries &c that need a bit more care
1744      * when discarding them.
1745      */
1746     ASSERT(!p2m_is_hostp2m(p2m));
1747     /* Nested p2m's do not do pod, hence the asserts (and no pod lock) */
1748     ASSERT(page_list_empty(&p2m->pod.super));
1749     ASSERT(page_list_empty(&p2m->pod.single));
1750 
1751     /* No need to flush if it's already empty */
1752     if ( p2m_is_nestedp2m(p2m) && p2m->np2m_base == P2M_BASE_EADDR )
1753         return;
1754 
1755     /* This is no longer a valid nested p2m for any address space */
1756     p2m->np2m_base = P2M_BASE_EADDR;
1757     p2m->np2m_generation++;
1758 
1759     /* Make sure nobody else is using this p2m table */
1760     nestedhvm_vmcx_flushtlb(p2m);
1761 
1762     /* Zap the top level of the trie */
1763     mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));
1764     clear_domain_page(mfn);
1765 
1766     /* Free the rest of the trie pages back to the paging pool */
1767     top = mfn_to_page(mfn);
1768     while ( (pg = page_list_remove_head(&p2m->pages)) )
1769     {
1770         if ( pg != top )
1771             d->arch.paging.free_page(d, pg);
1772     }
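         /*
          * Keep the (now zeroed) top-level page on the p2m's page list: the
          * pagetable pointer still refers to it, so the table stays valid,
          * merely empty.
          */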
1773     page_list_add(top, &p2m->pages);
1774 }
1775 
1776 /* Reset this p2m table to be empty */
1777 static void
1778 p2m_flush_table(struct p2m_domain *p2m)
1779 {
1780     p2m_lock(p2m);
1781     p2m_flush_table_locked(p2m);
1782     p2m_unlock(p2m);
1783 }
1784 
1785 void
1786 p2m_flush(struct vcpu *v, struct p2m_domain *p2m)
1787 {
1788     ASSERT(v->domain == p2m->domain);
1789     vcpu_nestedhvm(v).nv_p2m = NULL;
1790     p2m_flush_table(p2m);
1791     hvm_asid_flush_vcpu(v);
1792 }
1793 
1794 void
1795 p2m_flush_nestedp2m(struct domain *d)
1796 {
1797     int i;
1798     for ( i = 0; i < MAX_NESTEDP2M; i++ )
1799         p2m_flush_table(d->arch.nested_p2m[i]);
1800 }
1801 
1802 void np2m_flush_base(struct vcpu *v, unsigned long np2m_base)
1803 {
1804     struct domain *d = v->domain;
1805     struct p2m_domain *p2m;
1806     unsigned int i;
1807 
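         /* Mask out low bits; this avoids collisions with P2M_BASE_EADDR */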
1808     np2m_base &= ~(0xfffull);
1809 
1810     nestedp2m_lock(d);
1811     for ( i = 0; i < MAX_NESTEDP2M; i++ )
1812     {
1813         p2m = d->arch.nested_p2m[i];
1814         p2m_lock(p2m);
1815         if ( p2m->np2m_base == np2m_base )
1816         {
1817             p2m_flush_table_locked(p2m);
1818             p2m_unlock(p2m);
1819             break;
1820         }
1821         p2m_unlock(p2m);
1822     }
1823     nestedp2m_unlock(d);
1824 }
1825 
1826 static void assign_np2m(struct vcpu *v, struct p2m_domain *p2m)
1827 {
1828     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1829     struct domain *d = v->domain;
1830 
1831     /* Bring this np2m to the top of the LRU list */
1832     p2m_getlru_nestedp2m(d, p2m);
1833 
1834     nv->nv_flushp2m = 0;
1835     nv->nv_p2m = p2m;
1836     nv->np2m_generation = p2m->np2m_generation;
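         /* Record that this pCPU may now hold translations from this np2m. */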
1837     cpumask_set_cpu(v->processor, p2m->dirty_cpumask);
1838 }
1839 
1840 static void nvcpu_flush(struct vcpu *v)
1841 {
1842     hvm_asid_flush_vcpu(v);
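         /*
          * Flag the cached np2m reference as stale so it is re-validated
          * before the vCPU's next nested VM entry.
          */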
1843     vcpu_nestedhvm(v).stale_np2m = true;
1844 }
1845 
1846 struct p2m_domain *
1847 p2m_get_nestedp2m_locked(struct vcpu *v)
1848 {
1849     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1850     struct domain *d = v->domain;
1851     struct p2m_domain *p2m;
1852     uint64_t np2m_base = nhvm_vcpu_p2m_base(v);
1853     unsigned int i;
1854     bool needs_flush = true;
1855 
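         /*
          * Lookup order below: the np2m this vCPU already uses, then any np2m
          * already tracking the same np2m_base, and finally the least
          * recently used np2m, which is flushed and recycled.
          */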
1856     /* Mask out low bits; this avoids collisions with P2M_BASE_EADDR */
1857     np2m_base &= ~(0xfffull);
1858 
1859     if (nv->nv_flushp2m && nv->nv_p2m) {
1860         nv->nv_p2m = NULL;
1861     }
1862 
1863     nestedp2m_lock(d);
1864     p2m = nv->nv_p2m;
1865     if ( p2m )
1866     {
1867         p2m_lock(p2m);
1868         if ( p2m->np2m_base == np2m_base )
1869         {
1870             /* Check if np2m was flushed just before the lock */
1871             if ( nv->np2m_generation == p2m->np2m_generation )
1872                 needs_flush = false;
1873             /* np2m is up-to-date */
1874             goto found;
1875         }
1876         else if ( p2m->np2m_base != P2M_BASE_EADDR )
1877         {
1878             /* vCPU is switching from some other valid np2m */
1879             cpumask_clear_cpu(v->processor, p2m->dirty_cpumask);
1880         }
1881         p2m_unlock(p2m);
1882     }
1883 
1884     /* Share a np2m if possible */
1885     for ( i = 0; i < MAX_NESTEDP2M; i++ )
1886     {
1887         p2m = d->arch.nested_p2m[i];
1888         p2m_lock(p2m);
1889 
1890         if ( p2m->np2m_base == np2m_base )
1891             goto found;
1892 
1893         p2m_unlock(p2m);
1894     }
1895 
1896     /* All p2m's are or were in use. Take the least recently used one,
1897      * flush it and reuse. */
1898     p2m = p2m_getlru_nestedp2m(d, NULL);
1899     p2m_flush_table(p2m);
1900     p2m_lock(p2m);
1901 
1902  found:
1903     if ( needs_flush )
1904         nvcpu_flush(v);
1905     p2m->np2m_base = np2m_base;
1906     assign_np2m(v, p2m);
1907     nestedp2m_unlock(d);
1908 
1909     return p2m;
1910 }
1911 
1912 struct p2m_domain *p2m_get_nestedp2m(struct vcpu *v)
1913 {
1914     struct p2m_domain *p2m = p2m_get_nestedp2m_locked(v);
1915     p2m_unlock(p2m);
1916 
1917     return p2m;
1918 }
1919 
1920 struct p2m_domain *
1921 p2m_get_p2m(struct vcpu *v)
1922 {
1923     if (!nestedhvm_is_n2(v))
1924         return p2m_get_hostp2m(v->domain);
1925 
1926     return p2m_get_nestedp2m(v);
1927 }
1928 
1929 void np2m_schedule(int dir)
1930 {
1931     struct vcpu *curr = current;
1932     struct nestedvcpu *nv = &vcpu_nestedhvm(curr);
1933     struct p2m_domain *p2m;
1934 
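         /*
          * Invoked when a vCPU is scheduled in or out: keep dirty_cpumask and
          * the cached np2m reference consistent across descheduling.
          */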
1935     ASSERT(dir == NP2M_SCHEDLE_IN || dir == NP2M_SCHEDLE_OUT);
1936 
1937     if ( !nestedhvm_enabled(curr->domain) ||
1938          !nestedhvm_vcpu_in_guestmode(curr) ||
1939          !nestedhvm_paging_mode_hap(curr) )
1940         return;
1941 
1942     p2m = nv->nv_p2m;
1943     if ( p2m )
1944     {
1945         bool np2m_valid;
1946 
1947         p2m_lock(p2m);
1948         np2m_valid = p2m->np2m_base == nhvm_vcpu_p2m_base(curr) &&
1949                      nv->np2m_generation == p2m->np2m_generation;
1950         if ( dir == NP2M_SCHEDLE_OUT && np2m_valid )
1951         {
1952             /*
1953              * The np2m is up to date but this vCPU will no longer use it,
1954              * which means there are no reasons to send a flush IPI.
1955              */
1956             cpumask_clear_cpu(curr->processor, p2m->dirty_cpumask);
1957         }
1958         else if ( dir == NP2M_SCHEDLE_IN )
1959         {
1960             if ( !np2m_valid )
1961             {
1962                 /* This vCPU's np2m was flushed while it was not runnable */
1963                 hvm_asid_flush_core();
1964                 vcpu_nestedhvm(curr).nv_p2m = NULL;
1965             }
1966             else
1967                 cpumask_set_cpu(curr->processor, p2m->dirty_cpumask);
1968         }
1969         p2m_unlock(p2m);
1970     }
1971 }
1972 
1973 unsigned long paging_gva_to_gfn(struct vcpu *v,
1974                                 unsigned long va,
1975                                 uint32_t *pfec)
1976 {
1977     struct p2m_domain *hostp2m = p2m_get_hostp2m(v->domain);
1978     const struct paging_mode *hostmode = paging_get_hostmode(v);
1979 
1980     if ( is_hvm_vcpu(v) && paging_mode_hap(v->domain) && nestedhvm_is_n2(v) )
1981     {
1982         unsigned long l2_gfn, l1_gfn;
1983         struct p2m_domain *p2m;
1984         const struct paging_mode *mode;
1985         uint8_t l1_p2ma;
1986         unsigned int l1_page_order;
1987         int rv;
1988 
1989         /* translate l2 guest va into l2 guest gfn */
1990         p2m = p2m_get_nestedp2m(v);
1991         mode = paging_get_nestedmode(v);
1992         l2_gfn = mode->gva_to_gfn(v, p2m, va, pfec);
1993 
1994         if ( l2_gfn == gfn_x(INVALID_GFN) )
1995             return gfn_x(INVALID_GFN);
1996 
1997         /* translate l2 guest gfn into l1 guest gfn */
1998         rv = nestedhap_walk_L1_p2m(v, l2_gfn, &l1_gfn, &l1_page_order, &l1_p2ma,
1999                                    1,
2000                                    !!(*pfec & PFEC_write_access),
2001                                    !!(*pfec & PFEC_insn_fetch));
2002 
2003         if ( rv != NESTEDHVM_PAGEFAULT_DONE )
2004             return gfn_x(INVALID_GFN);
2005 
2006         /*
2007          * Sanity check that l1_gfn can be used properly as a 4K mapping, even
2008      * if it is mapped by a nested superpage.
2009          */
2010         ASSERT((l2_gfn & ((1ul << l1_page_order) - 1)) ==
2011                (l1_gfn & ((1ul << l1_page_order) - 1)));
2012 
2013         return l1_gfn;
2014     }
2015 
2016     return hostmode->gva_to_gfn(v, hostp2m, va, pfec);
2017 }
2018 
2019 /*
2020  * If the map is non-NULL, we leave this function having acquired an extra ref
2021  * on mfn_to_page(*mfn).  In all cases, *pfec contains appropriate
2022  * synthetic/structure PFEC_* bits.
2023  */
2024 void *map_domain_gfn(struct p2m_domain *p2m, gfn_t gfn, mfn_t *mfn,
2025                      p2m_type_t *p2mt, p2m_query_t q, uint32_t *pfec)
2026 {
2027     struct page_info *page;
2028 
2029     if ( !gfn_valid(p2m->domain, gfn) )
2030     {
2031         *pfec = PFEC_reserved_bit | PFEC_page_present;
2032         return NULL;
2033     }
2034 
2035     /* Translate the gfn, unsharing if shared. */
2036     page = p2m_get_page_from_gfn(p2m, gfn, p2mt, NULL, q);
2037     if ( p2m_is_paging(*p2mt) )
2038     {
2039         ASSERT(p2m_is_hostp2m(p2m));
2040         if ( page )
2041             put_page(page);
2042         p2m_mem_paging_populate(p2m->domain, gfn_x(gfn));
2043         *pfec = PFEC_page_paged;
2044         return NULL;
2045     }
2046     if ( p2m_is_shared(*p2mt) )
2047     {
2048         if ( page )
2049             put_page(page);
2050         *pfec = PFEC_page_shared;
2051         return NULL;
2052     }
2053     if ( !page )
2054     {
2055         *pfec = 0;
2056         return NULL;
2057     }
2058 
2059     *pfec = PFEC_page_present;
2060     *mfn = page_to_mfn(page);
2061     ASSERT(mfn_valid(*mfn));
2062 
2063     return map_domain_page(*mfn);
2064 }
2065 
2066 static unsigned int mmio_order(const struct domain *d,
2067                                unsigned long start_fn, unsigned long nr)
2068 {
2069     /*
2070      * Note that the !iommu_use_hap_pt() here has three effects:
2071      * - cover iommu_{,un}map_page() not having an "order" input yet,
2072      * - exclude shadow mode (which doesn't support large MMIO mappings),
2073      * - exclude PV guests, should execution reach this code for such.
2074      * So be careful when altering this.
2075      */
2076     if ( !need_iommu(d) || !iommu_use_hap_pt(d) ||
2077          (start_fn & ((1UL << PAGE_ORDER_2M) - 1)) || !(nr >> PAGE_ORDER_2M) )
2078         return PAGE_ORDER_4K;
2079 
2080     if ( 0 /*
2081             * Don't use 1Gb pages, to limit the iteration count in
2082             * set_typed_p2m_entry() when it needs to zap M2P entries
2083             * for a RAM range.
2084             */ &&
2085          !(start_fn & ((1UL << PAGE_ORDER_1G) - 1)) && (nr >> PAGE_ORDER_1G) &&
2086          hap_has_1gb )
2087         return PAGE_ORDER_1G;
2088 
2089     if ( hap_has_2mb )
2090         return PAGE_ORDER_2M;
2091 
2092     return PAGE_ORDER_4K;
2093 }
2094 
2095 #define MAP_MMIO_MAX_ITER 64 /* pretty arbitrary */
2096 
2097 int map_mmio_regions(struct domain *d,
2098                      gfn_t start_gfn,
2099                      unsigned long nr,
2100                      mfn_t mfn)
2101 {
2102     int ret = 0;
2103     unsigned long i;
2104     unsigned int iter, order;
2105 
2106     if ( !paging_mode_translate(d) )
2107         return 0;
2108 
2109     for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
2110           i += 1UL << order, ++iter )
2111     {
2112         /* OR'ing gfn and mfn values will return an order suitable to both. */
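             /*
              * A positive return value from set_mmio_p2m_entry() requests a
              * retry with a smaller order, capped at (ret - 1).
              */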
2113         for ( order = mmio_order(d, (gfn_x(start_gfn) + i) | (mfn_x(mfn) + i), nr - i); ;
2114               order = ret - 1 )
2115         {
2116             ret = set_mmio_p2m_entry(d, gfn_x(start_gfn) + i,
2117                                      mfn_add(mfn, i), order,
2118                                      p2m_get_hostp2m(d)->default_access);
2119             if ( ret <= 0 )
2120                 break;
2121             ASSERT(ret <= order);
2122         }
2123         if ( ret < 0 )
2124             break;
2125     }
2126 
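         /*
          * Return 0 when every frame was mapped, the number of frames already
          * processed when the iteration limit stopped us early (the caller is
          * expected to continue), or an error code if nothing was mapped.
          */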
2127     return i == nr ? 0 : i ?: ret;
2128 }
2129 
2130 int unmap_mmio_regions(struct domain *d,
2131                        gfn_t start_gfn,
2132                        unsigned long nr,
2133                        mfn_t mfn)
2134 {
2135     int ret = 0;
2136     unsigned long i;
2137     unsigned int iter, order;
2138 
2139     if ( !paging_mode_translate(d) )
2140         return 0;
2141 
2142     for ( iter = i = 0; i < nr && iter < MAP_MMIO_MAX_ITER;
2143           i += 1UL << order, ++iter )
2144     {
2145         /* OR'ing gfn and mfn values will return an order suitable to both. */
2146         for ( order = mmio_order(d, (gfn_x(start_gfn) + i) | (mfn_x(mfn) + i), nr - i); ;
2147               order = ret - 1 )
2148         {
2149             ret = clear_mmio_p2m_entry(d, gfn_x(start_gfn) + i,
2150                                        mfn_add(mfn, i), order);
2151             if ( ret <= 0 )
2152                 break;
2153             ASSERT(ret <= order);
2154         }
2155         if ( ret < 0 )
2156             break;
2157     }
2158 
2159     return i == nr ? 0 : i ?: ret;
2160 }
2161 
2162 bool_t p2m_switch_vcpu_altp2m_by_id(struct vcpu *v, unsigned int idx)
2163 {
2164     struct domain *d = v->domain;
2165     bool_t rc = 0;
2166 
2167     if ( idx >= MAX_ALTP2M )
2168         return rc;
2169 
2170     altp2m_list_lock(d);
2171 
2172     if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) )
2173     {
2174         if ( idx != vcpu_altp2m(v).p2midx )
2175         {
2176             atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
2177             vcpu_altp2m(v).p2midx = idx;
2178             atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
2179             altp2m_vcpu_update_p2m(v);
2180         }
2181         rc = 1;
2182     }
2183 
2184     altp2m_list_unlock(d);
2185     return rc;
2186 }
2187 
2188 /*
2189  * If the fault is for a not present entry:
2190  *     if the entry in the host p2m has a valid mfn, copy it and retry
2191  *     else indicate that outer handler should handle fault
2192  *
2193  * If the fault is for a present entry:
2194  *     indicate that outer handler should handle fault
2195  */
2196 
2197 bool_t p2m_altp2m_lazy_copy(struct vcpu *v, paddr_t gpa,
2198                             unsigned long gla, struct npfec npfec,
2199                             struct p2m_domain **ap2m)
2200 {
2201     struct p2m_domain *hp2m = p2m_get_hostp2m(v->domain);
2202     p2m_type_t p2mt;
2203     p2m_access_t p2ma;
2204     unsigned int page_order;
2205     gfn_t gfn = _gfn(paddr_to_pfn(gpa));
2206     unsigned long mask;
2207     mfn_t mfn;
2208     int rv;
2209 
2210     *ap2m = p2m_get_altp2m(v);
2211 
2212     mfn = get_gfn_type_access(*ap2m, gfn_x(gfn), &p2mt, &p2ma,
2213                               0, &page_order);
2214     __put_gfn(*ap2m, gfn_x(gfn));
2215 
2216     if ( !mfn_eq(mfn, INVALID_MFN) )
2217         return 0;
2218 
2219     mfn = get_gfn_type_access(hp2m, gfn_x(gfn), &p2mt, &p2ma,
2220                               P2M_ALLOC, &page_order);
2221     __put_gfn(hp2m, gfn_x(gfn));
2222 
2223     if ( mfn_eq(mfn, INVALID_MFN) )
2224         return 0;
2225 
2226     p2m_lock(*ap2m);
2227 
2228     /*
2229      * If this is a superpage mapping, round down both frame numbers
2230      * to the start of the superpage.
2231      */
2232     mask = ~((1UL << page_order) - 1);
2233     mfn = _mfn(mfn_x(mfn) & mask);
2234     gfn = _gfn(gfn_x(gfn) & mask);
2235 
2236     rv = p2m_set_entry(*ap2m, gfn, mfn, page_order, p2mt, p2ma);
2237     p2m_unlock(*ap2m);
2238 
2239     if ( rv )
2240     {
2241         gdprintk(XENLOG_ERR,
2242                  "failed to set entry for %#"PRIx64" -> %#"PRIx64" p2m %#"PRIx64"\n",
2243                  gfn_x(gfn), mfn_x(mfn), (unsigned long)*ap2m);
2244         domain_crash(hp2m->domain);
2245     }
2246 
2247     return 1;
2248 }
2249 
2250 void p2m_flush_altp2m(struct domain *d)
2251 {
2252     unsigned int i;
2253 
2254     altp2m_list_lock(d);
2255 
2256     for ( i = 0; i < MAX_ALTP2M; i++ )
2257     {
2258         p2m_flush_table(d->arch.altp2m_p2m[i]);
2259         /* Uninit and reinit ept to force TLB shootdown */
2260         ept_p2m_uninit(d->arch.altp2m_p2m[i]);
2261         ept_p2m_init(d->arch.altp2m_p2m[i]);
2262         d->arch.altp2m_eptp[i] = mfn_x(INVALID_MFN);
2263     }
2264 
2265     altp2m_list_unlock(d);
2266 }
2267 
2268 int p2m_init_altp2m_by_id(struct domain *d, unsigned int idx)
2269 {
2270     int rc = -EINVAL;
2271 
2272     if ( idx >= MAX_ALTP2M )
2273         return rc;
2274 
2275     altp2m_list_lock(d);
2276 
2277     if ( d->arch.altp2m_eptp[idx] == mfn_x(INVALID_MFN) )
2278     {
2279         p2m_init_altp2m_ept(d, idx);
2280         rc = 0;
2281     }
2282 
2283     altp2m_list_unlock(d);
2284     return rc;
2285 }
2286 
2287 int p2m_init_next_altp2m(struct domain *d, uint16_t *idx)
2288 {
2289     int rc = -EINVAL;
2290     unsigned int i;
2291 
2292     altp2m_list_lock(d);
2293 
2294     for ( i = 0; i < MAX_ALTP2M; i++ )
2295     {
2296         if ( d->arch.altp2m_eptp[i] != mfn_x(INVALID_MFN) )
2297             continue;
2298 
2299         p2m_init_altp2m_ept(d, i);
2300         *idx = i;
2301         rc = 0;
2302 
2303         break;
2304     }
2305 
2306     altp2m_list_unlock(d);
2307     return rc;
2308 }
2309 
2310 int p2m_destroy_altp2m_by_id(struct domain *d, unsigned int idx)
2311 {
2312     struct p2m_domain *p2m;
2313     int rc = -EBUSY;
2314 
2315     if ( !idx || idx >= MAX_ALTP2M )
2316         return rc;
2317 
2318     domain_pause_except_self(d);
2319 
2320     altp2m_list_lock(d);
2321 
2322     if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) )
2323     {
2324         p2m = d->arch.altp2m_p2m[idx];
2325 
2326         if ( !_atomic_read(p2m->active_vcpus) )
2327         {
2328             p2m_flush_table(d->arch.altp2m_p2m[idx]);
2329             /* Uninit and reinit ept to force TLB shootdown */
2330             ept_p2m_uninit(d->arch.altp2m_p2m[idx]);
2331             ept_p2m_init(d->arch.altp2m_p2m[idx]);
2332             d->arch.altp2m_eptp[idx] = mfn_x(INVALID_MFN);
2333             rc = 0;
2334         }
2335     }
2336 
2337     altp2m_list_unlock(d);
2338 
2339     domain_unpause_except_self(d);
2340 
2341     return rc;
2342 }
2343 
2344 int p2m_switch_domain_altp2m_by_id(struct domain *d, unsigned int idx)
2345 {
2346     struct vcpu *v;
2347     int rc = -EINVAL;
2348 
2349     if ( idx >= MAX_ALTP2M )
2350         return rc;
2351 
2352     domain_pause_except_self(d);
2353 
2354     altp2m_list_lock(d);
2355 
2356     if ( d->arch.altp2m_eptp[idx] != mfn_x(INVALID_MFN) )
2357     {
2358         for_each_vcpu( d, v )
2359             if ( idx != vcpu_altp2m(v).p2midx )
2360             {
2361                 atomic_dec(&p2m_get_altp2m(v)->active_vcpus);
2362                 vcpu_altp2m(v).p2midx = idx;
2363                 atomic_inc(&p2m_get_altp2m(v)->active_vcpus);
2364                 altp2m_vcpu_update_p2m(v);
2365             }
2366 
2367         rc = 0;
2368     }
2369 
2370     altp2m_list_unlock(d);
2371 
2372     domain_unpause_except_self(d);
2373 
2374     return rc;
2375 }
2376 
2377 int p2m_change_altp2m_gfn(struct domain *d, unsigned int idx,
2378                           gfn_t old_gfn, gfn_t new_gfn)
2379 {
2380     struct p2m_domain *hp2m, *ap2m;
2381     p2m_access_t a;
2382     p2m_type_t t;
2383     mfn_t mfn;
2384     unsigned int page_order;
2385     int rc = -EINVAL;
2386 
2387     if ( idx >= MAX_ALTP2M || d->arch.altp2m_eptp[idx] == mfn_x(INVALID_MFN) )
2388         return rc;
2389 
2390     hp2m = p2m_get_hostp2m(d);
2391     ap2m = d->arch.altp2m_p2m[idx];
2392 
2393     p2m_lock(hp2m);
2394     p2m_lock(ap2m);
2395 
2396     mfn = ap2m->get_entry(ap2m, old_gfn, &t, &a, 0, NULL, NULL);
2397 
2398     if ( gfn_eq(new_gfn, INVALID_GFN) )
2399     {
2400         if ( mfn_valid(mfn) )
2401             p2m_remove_page(ap2m, gfn_x(old_gfn), mfn_x(mfn), PAGE_ORDER_4K);
2402         rc = 0;
2403         goto out;
2404     }
2405 
2406     /* Check host p2m if no valid entry in alternate */
2407     if ( !mfn_valid(mfn) )
2408     {
2409         mfn = __get_gfn_type_access(hp2m, gfn_x(old_gfn), &t, &a,
2410                                     P2M_ALLOC, &page_order, 0);
2411 
2412         if ( !mfn_valid(mfn) || t != p2m_ram_rw )
2413             goto out;
2414 
2415         /* If this is a superpage, copy that first */
2416         if ( page_order != PAGE_ORDER_4K )
2417         {
2418             gfn_t gfn;
2419             unsigned long mask;
2420 
2421             mask = ~((1UL << page_order) - 1);
2422             gfn = _gfn(gfn_x(old_gfn) & mask);
2423             mfn = _mfn(mfn_x(mfn) & mask);
2424 
2425             if ( ap2m->set_entry(ap2m, gfn, mfn, page_order, t, a, 1) )
2426                 goto out;
2427         }
2428     }
2429 
2430     mfn = ap2m->get_entry(ap2m, new_gfn, &t, &a, 0, NULL, NULL);
2431 
2432     if ( !mfn_valid(mfn) )
2433         mfn = hp2m->get_entry(hp2m, new_gfn, &t, &a, 0, NULL, NULL);
2434 
2435     /* Note: currently it is not safe to remap to a shared entry */
2436     if ( !mfn_valid(mfn) || (t != p2m_ram_rw) )
2437         goto out;
2438 
2439     if ( !ap2m->set_entry(ap2m, old_gfn, mfn, PAGE_ORDER_4K, t, a,
2440                           (current->domain != d)) )
2441     {
2442         rc = 0;
2443 
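             /*
              * Track the range of remapped gfns so that
              * p2m_altp2m_propagate_change() can tell when dropping a page
              * from the host p2m may invalidate a remapping and the whole
              * altp2m view must be reset.
              */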
2444         if ( gfn_x(new_gfn) < ap2m->min_remapped_gfn )
2445             ap2m->min_remapped_gfn = gfn_x(new_gfn);
2446         if ( gfn_x(new_gfn) > ap2m->max_remapped_gfn )
2447             ap2m->max_remapped_gfn = gfn_x(new_gfn);
2448     }
2449 
2450  out:
2451     p2m_unlock(ap2m);
2452     p2m_unlock(hp2m);
2453     return rc;
2454 }
2455 
2456 static void p2m_reset_altp2m(struct p2m_domain *p2m)
2457 {
2458     p2m_flush_table(p2m);
2459     /* Uninit and reinit ept to force TLB shootdown */
2460     ept_p2m_uninit(p2m);
2461     ept_p2m_init(p2m);
2462     p2m->min_remapped_gfn = gfn_x(INVALID_GFN);
2463     p2m->max_remapped_gfn = 0;
2464 }
2465 
2466 void p2m_altp2m_propagate_change(struct domain *d, gfn_t gfn,
2467                                  mfn_t mfn, unsigned int page_order,
2468                                  p2m_type_t p2mt, p2m_access_t p2ma)
2469 {
2470     struct p2m_domain *p2m;
2471     p2m_access_t a;
2472     p2m_type_t t;
2473     mfn_t m;
2474     unsigned int i;
2475     unsigned int reset_count = 0;
2476     unsigned int last_reset_idx = ~0;
2477 
2478     if ( !altp2m_active(d) )
2479         return;
2480 
2481     altp2m_list_lock(d);
2482 
2483     for ( i = 0; i < MAX_ALTP2M; i++ )
2484     {
2485         if ( d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
2486             continue;
2487 
2488         p2m = d->arch.altp2m_p2m[i];
2489         m = get_gfn_type_access(p2m, gfn_x(gfn), &t, &a, 0, NULL);
2490 
2491         /* Check for a dropped page that may impact this altp2m */
2492         if ( mfn_eq(mfn, INVALID_MFN) &&
2493              gfn_x(gfn) >= p2m->min_remapped_gfn &&
2494              gfn_x(gfn) <= p2m->max_remapped_gfn )
2495         {
2496             if ( !reset_count++ )
2497             {
2498                 p2m_reset_altp2m(p2m);
2499                 last_reset_idx = i;
2500             }
2501             else
2502             {
2503                 /* At least 2 altp2m's impacted, so reset everything */
2504                 __put_gfn(p2m, gfn_x(gfn));
2505 
2506                 for ( i = 0; i < MAX_ALTP2M; i++ )
2507                 {
2508                     if ( i == last_reset_idx ||
2509                          d->arch.altp2m_eptp[i] == mfn_x(INVALID_MFN) )
2510                         continue;
2511 
2512                     p2m = d->arch.altp2m_p2m[i];
2513                     p2m_lock(p2m);
2514                     p2m_reset_altp2m(p2m);
2515                     p2m_unlock(p2m);
2516                 }
2517 
2518                 goto out;
2519             }
2520         }
2521         else if ( !mfn_eq(m, INVALID_MFN) )
2522             p2m_set_entry(p2m, gfn, mfn, page_order, p2mt, p2ma);
2523 
2524         __put_gfn(p2m, gfn_x(gfn));
2525     }
2526 
2527  out:
2528     altp2m_list_unlock(d);
2529 }
2530 
2531 /*** Audit ***/
2532 
2533 #if P2M_AUDIT
2534 void audit_p2m(struct domain *d,
2535                uint64_t *orphans,
2536                uint64_t *m2p_bad,
2537                uint64_t *p2m_bad)
2538 {
2539     struct page_info *page;
2540     struct domain *od;
2541     unsigned long mfn, gfn;
2542     mfn_t p2mfn;
2543     unsigned long orphans_count = 0, mpbad = 0, pmbad = 0;
2544     p2m_access_t p2ma;
2545     p2m_type_t type;
2546     struct p2m_domain *p2m = p2m_get_hostp2m(d);
2547 
2548     if ( !paging_mode_translate(d) )
2549         goto out_p2m_audit;
2550 
2551     P2M_PRINTK("p2m audit starts\n");
2552 
2553     p2m_lock(p2m);
2554     pod_lock(p2m);
2555 
2556     if (p2m->audit_p2m)
2557         pmbad = p2m->audit_p2m(p2m);
2558 
2559     /* Audit part two: walk the domain's page allocation list, checking
2560      * the m2p entries. */
2561     spin_lock(&d->page_alloc_lock);
2562     page_list_for_each ( page, &d->page_list )
2563     {
2564         mfn = mfn_x(page_to_mfn(page));
2565 
2566         P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn);
2567 
2568         od = page_get_owner(page);
2569 
2570         if ( od != d )
2571         {
2572             P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
2573                        mfn, od, (od?od->domain_id:-1), d, d->domain_id);
2574             continue;
2575         }
2576 
2577         gfn = get_gpfn_from_mfn(mfn);
2578         if ( gfn == INVALID_M2P_ENTRY )
2579         {
2580             orphans_count++;
2581             P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
2582                            mfn);
2583             continue;
2584         }
2585 
2586         if ( gfn == SHARED_M2P_ENTRY )
2587         {
2588             P2M_PRINTK("shared mfn (%lx) on domain page list!\n",
2589                     mfn);
2590             continue;
2591         }
2592 
2593         p2mfn = get_gfn_type_access(p2m, gfn, &type, &p2ma, 0, NULL);
2594         if ( mfn_x(p2mfn) != mfn )
2595         {
2596             mpbad++;
2597             P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
2598                        " (-> gfn %#lx)\n",
2599                        mfn, gfn, mfn_x(p2mfn),
2600                        (mfn_valid(p2mfn)
2601                         ? get_gpfn_from_mfn(mfn_x(p2mfn))
2602                         : -1u));
2603             /* This m2p entry is stale: the domain has another frame in
2604              * this physical slot.  No great disaster, but for neatness,
2605              * blow away the m2p entry. */
2606             set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
2607         }
2608         __put_gfn(p2m, gfn);
2609 
2610         P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx\n",
2611                        mfn, gfn, mfn_x(p2mfn));
2612     }
2613     spin_unlock(&d->page_alloc_lock);
2614 
2615     pod_unlock(p2m);
2616     p2m_unlock(p2m);
2617 
2618     P2M_PRINTK("p2m audit complete\n");
2619     if ( orphans_count | mpbad | pmbad )
2620         P2M_PRINTK("p2m audit found %lu orphans\n", orphans_count);
2621     if ( mpbad | pmbad )
2622     {
2623         P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
2624                    pmbad, mpbad);
2625         WARN();
2626     }
2627 
2628 out_p2m_audit:
2629     *orphans = (uint64_t) orphans_count;
2630     *m2p_bad = (uint64_t) mpbad;
2631     *p2m_bad = (uint64_t) pmbad;
2632 }
2633 #endif /* P2M_AUDIT */
2634 
2635 /*
2636  * Add frame from foreign domain to target domain's physmap. Similar to
2637  * XENMAPSPACE_gmfn, but the frame is foreign, is mapped into the current
2638  * domain, and is not removed from the foreign domain.
2639  *
2640  * Usage: - libxl on pvh dom0 creating a guest and doing privcmd_ioctl_mmap.
2641  *        - xentrace running on dom0 mapping xenheap pages. foreigndom would
2642  *          be DOMID_XEN in such a case.
2643  *        etc..
2644  *
2645  * Side Effect: the mfn for fgfn will be refcounted in lower level routines
2646  *              so it is not lost while mapped here. The refcnt is released
2647  *              via the XENMEM_remove_from_physmap path.
2648  *
2649  * Returns: 0 ==> success
2650  */
2651 int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
2652                     unsigned long gpfn, domid_t foreigndom)
2653 {
2654     p2m_type_t p2mt, p2mt_prev;
2655     mfn_t prev_mfn, mfn;
2656     struct page_info *page;
2657     int rc;
2658     struct domain *fdom;
2659 
2660     ASSERT(tdom);
2661     if ( foreigndom == DOMID_SELF )
2662         return -EINVAL;
2663     /*
2664      * hvm fixme: until support is added to p2m teardown code to cleanup any
2665      * foreign entries, limit this to hardware domain only.
2666      */
2667     if ( !is_hardware_domain(tdom) )
2668         return -EPERM;
2669 
2670     if ( foreigndom == DOMID_XEN )
2671         fdom = rcu_lock_domain(dom_xen);
2672     else
2673         fdom = rcu_lock_domain_by_id(foreigndom);
2674     if ( fdom == NULL )
2675         return -ESRCH;
2676 
2677     rc = -EINVAL;
2678     if ( tdom == fdom )
2679         goto out;
2680 
2681     rc = xsm_map_gmfn_foreign(XSM_TARGET, tdom, fdom);
2682     if ( rc )
2683         goto out;
2684 
2685     /*
2686      * Take a refcnt on the mfn. NB: following supported for foreign mapping:
2687      *     ram_rw | ram_logdirty | ram_ro | paging_out.
2688      */
2689     page = get_page_from_gfn(fdom, fgfn, &p2mt, P2M_ALLOC);
2690     if ( !page ||
2691          !p2m_is_ram(p2mt) || p2m_is_shared(p2mt) || p2m_is_hole(p2mt) )
2692     {
2693         if ( page )
2694             put_page(page);
2695         rc = -EINVAL;
2696         goto out;
2697     }
2698     mfn = page_to_mfn(page);
2699 
2700     /* Remove previously mapped page if it is present. */
2701     prev_mfn = get_gfn(tdom, gpfn, &p2mt_prev);
2702     if ( mfn_valid(prev_mfn) )
2703     {
2704         if ( is_xen_heap_mfn(mfn_x(prev_mfn)) )
2705             /* Xen heap frames are simply unhooked from this phys slot */
2706             rc = guest_physmap_remove_page(tdom, _gfn(gpfn), prev_mfn, 0);
2707         else
2708             /* Normal domain memory is freed, to avoid leaking memory. */
2709             rc = guest_remove_page(tdom, gpfn);
2710         if ( rc )
2711             goto put_both;
2712     }
2713     /*
2714      * Create the new mapping. Can't use guest_physmap_add_page() because it
2715      * will update the m2p table, which will result in mfn -> gpfn of dom0
2716      * and not fgfn of domU.
2717      */
2718     rc = set_foreign_p2m_entry(tdom, gpfn, mfn);
2719     if ( rc )
2720         gdprintk(XENLOG_WARNING, "set_foreign_p2m_entry failed. "
2721                  "gpfn:%lx mfn:%lx fgfn:%lx td:%d fd:%d\n",
2722                  gpfn, mfn_x(mfn), fgfn, tdom->domain_id, fdom->domain_id);
2723 
2724  put_both:
2725     put_page(page);
2726 
2727     /*
2728      * This put_gfn for the above get_gfn for prev_mfn.  We must do this
2729      * after set_foreign_p2m_entry so another cpu doesn't populate the gpfn
2730      * before us.
2731      */
2732     put_gfn(tdom, gpfn);
2733 
2734 out:
2735     if ( fdom )
2736         rcu_unlock_domain(fdom);
2737     return rc;
2738 }
2739 /*
2740  * Local variables:
2741  * mode: C
2742  * c-file-style: "BSD"
2743  * c-basic-offset: 4
2744  * indent-tabs-mode: nil
2745  * End:
2746  */
2747