/******************************************************************************
 * arch/x86/paging.c
 *
 * x86 specific paging support
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Copyright (c) 2007 XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/init.h>
#include <xen/guest_access.h>
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/p2m.h>
#include <asm/hap.h>
#include <asm/event.h>
#include <asm/hvm/nestedhvm.h>
#include <xen/numa.h>
#include <xsm/xsm.h>
#include <public/sched.h> /* SHUTDOWN_suspend */

#include "mm-locks.h"

/* Printouts */
#define PAGING_PRINTK(_f, _a...)                                     \
    debugtrace_printk("pg: %s(): " _f, __func__, ##_a)
#define PAGING_ERROR(_f, _a...)                                      \
    printk("pg error: %s(): " _f, __func__, ##_a)
#define PAGING_DEBUG(flag, _f, _a...)                                \
    do {                                                             \
        if (PAGING_DEBUG_ ## flag)                                   \
            debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \
    } while (0)

/* Per-CPU variable for enforcing the lock ordering */
DEFINE_PER_CPU(int, mm_lock_level);

/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef page_to_mfn
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))

/************************************************/
/*              LOG DIRTY SUPPORT               */
/************************************************/
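/*
 * The log-dirty bitmap is kept as a sparse trie of domain pages: the top
 * (L4), L3 and L2 nodes hold arrays of MFNs pointing at the next level,
 * and the L1 leaves hold the actual dirty bits, so memory is only
 * allocated for regions of the guest that have actually been dirtied.
 */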

static mfn_t paging_new_log_dirty_page(struct domain *d)
{
    struct page_info *page;

    page = d->arch.paging.alloc_page(d);
    if ( unlikely(page == NULL) )
    {
        d->arch.paging.log_dirty.failed_allocs++;
        return INVALID_MFN;
    }

    d->arch.paging.log_dirty.allocs++;

    return page_to_mfn(page);
}

/* Alloc and init a new leaf node */
static mfn_t paging_new_log_dirty_leaf(struct domain *d)
{
    mfn_t mfn = paging_new_log_dirty_page(d);

    if ( mfn_valid(mfn) )
        clear_domain_page(mfn);

    return mfn;
}

/* Alloc and init a new non-leaf node */
static mfn_t paging_new_log_dirty_node(struct domain *d)
{
    mfn_t mfn = paging_new_log_dirty_page(d);
    if ( mfn_valid(mfn) )
    {
        int i;
        mfn_t *node = map_domain_page(mfn);
        for ( i = 0; i < LOGDIRTY_NODE_ENTRIES; i++ )
            node[i] = INVALID_MFN;
        unmap_domain_page(node);
    }
    return mfn;
}

/* get the top of the log-dirty bitmap trie */
static mfn_t *paging_map_log_dirty_bitmap(struct domain *d)
{
    if ( likely(mfn_valid(d->arch.paging.log_dirty.top)) )
        return map_domain_page(d->arch.paging.log_dirty.top);
    return NULL;
}

static void paging_free_log_dirty_page(struct domain *d, mfn_t mfn)
{
    d->arch.paging.log_dirty.allocs--;
    d->arch.paging.free_page(d, mfn_to_page(mfn));
}

static int paging_free_log_dirty_bitmap(struct domain *d, int rc)
{
    mfn_t *l4, *l3, *l2;
    int i4, i3, i2;

    paging_lock(d);

    if ( !mfn_valid(d->arch.paging.log_dirty.top) )
    {
        paging_unlock(d);
        return 0;
    }

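    /*
     * Preemption bookkeeping: if no continuation is pending, start a fresh
     * teardown and record the (negative) status handed in by the caller;
     * if a different domain's (or a different op's) continuation is still
     * pending, back off with -EBUSY.
     */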
    if ( !d->arch.paging.preempt.dom )
    {
        memset(&d->arch.paging.preempt.log_dirty, 0,
               sizeof(d->arch.paging.preempt.log_dirty));
        ASSERT(rc <= 0);
        d->arch.paging.preempt.log_dirty.done = -rc;
    }
    else if ( d->arch.paging.preempt.dom != current->domain ||
              d->arch.paging.preempt.op != XEN_DOMCTL_SHADOW_OP_OFF )
    {
        paging_unlock(d);
        return -EBUSY;
    }

    l4 = map_domain_page(d->arch.paging.log_dirty.top);
    i4 = d->arch.paging.preempt.log_dirty.i4;
    i3 = d->arch.paging.preempt.log_dirty.i3;
    rc = 0;

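    /*
     * Free the trie bottom-up, checking for preemption after each L3 and
     * L4 slot so that a large bitmap can be torn down over several
     * continuations.
     */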
    for ( ; i4 < LOGDIRTY_NODE_ENTRIES; i4++, i3 = 0 )
    {
        if ( !mfn_valid(l4[i4]) )
            continue;

        l3 = map_domain_page(l4[i4]);

        for ( ; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
        {
            if ( !mfn_valid(l3[i3]) )
                continue;

            l2 = map_domain_page(l3[i3]);

            for ( i2 = 0; i2 < LOGDIRTY_NODE_ENTRIES; i2++ )
                if ( mfn_valid(l2[i2]) )
                    paging_free_log_dirty_page(d, l2[i2]);

            unmap_domain_page(l2);
            paging_free_log_dirty_page(d, l3[i3]);
            l3[i3] = INVALID_MFN;

            if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
            {
                d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
                d->arch.paging.preempt.log_dirty.i4 = i4;
                rc = -ERESTART;
                break;
            }
        }

        unmap_domain_page(l3);
        if ( rc )
            break;
        paging_free_log_dirty_page(d, l4[i4]);
        l4[i4] = INVALID_MFN;

        if ( i4 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
        {
            d->arch.paging.preempt.log_dirty.i3 = 0;
            d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
            rc = -ERESTART;
            break;
        }
    }

    unmap_domain_page(l4);

    if ( !rc )
    {
        paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
        d->arch.paging.log_dirty.top = INVALID_MFN;

        ASSERT(d->arch.paging.log_dirty.allocs == 0);
        d->arch.paging.log_dirty.failed_allocs = 0;

        rc = -d->arch.paging.preempt.log_dirty.done;
        d->arch.paging.preempt.dom = NULL;
    }
    else
    {
        d->arch.paging.preempt.dom = current->domain;
        d->arch.paging.preempt.op = XEN_DOMCTL_SHADOW_OP_OFF;
    }

    paging_unlock(d);

    return rc;
}

int paging_log_dirty_enable(struct domain *d, bool_t log_global)
{
    int ret;

    if ( need_iommu(d) && log_global )
    {
        /*
         * Refuse to turn on global log-dirty mode
         * if the domain is using the IOMMU.
         */
        return -EINVAL;
    }

    if ( paging_mode_log_dirty(d) )
        return -EINVAL;

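    /* Pause the domain so enabling log-dirty cannot race with running vCPUs. */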
    domain_pause(d);
    ret = d->arch.paging.log_dirty.ops->enable(d, log_global);
    domain_unpause(d);

    return ret;
}

static int paging_log_dirty_disable(struct domain *d, bool_t resuming)
{
    int ret = 1;

    if ( !resuming )
    {
        domain_pause(d);
        /* Safe because the domain is paused. */
        if ( paging_mode_log_dirty(d) )
        {
            ret = d->arch.paging.log_dirty.ops->disable(d);
            ASSERT(ret <= 0);
        }
    }

    ret = paging_free_log_dirty_bitmap(d, ret);
    if ( ret == -ERESTART )
        return ret;

    domain_unpause(d);

    return ret;
}

/* Mark a page as dirty, taking the guest pfn as the parameter */
void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn)
{
    bool changed;
    mfn_t mfn, *l4, *l3, *l2;
    unsigned long *l1;
    unsigned int i1, i2, i3, i4;

    if ( !paging_mode_log_dirty(d) )
        return;

    /* Shared MFNs should NEVER be marked dirty */
    BUG_ON(paging_mode_translate(d) && SHARED_M2P(pfn_x(pfn)));

    /*
     * Values with the MSB set denote MFNs that aren't really part of the
     * domain's pseudo-physical memory map (e.g., the shared info frame).
     * Nothing to do here...
     */
    if ( unlikely(!VALID_M2P(pfn_x(pfn))) )
        return;

    i1 = L1_LOGDIRTY_IDX(pfn);
    i2 = L2_LOGDIRTY_IDX(pfn);
    i3 = L3_LOGDIRTY_IDX(pfn);
    i4 = L4_LOGDIRTY_IDX(pfn);

    /* Recursive: this is called from inside the shadow code */
    paging_lock_recursive(d);

    if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
    {
         d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
         if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
             goto out;
    }

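    /* Walk the trie, allocating any missing intermediate nodes and the leaf. */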
    l4 = paging_map_log_dirty_bitmap(d);
    mfn = l4[i4];
    if ( !mfn_valid(mfn) )
        l4[i4] = mfn = paging_new_log_dirty_node(d);
    unmap_domain_page(l4);
    if ( !mfn_valid(mfn) )
        goto out;

    l3 = map_domain_page(mfn);
    mfn = l3[i3];
    if ( !mfn_valid(mfn) )
        l3[i3] = mfn = paging_new_log_dirty_node(d);
    unmap_domain_page(l3);
    if ( !mfn_valid(mfn) )
        goto out;

    l2 = map_domain_page(mfn);
    mfn = l2[i2];
    if ( !mfn_valid(mfn) )
        l2[i2] = mfn = paging_new_log_dirty_leaf(d);
    unmap_domain_page(l2);
    if ( !mfn_valid(mfn) )
        goto out;

    l1 = map_domain_page(mfn);
    changed = !__test_and_set_bit(i1, l1);
    unmap_domain_page(l1);
    if ( changed )
    {
        PAGING_DEBUG(LOGDIRTY,
                     "d%d: marked mfn %" PRI_mfn " (pfn %" PRI_pfn ")\n",
                     d->domain_id, mfn_x(mfn), pfn_x(pfn));
        d->arch.paging.log_dirty.dirty_count++;
    }

out:
    /* We've already recorded any failed allocations */
    paging_unlock(d);
    return;
}

/* Mark a page as dirty */
void paging_mark_dirty(struct domain *d, mfn_t gmfn)
{
    pfn_t pfn;

    if ( !paging_mode_log_dirty(d) || !mfn_valid(gmfn) ||
         page_get_owner(mfn_to_page(gmfn)) != d )
        return;

    /* We /really/ mean PFN here, even for non-translated guests. */
    pfn = _pfn(get_gpfn_from_mfn(mfn_x(gmfn)));

    paging_mark_pfn_dirty(d, pfn);
}


/* Is this guest page dirty? */
int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn)
{
    pfn_t pfn;
    mfn_t mfn, *l4, *l3, *l2;
    unsigned long *l1;
    int rv;

    ASSERT(paging_locked_by_me(d));
    ASSERT(paging_mode_log_dirty(d));

    /* We /really/ mean PFN here, even for non-translated guests. */
    pfn = _pfn(get_gpfn_from_mfn(mfn_x(gmfn)));
    /* Shared pages are always read-only; invalid pages can't be dirty. */
    if ( unlikely(SHARED_M2P(pfn_x(pfn)) || !VALID_M2P(pfn_x(pfn))) )
        return 0;

    mfn = d->arch.paging.log_dirty.top;
    if ( !mfn_valid(mfn) )
        return 0;

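    /* Walk the trie without allocating; a missing node at any level means clean. */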
    l4 = map_domain_page(mfn);
    mfn = l4[L4_LOGDIRTY_IDX(pfn)];
    unmap_domain_page(l4);
    if ( !mfn_valid(mfn) )
        return 0;

    l3 = map_domain_page(mfn);
    mfn = l3[L3_LOGDIRTY_IDX(pfn)];
    unmap_domain_page(l3);
    if ( !mfn_valid(mfn) )
        return 0;

    l2 = map_domain_page(mfn);
    mfn = l2[L2_LOGDIRTY_IDX(pfn)];
    unmap_domain_page(l2);
    if ( !mfn_valid(mfn) )
        return 0;

    l1 = map_domain_page(mfn);
    rv = test_bit(L1_LOGDIRTY_IDX(pfn), l1);
    unmap_domain_page(l1);
    return rv;
}


/* Read a domain's log-dirty bitmap and stats.  If the operation is a CLEAN,
 * clear the bitmap and stats as well. */
static int paging_log_dirty_op(struct domain *d,
                               struct xen_domctl_shadow_op *sc,
                               bool_t resuming)
{
    int rv = 0, clean = 0, peek = 1;
    unsigned long pages = 0;
    mfn_t *l4 = NULL, *l3 = NULL, *l2 = NULL;
    unsigned long *l1 = NULL;
    int i4, i3, i2;

    if ( !resuming )
    {
        /*
         * Mark dirty all currently write-mapped pages on e.g. the
         * final iteration of a save operation.
         */
        if ( is_hvm_domain(d) &&
             (sc->mode & XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL) )
            hvm_mapped_guest_frames_mark_dirty(d);

        domain_pause(d);

        /*
         * Flush dirty GFNs potentially cached by hardware.  We only need to
         * flush when not resuming, as the domain was already paused in the
         * resuming case, so no new dirty pages can have appeared since.
         */
        p2m_flush_hardware_cached_dirty(d);
    }

    paging_lock(d);

    if ( !d->arch.paging.preempt.dom )
        memset(&d->arch.paging.preempt.log_dirty, 0,
               sizeof(d->arch.paging.preempt.log_dirty));
    else if ( d->arch.paging.preempt.dom != current->domain ||
              d->arch.paging.preempt.op != sc->op )
    {
        paging_unlock(d);
        ASSERT(!resuming);
        domain_unpause(d);
        return -EBUSY;
    }

    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);

    PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
                 (clean) ? "clean" : "peek",
                 d->domain_id,
                 d->arch.paging.log_dirty.fault_count,
                 d->arch.paging.log_dirty.dirty_count);

    sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
    sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;

    if ( guest_handle_is_null(sc->dirty_bitmap) )
        /* caller may have wanted just to clean the state or access stats. */
        peek = 0;

    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
        printk(XENLOG_WARNING
               "%u failed page allocs while logging dirty pages of d%d\n",
               d->arch.paging.log_dirty.failed_allocs, d->domain_id);
        rv = -ENOMEM;
        goto out;
    }

    l4 = paging_map_log_dirty_bitmap(d);
    i4 = d->arch.paging.preempt.log_dirty.i4;
    i3 = d->arch.paging.preempt.log_dirty.i3;
    pages = d->arch.paging.preempt.log_dirty.done;

    for ( ; (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES); i4++, i3 = 0 )
    {
        l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(l4[i4]) : NULL;
        for ( ; (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES); i3++ )
        {
            l2 = ((l3 && mfn_valid(l3[i3])) ?
                  map_domain_page(l3[i3]) : NULL);
            for ( i2 = 0;
                  (pages < sc->pages) && (i2 < LOGDIRTY_NODE_ENTRIES);
                  i2++ )
            {
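                /*
                 * When peeking, copy one leaf page of dirty bits to the
                 * caller (or clear the corresponding guest bytes for
                 * unallocated leaves), clamped to the bits still requested.
                 */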
                unsigned int bytes = PAGE_SIZE;
                l1 = ((l2 && mfn_valid(l2[i2])) ?
                      map_domain_page(l2[i2]) : NULL);
                if ( unlikely(((sc->pages - pages + 7) >> 3) < bytes) )
                    bytes = (unsigned int)((sc->pages - pages + 7) >> 3);
                if ( likely(peek) )
                {
                    if ( (l1 ? copy_to_guest_offset(sc->dirty_bitmap,
                                                    pages >> 3, (uint8_t *)l1,
                                                    bytes)
                             : clear_guest_offset(sc->dirty_bitmap,
                                                  pages >> 3, bytes)) != 0 )
                    {
                        rv = -EFAULT;
                        goto out;
                    }
                }
                pages += bytes << 3;
                if ( l1 )
                {
                    if ( clean )
                        clear_page(l1);
                    unmap_domain_page(l1);
                }
            }
            if ( l2 )
                unmap_domain_page(l2);

            if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
            {
                d->arch.paging.preempt.log_dirty.i4 = i4;
                d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
                rv = -ERESTART;
                break;
            }
        }
        if ( l3 )
            unmap_domain_page(l3);

        if ( !rv && i4 < LOGDIRTY_NODE_ENTRIES - 1 &&
             hypercall_preempt_check() )
        {
            d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
            d->arch.paging.preempt.log_dirty.i3 = 0;
            rv = -ERESTART;
        }
        if ( rv )
            break;
    }
    if ( l4 )
        unmap_domain_page(l4);

    if ( !rv )
    {
        d->arch.paging.preempt.dom = NULL;
        if ( clean )
        {
            d->arch.paging.log_dirty.fault_count = 0;
            d->arch.paging.log_dirty.dirty_count = 0;
        }
    }
    else
    {
        d->arch.paging.preempt.dom = current->domain;
        d->arch.paging.preempt.op = sc->op;
        d->arch.paging.preempt.log_dirty.done = pages;
    }

    paging_unlock(d);

    if ( rv )
    {
        /* Never leave the domain paused on real errors. */
        ASSERT(rv == -ERESTART);
        return rv;
    }

    if ( pages < sc->pages )
        sc->pages = pages;
    if ( clean )
    {
        /* We also need to call the clean_dirty_bitmap() function of the
         * specific paging mode (shadow or hap).  Safe because the domain
         * is paused. */
        d->arch.paging.log_dirty.ops->clean(d);
    }
    domain_unpause(d);
    return rv;

 out:
    d->arch.paging.preempt.dom = NULL;
    paging_unlock(d);
    domain_unpause(d);

    if ( l1 )
        unmap_domain_page(l1);
    if ( l2 )
        unmap_domain_page(l2);
    if ( l3 )
        unmap_domain_page(l3);
    if ( l4 )
        unmap_domain_page(l4);

    return rv;
}

void paging_log_dirty_range(struct domain *d,
                           unsigned long begin_pfn,
                           unsigned long nr,
                           uint8_t *dirty_bitmap)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int i;
    unsigned long pfn;

    /*
     * Set l1e entries of P2M table to be read-only.
     *
     * On the first write, a page fault occurs, the entry is changed to
     * read-write, and on retry the write succeeds.
     *
     * We populate dirty_bitmap by looking for entries that have been
     * switched to read-write.
     */

    p2m_lock(p2m);

    for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
        if ( !p2m_change_type_one(d, pfn, p2m_ram_rw, p2m_ram_logdirty) )
            dirty_bitmap[i >> 3] |= (1 << (i & 7));

    p2m_unlock(p2m);

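    /* Flush TLBs so stale writable mappings cannot bypass the fault-based tracking. */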
    flush_tlb_mask(d->domain_dirty_cpumask);
}

/*
 * Callers must supply log_dirty_ops for the log dirty code to call.  This
 * function is usually invoked when paging is enabled; see shadow_enable()
 * and hap_enable() for reference.
 *
 * These function pointers must not be followed with the log-dirty lock held.
 */
void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops)
{
    d->arch.paging.log_dirty.ops = ops;
}

/************************************************/
/*           CODE FOR PAGING SUPPORT            */
/************************************************/
/* Domain paging struct initialization. */
int paging_domain_init(struct domain *d, unsigned int domcr_flags)
{
    int rc;

    if ( (rc = p2m_init(d)) != 0 )
        return rc;

    mm_lock_init(&d->arch.paging.lock);

    /* This must be initialized separately from the rest of the
     * log-dirty init code as that can be called more than once and we
     * don't want to leak any active log-dirty bitmaps */
    d->arch.paging.log_dirty.top = INVALID_MFN;

    /*
     * Shadow pagetables are the default, but we will use
     * hardware assistance if it's available and enabled.
     */
    if ( hap_enabled(d) )
        hap_domain_init(d);
    else
        rc = shadow_domain_init(d, domcr_flags);

    return rc;
}

/* vcpu paging struct initialization goes here */
void paging_vcpu_init(struct vcpu *v)
{
    if ( hap_enabled(v->domain) )
        hap_vcpu_init(v);
    else
        shadow_vcpu_init(v);
}


int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
                  XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl,
                  bool_t resuming)
{
    int rc;

    if ( unlikely(d == current->domain) )
    {
        gdprintk(XENLOG_INFO, "Tried to do a paging op on itself.\n");
        return -EINVAL;
    }

    if ( unlikely(d->is_dying) )
    {
        gdprintk(XENLOG_INFO, "Ignoring paging op on dying domain %u\n",
                 d->domain_id);
        return 0;
    }

    if ( unlikely(d->vcpu == NULL) || unlikely(d->vcpu[0] == NULL) )
    {
        gdprintk(XENLOG_DEBUG, "Paging op on a domain (%u) with no vcpus\n",
                 d->domain_id);
        return -EINVAL;
    }

    if ( resuming
         ? (d->arch.paging.preempt.dom != current->domain ||
            d->arch.paging.preempt.op != sc->op)
         : (d->arch.paging.preempt.dom &&
            sc->op != XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION) )
    {
        printk(XENLOG_G_DEBUG
               "%pv: Paging op %#x on Dom%u with unfinished prior op %#x by Dom%u\n",
               current, sc->op, d->domain_id, d->arch.paging.preempt.op,
               d->arch.paging.preempt.dom
               ? d->arch.paging.preempt.dom->domain_id : DOMID_INVALID);
        return -EBUSY;
    }

    rc = xsm_shadow_control(XSM_HOOK, d, sc->op);
    if ( rc )
        return rc;

    /* Code to handle log-dirty.  Note that some log-dirty operations
     * piggy-back on shadow operations.  For example, when
     * XEN_DOMCTL_SHADOW_OP_OFF is called, it first checks whether log-dirty
     * mode is enabled.  If it is, we disable log-dirty and continue with
     * the shadow code.  For this reason, we need to further dispatch the
     * domctl to the next-level paging code (shadow or hap).
     */
    switch ( sc->op )
    {

    case XEN_DOMCTL_SHADOW_OP_ENABLE:
        if ( !(sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY) )
            break;
        /* Else fall through... */
    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
        return paging_log_dirty_enable(d, 1);

    case XEN_DOMCTL_SHADOW_OP_OFF:
        if ( (rc = paging_log_dirty_disable(d, resuming)) != 0 )
            return rc;
        break;

    case XEN_DOMCTL_SHADOW_OP_CLEAN:
    case XEN_DOMCTL_SHADOW_OP_PEEK:
        if ( sc->mode & ~XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL )
            return -EINVAL;
        return paging_log_dirty_op(d, sc, resuming);
    }

    /* Here, dispatch domctl to the appropriate paging code */
    if ( hap_enabled(d) )
        return hap_domctl(d, sc, u_domctl);
    else
        return shadow_domctl(d, sc, u_domctl);
}

long paging_domctl_continuation(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
    struct xen_domctl op;
    struct domain *d;
    int ret;

    if ( copy_from_guest(&op, u_domctl, 1) )
        return -EFAULT;

    if ( op.interface_version != XEN_DOMCTL_INTERFACE_VERSION ||
         op.cmd != XEN_DOMCTL_shadow_op )
        return -EOPNOTSUPP;

    d = rcu_lock_domain_by_id(op.domain);
    if ( d == NULL )
        return -ESRCH;

    ret = xsm_domctl(XSM_OTHER, d, op.cmd);
    if ( !ret )
    {
        if ( domctl_lock_acquire() )
        {
            ret = paging_domctl(d, &op.u.shadow_op, u_domctl, 1);

            domctl_lock_release();
        }
        else
            ret = -ERESTART;
    }

    rcu_unlock_domain(d);

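    /*
     * Still not finished: arrange for another pass through this
     * continuation handler via the __HYPERVISOR_arch_1 hypercall.
     */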
    if ( ret == -ERESTART )
        ret = hypercall_create_continuation(__HYPERVISOR_arch_1,
                                            "h", u_domctl);
    else if ( __copy_field_to_guest(u_domctl, &op, u.shadow_op) )
        ret = -EFAULT;

    return ret;
}

/* Call when destroying a domain */
int paging_teardown(struct domain *d)
{
    int rc;
    bool preempted = false;

    if ( hap_enabled(d) )
        hap_teardown(d, &preempted);
    else
        shadow_teardown(d, &preempted);

    if ( preempted )
        return -ERESTART;

    /* clean up log dirty resources. */
    rc = paging_free_log_dirty_bitmap(d, 0);
    if ( rc == -ERESTART )
        return rc;

    /* Move populate-on-demand cache back to domain_list for destruction */
    rc = p2m_pod_empty_cache(d);

    return rc;
}

/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d)
{
    if ( hap_enabled(d) )
        hap_final_teardown(d);
    else
        shadow_final_teardown(d);

    p2m_final_teardown(d);
}

/* Enable an arbitrary paging-assistance mode.  Call once at domain
 * creation. */
int paging_enable(struct domain *d, u32 mode)
{
    /* Unrecognised paging mode? */
    if ( mode & ~PG_MASK )
        return -EINVAL;

    /* All of external|translate|refcounts, or none. */
    switch ( mode & (PG_external | PG_translate | PG_refcounts) )
    {
    case 0:
    case PG_external | PG_translate | PG_refcounts:
        break;
    default:
        return -EINVAL;
    }

    if ( hap_enabled(d) )
        return hap_enable(d, mode);
    else
        return shadow_enable(d, mode);
}

/* Called from the guest to indicate that a process is being torn down
 * and therefore its pagetables will soon be discarded */
void pagetable_dying(struct domain *d, paddr_t gpa)
{
#ifdef CONFIG_SHADOW_PAGING
    struct vcpu *v;

    ASSERT(paging_mode_shadow(d));

    v = d->vcpu[0];
    v->arch.paging.mode->shadow.pagetable_dying(v, gpa);
#else
    BUG();
#endif
}

/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d)
{
    if ( paging_mode_enabled(d) )
    {
        printk("    paging assistance: ");
        if ( paging_mode_shadow(d) )
            printk("shadow ");
        if ( paging_mode_hap(d) )
            printk("hap ");
        if ( paging_mode_refcounts(d) )
            printk("refcounts ");
        if ( paging_mode_log_dirty(d) )
            printk("log_dirty ");
        if ( paging_mode_translate(d) )
            printk("translate ");
        if ( paging_mode_external(d) )
            printk("external ");
        printk("\n");
    }
}

void paging_dump_vcpu_info(struct vcpu *v)
{
    if ( paging_mode_enabled(v->domain) )
    {
        printk("    paging assistance: ");
        if ( paging_mode_shadow(v->domain) )
        {
            if ( paging_get_hostmode(v) )
                printk("shadowed %u-on-%u\n",
                       paging_get_hostmode(v)->guest_levels,
                       paging_get_hostmode(v)->shadow.shadow_levels);
            else
                printk("not shadowed\n");
        }
        else if ( paging_mode_hap(v->domain) && paging_get_hostmode(v) )
            printk("hap, %u levels\n",
                   paging_get_hostmode(v)->guest_levels);
        else
            printk("none\n");
    }
}

const struct paging_mode *paging_get_mode(struct vcpu *v)
{
    if (!nestedhvm_is_n2(v))
        return paging_get_hostmode(v);

    return paging_get_nestedmode(v);
}

void paging_update_nestedmode(struct vcpu *v)
{
    ASSERT(nestedhvm_enabled(v->domain));
    if (nestedhvm_paging_mode_hap(v))
        /* nested-on-nested */
        v->arch.paging.nestedmode = hap_paging_get_mode(v);
    else
        /* TODO: shadow-on-shadow */
        v->arch.paging.nestedmode = NULL;
    hvm_asid_flush_vcpu(v);
}

void paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
                            l1_pgentry_t *p, l1_pgentry_t new,
                            unsigned int level)
{
    struct domain *d = p2m->domain;
    struct vcpu *v = current;
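    /*
     * If the update is being made on behalf of another domain (e.g. by the
     * toolstack), fall back to the target domain's vCPU 0 so the correct
     * per-domain paging hook is used.
     */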
    if ( v->domain != d )
        v = d->vcpu ? d->vcpu[0] : NULL;
    if ( likely(v && paging_mode_enabled(d) && paging_get_hostmode(v) != NULL) )
        paging_get_hostmode(v)->write_p2m_entry(d, gfn, p, new, level);
    else
        safe_write_pte(p, new);
}

int paging_set_allocation(struct domain *d, unsigned int pages, bool *preempted)
{
    int rc;

    ASSERT(paging_mode_enabled(d));

    paging_lock(d);
    if ( hap_enabled(d) )
        rc = hap_set_allocation(d, pages, preempted);
    else
        rc = shadow_set_allocation(d, pages, preempted);
    paging_unlock(d);

    return rc;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */