/******************************************************************************
 * arch/x86/mm/hap/hap.c
 *
 * hardware assisted paging
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Parts of this code are Copyright (c) 2007 by XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/types.h>
#include <xen/mm.h>
#include <xen/trace.h>
#include <xen/sched.h>
#include <xen/perfc.h>
#include <xen/irq.h>
#include <xen/domain_page.h>
#include <xen/guest_access.h>
#include <xen/keyhandler.h>
#include <asm/event.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/shared.h>
#include <asm/hap.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/domain.h>
#include <xen/numa.h>
#include <asm/hvm/nestedhvm.h>

#include "private.h"

/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef page_to_mfn
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))

/************************************************/
/*          HAP VRAM TRACKING SUPPORT           */
/************************************************/

/*
 * hap_track_dirty_vram()
 * Create the domain's dv_dirty_vram struct on demand.
 * Create a dirty vram range on demand when some [begin_pfn:begin_pfn+nr] is
 * first encountered.
 * Collect the guest_dirty bitmask, a bit mask of the dirty vram pages, by
 * calling paging_log_dirty_range(), which interrogates each vram
 * page's p2m type looking for pages that have been made writable.
 */

int hap_track_dirty_vram(struct domain *d,
                         unsigned long begin_pfn,
                         unsigned long nr,
                         XEN_GUEST_HANDLE_PARAM(void) guest_dirty_bitmap)
{
    long rc = 0;
    struct sh_dirty_vram *dirty_vram;
    uint8_t *dirty_bitmap = NULL;

    if ( nr )
    {
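        /* One bit per pfn in the range, rounded up to whole bytes. */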
        int size = (nr + BITS_PER_BYTE - 1) / BITS_PER_BYTE;

        if ( !paging_mode_log_dirty(d) )
        {
            rc = paging_log_dirty_enable(d, 0);
            if ( rc )
                goto out;
        }

        rc = -ENOMEM;
        dirty_bitmap = vzalloc(size);
        if ( !dirty_bitmap )
            goto out;

        paging_lock(d);

        dirty_vram = d->arch.hvm_domain.dirty_vram;
        if ( !dirty_vram )
        {
            rc = -ENOMEM;
            if ( (dirty_vram = xzalloc(struct sh_dirty_vram)) == NULL )
            {
                paging_unlock(d);
                goto out;
            }

            d->arch.hvm_domain.dirty_vram = dirty_vram;
        }

        if ( begin_pfn != dirty_vram->begin_pfn ||
             begin_pfn + nr != dirty_vram->end_pfn )
        {
            unsigned long ostart = dirty_vram->begin_pfn;
            unsigned long oend = dirty_vram->end_pfn;

            dirty_vram->begin_pfn = begin_pfn;
            dirty_vram->end_pfn = begin_pfn + nr;

            paging_unlock(d);

            if ( oend > ostart )
                p2m_change_type_range(d, ostart, oend,
                                      p2m_ram_logdirty, p2m_ram_rw);

            /*
             * Switch vram to log dirty mode, either by setting l1e entries of
             * P2M table to be read-only, or via hardware-assisted log-dirty.
             */
            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
                                  p2m_ram_rw, p2m_ram_logdirty);

            flush_tlb_mask(d->domain_dirty_cpumask);

            memset(dirty_bitmap, 0xff, size); /* consider all pages dirty */
        }
        else
        {
            paging_unlock(d);

            domain_pause(d);

            /* Flush dirty GFNs potentially cached by hardware. */
            p2m_flush_hardware_cached_dirty(d);

            /* get the bitmap */
            paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);

            domain_unpause(d);
        }

        rc = -EFAULT;
        if ( copy_to_guest(guest_dirty_bitmap, dirty_bitmap, size) == 0 )
            rc = 0;
    }
    else
    {
        paging_lock(d);

        dirty_vram = d->arch.hvm_domain.dirty_vram;
        if ( dirty_vram )
        {
            /*
             * If zero pages specified while tracking dirty vram
             * then stop tracking
             */
            begin_pfn = dirty_vram->begin_pfn;
            nr = dirty_vram->end_pfn - dirty_vram->begin_pfn;
            xfree(dirty_vram);
            d->arch.hvm_domain.dirty_vram = NULL;
        }

        paging_unlock(d);
        if ( nr )
            p2m_change_type_range(d, begin_pfn, begin_pfn + nr,
                                  p2m_ram_logdirty, p2m_ram_rw);
    }
out:
    vfree(dirty_bitmap);

    return rc;
}

/************************************************/
/*            HAP LOG DIRTY SUPPORT             */
/************************************************/

/*
 * HAP code to call when log_dirty is enabled. Returns 0 if no problem is
 * found.
 *
 * NB: A domain with devices assigned should not set log_global, because
 * there is no way to track memory updates performed by the device.
 */
static int hap_enable_log_dirty(struct domain *d, bool_t log_global)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);

    /*
     * Refuse to turn on global log-dirty mode if
     * there are outstanding p2m_ioreq_server pages.
     */
    if ( log_global && read_atomic(&p2m->ioreq.entry_count) )
        return -EBUSY;

    /* turn on PG_log_dirty bit in paging mode */
    paging_lock(d);
    d->arch.paging.mode |= PG_log_dirty;
    paging_unlock(d);

    /* Enable hardware-assisted log-dirty if it is supported. */
    p2m_enable_hardware_log_dirty(d);

    if ( log_global )
    {
        /*
         * Switch to log dirty mode, either by setting l1e entries of P2M table
         * to be read-only, or via hardware-assisted log-dirty.
         */
        p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
        flush_tlb_mask(d->domain_dirty_cpumask);
    }
    return 0;
}

static int hap_disable_log_dirty(struct domain *d)
{
    paging_lock(d);
    d->arch.paging.mode &= ~PG_log_dirty;
    paging_unlock(d);

    /* Disable hardware-assisted log-dirty if it is supported. */
    p2m_disable_hardware_log_dirty(d);

    /*
     * Switch to normal mode, either by setting l1e entries of P2M table to
     * normal mode, or via hardware-assisted log-dirty.
     */
    p2m_change_entry_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
    return 0;
}

static void hap_clean_dirty_bitmap(struct domain *d)
{
    /*
     * Switch to log-dirty mode, either by setting l1e entries of P2M table to
     * be read-only, or via hardware-assisted log-dirty.
     */
    p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
    flush_tlb_mask(d->domain_dirty_cpumask);
}

/************************************************/
/*             HAP SUPPORT FUNCTIONS            */
/************************************************/
static struct page_info *hap_alloc(struct domain *d)
{
    struct page_info *pg;

    ASSERT(paging_locked_by_me(d));

    pg = page_list_remove_head(&d->arch.paging.hap.freelist);
    if ( unlikely(!pg) )
        return NULL;

    d->arch.paging.hap.free_pages--;

    clear_domain_page(page_to_mfn(pg));

    return pg;
}

static void hap_free(struct domain *d, mfn_t mfn)
{
    struct page_info *pg = mfn_to_page(mfn);

    ASSERT(paging_locked_by_me(d));

    d->arch.paging.hap.free_pages++;
    page_list_add_tail(pg, &d->arch.paging.hap.freelist);
}

static struct page_info *hap_alloc_p2m_page(struct domain *d)
{
    struct page_info *pg;

    /* This is called both from the p2m code (which never holds the
     * paging lock) and the log-dirty code (which always does). */
    paging_lock_recursive(d);
    pg = hap_alloc(d);

    if ( likely(pg != NULL) )
    {
        d->arch.paging.hap.total_pages--;
        d->arch.paging.hap.p2m_pages++;
        ASSERT(!page_get_owner(pg) && !(pg->count_info & PGC_count_mask));
    }
    else if ( !d->arch.paging.p2m_alloc_failed )
    {
        d->arch.paging.p2m_alloc_failed = 1;
        dprintk(XENLOG_ERR, "d%i failed to allocate from HAP pool\n",
                d->domain_id);
    }

    paging_unlock(d);
    return pg;
}

static void hap_free_p2m_page(struct domain *d, struct page_info *pg)
{
    struct domain *owner = page_get_owner(pg);

    /* This is called both from the p2m code (which never holds the
     * paging lock) and the log-dirty code (which always does). */
    paging_lock_recursive(d);

    /* Should still have no owner and count zero. */
    if ( owner || (pg->count_info & PGC_count_mask) )
    {
        HAP_ERROR("d%d: Odd p2m page %"PRI_mfn" d=%d c=%lx t=%"PRtype_info"\n",
                  d->domain_id, mfn_x(page_to_mfn(pg)),
                  owner ? owner->domain_id : DOMID_INVALID,
                  pg->count_info, pg->u.inuse.type_info);
        WARN();
        pg->count_info &= ~PGC_count_mask;
        page_set_owner(pg, NULL);
    }
    d->arch.paging.hap.p2m_pages--;
    d->arch.paging.hap.total_pages++;
    hap_free(d, page_to_mfn(pg));

    paging_unlock(d);
}

/* Return the size of the pool, rounded up to the nearest MB */
static unsigned int
hap_get_allocation(struct domain *d)
{
    unsigned int pg = d->arch.paging.hap.total_pages
        + d->arch.paging.hap.p2m_pages;

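    /* 20 - PAGE_SHIFT converts pages to MB; partial MBs round up. */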
    return ((pg >> (20 - PAGE_SHIFT))
            + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
}

/* Set the pool of pages to the required number of pages.
 * Returns 0 for success, non-zero for failure. */
int hap_set_allocation(struct domain *d, unsigned int pages, bool *preempted)
{
    struct page_info *pg;

    ASSERT(paging_locked_by_me(d));

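    /*
     * 'pages' is the caller's target for the whole allocation, including
     * pages currently lent to the p2m (accounted in p2m_pages rather than
     * total_pages), so subtract those before adjusting the pool.
     */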
    if ( pages < d->arch.paging.hap.p2m_pages )
        pages = 0;
    else
        pages -= d->arch.paging.hap.p2m_pages;

    for ( ; ; )
    {
        if ( d->arch.paging.hap.total_pages < pages )
        {
            /* Need to allocate more memory from domheap */
            pg = alloc_domheap_page(d, MEMF_no_owner);
            if ( pg == NULL )
            {
                HAP_PRINTK("failed to allocate hap pages.\n");
                return -ENOMEM;
            }
            d->arch.paging.hap.free_pages++;
            d->arch.paging.hap.total_pages++;
            page_list_add_tail(pg, &d->arch.paging.hap.freelist);
        }
        else if ( d->arch.paging.hap.total_pages > pages )
        {
            /* Need to return memory to domheap */
            if ( page_list_empty(&d->arch.paging.hap.freelist) )
            {
                HAP_PRINTK("failed to free enough hap pages.\n");
                return -ENOMEM;
            }
            pg = page_list_remove_head(&d->arch.paging.hap.freelist);
            ASSERT(pg);
            d->arch.paging.hap.free_pages--;
            d->arch.paging.hap.total_pages--;
            free_domheap_page(pg);
        }
        else
            break;

        /* Check to see if we need to yield and try again */
        if ( preempted && general_preempt_check() )
        {
            *preempted = true;
            return 0;
        }
    }

    return 0;
}

static mfn_t hap_make_monitor_table(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct page_info *pg;
    l4_pgentry_t *l4e;
    mfn_t m4mfn;

    ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);

    if ( (pg = hap_alloc(d)) == NULL )
        goto oom;

    m4mfn = page_to_mfn(pg);
    l4e = map_domain_page(m4mfn);

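    /* Fill in the Xen-reserved slots of the new top-level table. */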
    init_xen_l4_slots(l4e, m4mfn, d, INVALID_MFN, false);
    unmap_domain_page(l4e);

    return m4mfn;

 oom:
    HAP_ERROR("out of memory building monitor pagetable\n");
    domain_crash(d);
    return INVALID_MFN;
}

static void hap_destroy_monitor_table(struct vcpu* v, mfn_t mmfn)
{
    struct domain *d = v->domain;

    /* Put the memory back in the pool */
    hap_free(d, mmfn);
}

/************************************************/
/*          HAP DOMAIN LEVEL FUNCTIONS          */
/************************************************/
void hap_domain_init(struct domain *d)
{
    static const struct log_dirty_ops hap_ops = {
        .enable  = hap_enable_log_dirty,
        .disable = hap_disable_log_dirty,
        .clean   = hap_clean_dirty_bitmap,
    };

    INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist);

    /* Use HAP logdirty mechanism. */
    paging_log_dirty_init(d, &hap_ops);
}

/* return 0 for success, -errno for failure */
int hap_enable(struct domain *d, u32 mode)
{
    unsigned int old_pages;
    unsigned int i;
    int rv = 0;

    domain_pause(d);

    old_pages = d->arch.paging.hap.total_pages;
    if ( old_pages == 0 )
    {
        paging_lock(d);
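        /* Start with a 256-page (1MB) pool; the toolstack can resize it
         * later via XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION (see hap_domctl()). */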
        rv = hap_set_allocation(d, 256, NULL);
        if ( rv != 0 )
        {
            hap_set_allocation(d, 0, NULL);
            paging_unlock(d);
            goto out;
        }
        paging_unlock(d);
    }

    /* Allow p2m and log-dirty code to borrow our memory */
    d->arch.paging.alloc_page = hap_alloc_p2m_page;
    d->arch.paging.free_page = hap_free_p2m_page;

    /* allocate P2m table */
    if ( mode & PG_translate )
    {
        rv = p2m_alloc_table(p2m_get_hostp2m(d));
        if ( rv != 0 )
            goto out;
    }

    for (i = 0; i < MAX_NESTEDP2M; i++) {
        rv = p2m_alloc_table(d->arch.nested_p2m[i]);
        if ( rv != 0 )
            goto out;
    }

    if ( hvm_altp2m_supported() )
    {
        /* Init alternate p2m data */
        if ( (d->arch.altp2m_eptp = alloc_xenheap_page()) == NULL )
        {
            rv = -ENOMEM;
            goto out;
        }

        for ( i = 0; i < MAX_EPTP; i++ )
            d->arch.altp2m_eptp[i] = mfn_x(INVALID_MFN);

        for ( i = 0; i < MAX_ALTP2M; i++ )
        {
            rv = p2m_alloc_table(d->arch.altp2m_p2m[i]);
            if ( rv != 0 )
                goto out;
        }

        d->arch.altp2m_active = 0;
    }

    /* Now let other users see the new mode */
    d->arch.paging.mode = mode | PG_HAP_enable;

 out:
    domain_unpause(d);
    return rv;
}

void hap_final_teardown(struct domain *d)
{
    unsigned int i;

    if ( hvm_altp2m_supported() )
    {
        d->arch.altp2m_active = 0;

        if ( d->arch.altp2m_eptp )
        {
            free_xenheap_page(d->arch.altp2m_eptp);
            d->arch.altp2m_eptp = NULL;
        }

        for ( i = 0; i < MAX_ALTP2M; i++ )
            p2m_teardown(d->arch.altp2m_p2m[i]);
    }

    /* Destroy nestedp2m's first */
    for (i = 0; i < MAX_NESTEDP2M; i++) {
        p2m_teardown(d->arch.nested_p2m[i]);
    }

    if ( d->arch.paging.hap.total_pages != 0 )
        hap_teardown(d, NULL);

    p2m_teardown(p2m_get_hostp2m(d));
    /* Free any memory that the p2m teardown released */
    paging_lock(d);
    hap_set_allocation(d, 0, NULL);
    ASSERT(d->arch.paging.hap.p2m_pages == 0);
    paging_unlock(d);
}

void hap_teardown(struct domain *d, bool *preempted)
{
    struct vcpu *v;
    mfn_t mfn;

    ASSERT(d->is_dying);
    ASSERT(d != current->domain);

    paging_lock(d); /* Keep various asserts happy */

    if ( paging_mode_enabled(d) )
    {
        /* release the monitor table held by each vcpu */
        for_each_vcpu ( d, v )
        {
            if ( paging_get_hostmode(v) && paging_mode_external(d) )
            {
                mfn = pagetable_get_mfn(v->arch.monitor_table);
                if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
                    hap_destroy_monitor_table(v, mfn);
                v->arch.monitor_table = pagetable_null();
            }
        }
    }

    if ( d->arch.paging.hap.total_pages != 0 )
    {
        hap_set_allocation(d, 0, preempted);

        if ( preempted && *preempted )
            goto out;

        ASSERT(d->arch.paging.hap.total_pages == 0);
    }

    d->arch.paging.mode &= ~PG_log_dirty;

    xfree(d->arch.hvm_domain.dirty_vram);
    d->arch.hvm_domain.dirty_vram = NULL;

out:
    paging_unlock(d);
}

int hap_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
               XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
    int rc;
    bool preempted = false;

    switch ( sc->op )
    {
    case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
        paging_lock(d);
        rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
        paging_unlock(d);
        if ( preempted )
            /* Not finished.  Set up to re-run the call. */
            rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
                                               u_domctl);
        else
            /* Finished.  Return the new allocation */
            sc->mb = hap_get_allocation(d);
        return rc;
    case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
        sc->mb = hap_get_allocation(d);
        /* Fall through... */
    case XEN_DOMCTL_SHADOW_OP_OFF:
        return 0;
    default:
        HAP_PRINTK("Bad hap domctl op %u\n", sc->op);
        return -EINVAL;
    }
}

static const struct paging_mode hap_paging_real_mode;
static const struct paging_mode hap_paging_protected_mode;
static const struct paging_mode hap_paging_pae_mode;
static const struct paging_mode hap_paging_long_mode;

void hap_vcpu_init(struct vcpu *v)
{
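    /* vcpus start out in real mode; hap_update_paging_modes() switches the
     * paging mode as the guest enables paging. */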
    v->arch.paging.mode = &hap_paging_real_mode;
    v->arch.paging.nestedmode = &hap_paging_real_mode;
}

/************************************************/
/*          HAP PAGING MODE FUNCTIONS           */
/************************************************/
/*
 * HAP guests can handle page faults (in the guest page tables) without
 * needing any action from Xen, so we should not be intercepting them.
 */
static int hap_page_fault(struct vcpu *v, unsigned long va,
                          struct cpu_user_regs *regs)
{
    struct domain *d = v->domain;

    HAP_ERROR("Intercepted a guest #PF (%pv) with HAP enabled\n", v);
    domain_crash(d);
    return 0;
}

/*
 * HAP guests can handle invlpg without needing any action from Xen, so
 * should not be intercepting it.  However, we need to correctly handle
 * getting here from instruction emulation.
 */
static bool_t hap_invlpg(struct vcpu *v, unsigned long va)
{
    /*
     * Emulate INVLPGA:
     * The flush must be performed right now, otherwise another vcpu may
     * pick up the stale nested p2m at the next VMRUN emulation.
     */
    if ( nestedhvm_enabled(v->domain) && vcpu_nestedhvm(v).nv_p2m )
        p2m_flush(v, vcpu_nestedhvm(v).nv_p2m);

    return 1;
}

static void hap_update_cr3(struct vcpu *v, int do_locking)
{
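    /* With HAP the hardware walks the guest's own pagetables, so the guest
     * CR3 value can simply be propagated to the hardware. */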
    v->arch.hvm_vcpu.hw_cr[3] = v->arch.hvm_vcpu.guest_cr[3];
    hvm_update_guest_cr(v, 3);
}

const struct paging_mode *
hap_paging_get_mode(struct vcpu *v)
{
    return (!hvm_paging_enabled(v)  ? &hap_paging_real_mode :
            hvm_long_mode_active(v) ? &hap_paging_long_mode :
            hvm_pae_enabled(v)      ? &hap_paging_pae_mode  :
                                      &hap_paging_protected_mode);
}

static void hap_update_paging_modes(struct vcpu *v)
{
    struct domain *d = v->domain;
    unsigned long cr3_gfn = v->arch.hvm_vcpu.guest_cr[3] >> PAGE_SHIFT;
    p2m_type_t t;

    /* We hold a reference to the cr3 gfn as it may be modified later, and
     * we need to respect lock ordering. No checks are needed here, as they
     * are performed by vmx_load_pdptrs (the potential consumer of the cr3). */
    (void)get_gfn(d, cr3_gfn, &t);
    paging_lock(d);

    v->arch.paging.mode = hap_paging_get_mode(v);

    if ( pagetable_is_null(v->arch.monitor_table) )
    {
        mfn_t mmfn = hap_make_monitor_table(v);
        v->arch.monitor_table = pagetable_from_mfn(mmfn);
        make_cr3(v, mmfn);
        hvm_update_host_cr3(v);
    }

    /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
    hap_update_cr3(v, 0);

    paging_unlock(d);
    put_gfn(d, cr3_gfn);
}

static void
hap_write_p2m_entry(struct domain *d, unsigned long gfn, l1_pgentry_t *p,
                    l1_pgentry_t new, unsigned int level)
{
    uint32_t old_flags;
    bool_t flush_nestedp2m = 0;

    /* We always use the host p2m here, regardless of whether the vcpu
     * is in host or guest mode. The vcpu can be in guest mode when this
     * is reached via a hypercall that passes a domain and (usually)
     * operates on its first vcpu. */

    paging_lock(d);
    old_flags = l1e_get_flags(*p);

    if ( nestedhvm_enabled(d) && (old_flags & _PAGE_PRESENT)
         && !p2m_get_hostp2m(d)->defer_nested_flush ) {
        /* We are replacing a valid entry so we need to flush nested p2ms,
         * unless the only change is an increase in access rights. */
        mfn_t omfn = l1e_get_mfn(*p);
        mfn_t nmfn = l1e_get_mfn(new);
        flush_nestedp2m = !( mfn_x(omfn) == mfn_x(nmfn)
            && perms_strictly_increased(old_flags, l1e_get_flags(new)) );
    }

    safe_write_pte(p, new);
    if ( old_flags & _PAGE_PRESENT )
        flush_tlb_mask(d->domain_dirty_cpumask);

    paging_unlock(d);

    if ( flush_nestedp2m )
        p2m_flush_nestedp2m(d);
}

static unsigned long hap_gva_to_gfn_real_mode(
    struct vcpu *v, struct p2m_domain *p2m, unsigned long gva, uint32_t *pfec)
{
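    /* With paging disabled there is no translation: gva == gfn. */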
    return ((paddr_t)gva >> PAGE_SHIFT);
}

static unsigned long hap_p2m_ga_to_gfn_real_mode(
    struct vcpu *v, struct p2m_domain *p2m, unsigned long cr3,
    paddr_t ga, uint32_t *pfec, unsigned int *page_order)
{
    if ( page_order )
        *page_order = PAGE_ORDER_4K;
    return (ga >> PAGE_SHIFT);
}

/* Entry points into this mode of the hap code. */
static const struct paging_mode hap_paging_real_mode = {
    .page_fault             = hap_page_fault,
    .invlpg                 = hap_invlpg,
    .gva_to_gfn             = hap_gva_to_gfn_real_mode,
    .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_real_mode,
    .update_cr3             = hap_update_cr3,
    .update_paging_modes    = hap_update_paging_modes,
    .write_p2m_entry        = hap_write_p2m_entry,
    .guest_levels           = 1
};

static const struct paging_mode hap_paging_protected_mode = {
    .page_fault             = hap_page_fault,
    .invlpg                 = hap_invlpg,
    .gva_to_gfn             = hap_gva_to_gfn_2_levels,
    .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_2_levels,
    .update_cr3             = hap_update_cr3,
    .update_paging_modes    = hap_update_paging_modes,
    .write_p2m_entry        = hap_write_p2m_entry,
    .guest_levels           = 2
};

static const struct paging_mode hap_paging_pae_mode = {
    .page_fault             = hap_page_fault,
    .invlpg                 = hap_invlpg,
    .gva_to_gfn             = hap_gva_to_gfn_3_levels,
    .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_3_levels,
    .update_cr3             = hap_update_cr3,
    .update_paging_modes    = hap_update_paging_modes,
    .write_p2m_entry        = hap_write_p2m_entry,
    .guest_levels           = 3
};

static const struct paging_mode hap_paging_long_mode = {
    .page_fault             = hap_page_fault,
    .invlpg                 = hap_invlpg,
    .gva_to_gfn             = hap_gva_to_gfn_4_levels,
    .p2m_ga_to_gfn          = hap_p2m_ga_to_gfn_4_levels,
    .update_cr3             = hap_update_cr3,
    .update_paging_modes    = hap_update_paging_modes,
    .write_p2m_entry        = hap_write_p2m_entry,
    .guest_levels           = 4
};

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */