1 /*
2  * nestedsvm.c: Nested Virtualization
3  * Copyright (c) 2011, Advanced Micro Devices, Inc
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; If not, see <http://www.gnu.org/licenses/>.
16  *
17  */
18 
19 #include <asm/hvm/support.h>
20 #include <asm/hvm/svm/emulate.h>
21 #include <asm/hvm/svm/svm.h>
22 #include <asm/hvm/svm/vmcb.h>
23 #include <asm/hvm/nestedhvm.h>
24 #include <asm/hvm/svm/nestedsvm.h>
25 #include <asm/hvm/svm/svmdebug.h>
26 #include <asm/paging.h> /* paging_mode_hap */
27 #include <asm/event.h> /* for local_event_delivery_(en|dis)able */
28 #include <asm/p2m.h> /* p2m_get_pagetable, p2m_get_nestedp2m */
29 
30 
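/*
 * These error codes are returned by nsvm_vmcb_prepare4vmrun() (via
 * nsvm_vcpu_vmentry()) and translated in nsvm_vcpu_vmrun():
 * NSVM_ERROR_VVMCB results in an emulated VMEXIT(INVALID) for the l1
 * guest, NSVM_ERROR_VMENTRY (and any other failure) in an injected #UD.
 */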
31 #define NSVM_ERROR_VVMCB        1
32 #define NSVM_ERROR_VMENTRY      2
33 
34 static void
35 nestedsvm_vcpu_clgi(struct vcpu *v)
36 {
37     /* clear gif flag */
38     vcpu_nestedsvm(v).ns_gif = 0;
39     local_event_delivery_disable(); /* mask events for PV drivers */
40 }
41 
42 static void
43 nestedsvm_vcpu_stgi(struct vcpu *v)
44 {
45     /* enable gif flag */
46     vcpu_nestedsvm(v).ns_gif = 1;
47     local_event_delivery_enable(); /* unmask events for PV drivers */
48 }
49 
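/*
 * Sanity check a guest physical address before it is used as a VMCB or
 * host save area.  For illustration: 0x12345000 passes (4k aligned and
 * below the HSAVE_PA limit), 0x12345800 fails the alignment check, and
 * 0xfd00001000 fails the upper-bound check.
 */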
50 static int
51 nestedsvm_vmcb_isvalid(struct vcpu *v, uint64_t vmcxaddr)
52 {
53     /* Address must be 4k aligned */
54     if ( (vmcxaddr & ~PAGE_MASK) != 0 )
55         return 0;
56 
57     /* Maximum valid physical address.
58      * See AMD BKDG for HSAVE_PA MSR.
59      */
60     if ( vmcxaddr > 0xfd00000000ULL )
61         return 0;
62 
63     return 1;
64 }
65 
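/*
 * Map the l1 guest's virtual VMCB (given by its guest physical address)
 * into Xen.  An existing mapping is reused if vmcbaddr is unchanged,
 * otherwise the old frame is unmapped first.  Returns 0 if the frame
 * cannot be mapped read/write, 1 on success.
 */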
66 int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr)
67 {
68     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
69 
70     if (nv->nv_vvmcx != NULL && nv->nv_vvmcxaddr != vmcbaddr) {
71         ASSERT(nv->nv_vvmcxaddr != INVALID_PADDR);
72         hvm_unmap_guest_frame(nv->nv_vvmcx, 1);
73         nv->nv_vvmcx = NULL;
74         nv->nv_vvmcxaddr = INVALID_PADDR;
75     }
76 
77     if ( !nv->nv_vvmcx )
78     {
79         bool_t writable;
80         void *vvmcx = hvm_map_guest_frame_rw(paddr_to_pfn(vmcbaddr), 1,
81                                              &writable);
82 
83         if ( !vvmcx )
84             return 0;
85         if ( !writable )
86         {
87             hvm_unmap_guest_frame(vvmcx, 1);
88             return 0;
89         }
90         nv->nv_vvmcx = vvmcx;
91         nv->nv_vvmcxaddr = vmcbaddr;
92     }
93 
94     return 1;
95 }
96 
97 /* Interface methods */
98 int nsvm_vcpu_initialise(struct vcpu *v)
99 {
100     void *msrpm;
101     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
102     struct nestedsvm *svm = &vcpu_nestedsvm(v);
103 
104     msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
105     svm->ns_cached_msrpm = msrpm;
106     if (msrpm == NULL)
107         goto err;
108     memset(msrpm, 0x0, MSRPM_SIZE);
109 
110     msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
111     svm->ns_merged_msrpm = msrpm;
112     if (msrpm == NULL)
113         goto err;
114     memset(msrpm, 0x0, MSRPM_SIZE);
115 
116     nv->nv_n2vmcx = alloc_vmcb();
117     if (nv->nv_n2vmcx == NULL)
118         goto err;
119     nv->nv_n2vmcx_pa = virt_to_maddr(nv->nv_n2vmcx);
120 
121     return 0;
122 
123 err:
124     nsvm_vcpu_destroy(v);
125     return -ENOMEM;
126 }
127 
128 void nsvm_vcpu_destroy(struct vcpu *v)
129 {
130     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
131     struct nestedsvm *svm = &vcpu_nestedsvm(v);
132 
133     /*
134      * When destroying the vcpu, it may be running on behalf of the l2 guest.
135      * Therefore we need to switch the VMCB pointer back to the l1 vmcb,
136      * in order to avoid a double free of the l2 vmcb and a possible memory
137      * leak of the l1 vmcb page.
138      */
139     if (nv->nv_n1vmcx)
140         v->arch.hvm_svm.vmcb = nv->nv_n1vmcx;
141 
142     if (svm->ns_cached_msrpm) {
143         free_xenheap_pages(svm->ns_cached_msrpm,
144                            get_order_from_bytes(MSRPM_SIZE));
145         svm->ns_cached_msrpm = NULL;
146     }
147     if (svm->ns_merged_msrpm) {
148         free_xenheap_pages(svm->ns_merged_msrpm,
149                            get_order_from_bytes(MSRPM_SIZE));
150         svm->ns_merged_msrpm = NULL;
151     }
152     hvm_unmap_guest_frame(nv->nv_vvmcx, 1);
153     nv->nv_vvmcx = NULL;
154     if (nv->nv_n2vmcx) {
155         free_vmcb(nv->nv_n2vmcx);
156         nv->nv_n2vmcx = NULL;
157         nv->nv_n2vmcx_pa = INVALID_PADDR;
158     }
159     if (svm->ns_iomap)
160         svm->ns_iomap = NULL;
161 }
162 
163 int nsvm_vcpu_reset(struct vcpu *v)
164 {
165     struct nestedsvm *svm = &vcpu_nestedsvm(v);
166 
167     svm->ns_msr_hsavepa = INVALID_PADDR;
168     svm->ns_ovvmcb_pa = INVALID_PADDR;
169 
170     svm->ns_tscratio = DEFAULT_TSC_RATIO;
171 
172     svm->ns_cr_intercepts = 0;
173     svm->ns_dr_intercepts = 0;
174     svm->ns_exception_intercepts = 0;
175     svm->ns_general1_intercepts = 0;
176     svm->ns_general2_intercepts = 0;
177     svm->ns_lbr_control.bytes = 0;
178 
179     svm->ns_hap_enabled = 0;
180     svm->ns_vmcb_guestcr3 = 0;
181     svm->ns_vmcb_hostcr3 = 0;
182     svm->ns_guest_asid = 0;
183     svm->ns_hostflags.bytes = 0;
184     svm->ns_vmexit.exitinfo1 = 0;
185     svm->ns_vmexit.exitinfo2 = 0;
186 
187     if (svm->ns_iomap)
188         svm->ns_iomap = NULL;
189 
190     nestedsvm_vcpu_stgi(v);
191     return 0;
192 }
193 
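/*
 * CR0.TS handling for the emulated VMRUN: Xen's lazy FPU switching
 * (svm_fpu_enter()/svm_fpu_leave()) may have toggled TS and the #NM
 * intercept behind the l1 guest's back while it was running.  Propagate
 * that state into the CR0 value and the exception intercepts used for
 * the l2 guest, so the FPU state stays consistent across the VMRUN.
 */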
194 static uint64_t nestedsvm_fpu_vmentry(uint64_t n1cr0,
195     struct vmcb_struct *vvmcb,
196     struct vmcb_struct *n1vmcb, struct vmcb_struct *n2vmcb)
197 {
198     uint64_t vcr0;
199 
200     vcr0 = vvmcb->_cr0;
201     if ( !(n1cr0 & X86_CR0_TS) && (n1vmcb->_cr0 & X86_CR0_TS) ) {
202         /* svm_fpu_leave() ran while the l1 guest was running.
203          * Sync FPU state with l2 guest.
204          */
205         vcr0 |= X86_CR0_TS;
206         n2vmcb->_exception_intercepts |= (1U << TRAP_no_device);
207     } else if ( !(vcr0 & X86_CR0_TS) && (n2vmcb->_cr0 & X86_CR0_TS) ) {
208         /* svm_fpu_enter() ran while the l1 guest was running.
209          * Sync FPU state with l2 guest. */
210         vcr0 &= ~X86_CR0_TS;
211         n2vmcb->_exception_intercepts &= ~(1U << TRAP_no_device);
212     }
213 
214     return vcr0;
215 }
216 
217 static void nestedsvm_fpu_vmexit(struct vmcb_struct *n1vmcb,
218     struct vmcb_struct *n2vmcb, uint64_t n1cr0, uint64_t guest_cr0)
219 {
220     if ( !(guest_cr0 & X86_CR0_TS) && (n2vmcb->_cr0 & X86_CR0_TS) ) {
221         /* svm_fpu_leave() ran while the l2 guest was running.
222          * Sync FPU state with l1 guest. */
223         n1vmcb->_cr0 |= X86_CR0_TS;
224         n1vmcb->_exception_intercepts |= (1U << TRAP_no_device);
225     } else if ( !(n1cr0 & X86_CR0_TS) && (n1vmcb->_cr0 & X86_CR0_TS) ) {
226         /* svm_fpu_enter() ran while the l2 guest was running.
227          * Sync FPU state with l1 guest. */
228         n1vmcb->_cr0 &= ~X86_CR0_TS;
229         n1vmcb->_exception_intercepts &= ~(1U << TRAP_no_device);
230     }
231 }
232 
233 static int nsvm_vcpu_hostsave(struct vcpu *v, unsigned int inst_len)
234 {
235     struct nestedsvm *svm = &vcpu_nestedsvm(v);
236     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
237     struct vmcb_struct *n1vmcb;
238 
239     n1vmcb = nv->nv_n1vmcx;
240     ASSERT(n1vmcb != NULL);
241 
242     n1vmcb->rip += inst_len;
243 
244     /* Save shadowed values. This ensures that the l1 guest
245      * cannot override them to break out. */
246     n1vmcb->_efer = v->arch.hvm_vcpu.guest_efer;
247     n1vmcb->_cr0 = v->arch.hvm_vcpu.guest_cr[0];
248     n1vmcb->_cr2 = v->arch.hvm_vcpu.guest_cr[2];
249     n1vmcb->_cr4 = v->arch.hvm_vcpu.guest_cr[4];
250 
251     /* Remember the host interrupt flag */
252     svm->ns_hostflags.fields.rflagsif =
253         (n1vmcb->rflags & X86_EFLAGS_IF) ? 1 : 0;
254 
255     return 0;
256 }
257 
258 static int nsvm_vcpu_hostrestore(struct vcpu *v, struct cpu_user_regs *regs)
259 {
260     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
261     struct nestedsvm *svm = &vcpu_nestedsvm(v);
262     struct vmcb_struct *n1vmcb, *n2vmcb;
263     int rc;
264 
265     n1vmcb = nv->nv_n1vmcx;
266     n2vmcb = nv->nv_n2vmcx;
267     ASSERT(n1vmcb != NULL);
268     ASSERT(n2vmcb != NULL);
269 
270     /* nsvm_vmcb_prepare4vmexit() already saved register values
271      * handled by VMSAVE/VMLOAD into n1vmcb directly.
272      */
273 
274     /* switch vmcb to l1 guest's vmcb */
275     v->arch.hvm_svm.vmcb = n1vmcb;
276     v->arch.hvm_svm.vmcb_pa = nv->nv_n1vmcx_pa;
277 
278     /* EFER */
279     v->arch.hvm_vcpu.guest_efer = n1vmcb->_efer;
280     rc = hvm_set_efer(n1vmcb->_efer);
281     if ( rc == X86EMUL_EXCEPTION )
282         hvm_inject_hw_exception(TRAP_gp_fault, 0);
283     if (rc != X86EMUL_OKAY)
284         gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
285 
286     /* CR4 */
287     v->arch.hvm_vcpu.guest_cr[4] = n1vmcb->_cr4;
288     rc = hvm_set_cr4(n1vmcb->_cr4, 1);
289     if ( rc == X86EMUL_EXCEPTION )
290         hvm_inject_hw_exception(TRAP_gp_fault, 0);
291     if (rc != X86EMUL_OKAY)
292         gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
293 
294     /* CR0 */
295     nestedsvm_fpu_vmexit(n1vmcb, n2vmcb,
296         svm->ns_cr0, v->arch.hvm_vcpu.guest_cr[0]);
297     v->arch.hvm_vcpu.guest_cr[0] = n1vmcb->_cr0 | X86_CR0_PE;
298     n1vmcb->rflags &= ~X86_EFLAGS_VM;
299     rc = hvm_set_cr0(n1vmcb->_cr0 | X86_CR0_PE, 1);
300     if ( rc == X86EMUL_EXCEPTION )
301         hvm_inject_hw_exception(TRAP_gp_fault, 0);
302     if (rc != X86EMUL_OKAY)
303         gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
304     svm->ns_cr0 = v->arch.hvm_vcpu.guest_cr[0];
305 
306     /* CR2 */
307     v->arch.hvm_vcpu.guest_cr[2] = n1vmcb->_cr2;
308     hvm_update_guest_cr(v, 2);
309 
310     /* CR3 */
311     /* Nested paging mode */
312     if (nestedhvm_paging_mode_hap(v)) {
313         /* host nested paging + guest nested paging. */
314         /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
315     } else if (paging_mode_hap(v->domain)) {
316         /* host nested paging + guest shadow paging. */
317         /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
318     } else {
319         /* host shadow paging + guest shadow paging. */
320 
321         /* Reset MMU context  -- XXX (hostrestore) not yet working */
322         if (!pagetable_is_null(v->arch.guest_table))
323             put_page(pagetable_get_page(v->arch.guest_table));
324         v->arch.guest_table = pagetable_null();
325         /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
326     }
327     rc = hvm_set_cr3(n1vmcb->_cr3, 1);
328     if ( rc == X86EMUL_EXCEPTION )
329         hvm_inject_hw_exception(TRAP_gp_fault, 0);
330     if (rc != X86EMUL_OKAY)
331         gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
332 
333     regs->rax = n1vmcb->rax;
334     regs->rsp = n1vmcb->rsp;
335     regs->rip = n1vmcb->rip;
336     regs->rflags = n1vmcb->rflags;
337     n1vmcb->_dr7 = 0; /* disable all breakpoints */
338     n1vmcb->_cpl = 0;
339 
340     /* Clear exitintinfo to prevent a fault loop of re-injecting
341      * exceptions forever.
342      */
343     n1vmcb->exitintinfo.bytes = 0;
344 
345     /* Cleanbits */
346     n1vmcb->cleanbits.bytes = 0;
347 
348     return 0;
349 }
350 
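/*
 * Build the permission maps the l2 guest actually runs with:
 *  - the MSR permission map is the bitwise OR of Xen's own map and the
 *    l1 guest's cached map, so an MSR access exits if either of them
 *    wants it intercepted (merged[i] = host[i] | l1[i] below);
 *  - the IO permission map is a shadow map chosen via
 *    nestedhvm_vcpu_iomap_get(), keyed on whether the l1 guest
 *    intercepts ports 0x80 and/or 0xED.
 */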
351 static int nsvm_vmrun_permissionmap(struct vcpu *v, bool_t viopm)
352 {
353     struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
354     struct nestedsvm *svm = &vcpu_nestedsvm(v);
355     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
356     struct vmcb_struct *ns_vmcb = nv->nv_vvmcx;
357     struct vmcb_struct *host_vmcb = arch_svm->vmcb;
358     unsigned long *ns_msrpm_ptr;
359     unsigned int i;
360     enum hvm_translation_result ret;
361     unsigned long *ns_viomap;
362     bool_t ioport_80 = 1, ioport_ed = 1;
363 
364     ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm;
365 
366     ret = hvm_copy_from_guest_phys(svm->ns_cached_msrpm,
367                                    ns_vmcb->_msrpm_base_pa, MSRPM_SIZE);
368     if ( ret != HVMTRANS_okay )
369     {
370         gdprintk(XENLOG_ERR, "hvm_copy_from_guest_phys msrpm %u\n", ret);
371         return 1;
372     }
373 
374     /* Check the l1 guest's io permission map and get a shadow one based on
375      * whether the l1 guest intercepts io ports 0x80 and/or 0xED.
376      */
377     svm->ns_oiomap_pa = svm->ns_iomap_pa;
378     svm->ns_iomap_pa = ns_vmcb->_iopm_base_pa;
379 
380     ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT, 0);
381     if ( ns_viomap )
382     {
383         ioport_80 = test_bit(0x80, ns_viomap);
384         ioport_ed = test_bit(0xed, ns_viomap);
385         hvm_unmap_guest_frame(ns_viomap, 0);
386     }
387 
388     svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed);
389 
390     nv->nv_ioport80 = ioport_80;
391     nv->nv_ioportED = ioport_ed;
392 
393     /* v->arch.hvm_svm.msrpm has type unsigned long, thus
394      * BYTES_PER_LONG.
395      */
396     for (i = 0; i < MSRPM_SIZE / BYTES_PER_LONG; i++)
397         svm->ns_merged_msrpm[i] = arch_svm->msrpm[i] | ns_msrpm_ptr[i];
398 
399     host_vmcb->_iopm_base_pa =
400         (uint64_t)virt_to_maddr(svm->ns_iomap);
401     host_vmcb->_msrpm_base_pa =
402         (uint64_t)virt_to_maddr(svm->ns_merged_msrpm);
403 
404     return 0;
405 }
406 
407 static void nestedsvm_vmcb_set_nestedp2m(struct vcpu *v,
408     struct vmcb_struct *vvmcb, struct vmcb_struct *n2vmcb)
409 {
410     struct p2m_domain *p2m;
411 
412     ASSERT(v != NULL);
413     ASSERT(vvmcb != NULL);
414     ASSERT(n2vmcb != NULL);
415 
416     /* This will allow nsvm_vcpu_hostcr3() to return correct np2m_base */
417     vcpu_nestedsvm(v).ns_vmcb_hostcr3 = vvmcb->_h_cr3;
418 
419     p2m = p2m_get_nestedp2m(v);
420     n2vmcb->_h_cr3 = pagetable_get_paddr(p2m_get_pagetable(p2m));
421 }
422 
423 static int nsvm_vmcb_prepare4vmrun(struct vcpu *v, struct cpu_user_regs *regs)
424 {
425     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
426     struct nestedsvm *svm = &vcpu_nestedsvm(v);
427     struct vmcb_struct *ns_vmcb, *n1vmcb, *n2vmcb;
428     bool_t vcleanbits_valid;
429     int rc;
430     uint64_t cr0;
431 
432     ns_vmcb = nv->nv_vvmcx;
433     n1vmcb = nv->nv_n1vmcx;
434     n2vmcb = nv->nv_n2vmcx;
435     ASSERT(ns_vmcb != NULL);
436     ASSERT(n1vmcb != NULL);
437     ASSERT(n2vmcb != NULL);
438 
439     /* Check if virtual VMCB cleanbits are valid */
440     vcleanbits_valid = 1;
441     if ( svm->ns_ovvmcb_pa == INVALID_PADDR )
442         vcleanbits_valid = 0;
443     if (svm->ns_ovvmcb_pa != nv->nv_vvmcxaddr)
444         vcleanbits_valid = 0;
445 
446 #define vcleanbit_set(_name)	\
447     (vcleanbits_valid && ns_vmcb->cleanbits.fields._name)
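    /*
     * A virtual cleanbit is only honoured if the l1 guest re-ran the same
     * virtual VMCB it used for the previous VMRUN (checked above); e.g.
     * vcleanbit_set(intercepts) being true lets us skip re-reading the
     * intercept vectors from the virtual VMCB and reuse the cached ones.
     */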
448 
449     /* Enable l2 guest intercepts */
450     if (!vcleanbit_set(intercepts)) {
451         svm->ns_cr_intercepts = ns_vmcb->_cr_intercepts;
452         svm->ns_dr_intercepts = ns_vmcb->_dr_intercepts;
453         svm->ns_exception_intercepts = ns_vmcb->_exception_intercepts;
454         svm->ns_general1_intercepts = ns_vmcb->_general1_intercepts;
455         svm->ns_general2_intercepts = ns_vmcb->_general2_intercepts;
456     }
457 
458     /* We could track the cleanbits of the n1vmcb from
459      * last emulated #VMEXIT to this emulated VMRUN to save the merges
460      * below. Those cleanbits would be tracked in an integer field
461      * in struct nestedsvm.
462      * But this effort is not worth doing because:
463      * - Only the intercepts bit of the n1vmcb can effectively be used here
464      * - The CPU runs more instructions for the tracking than can be
465      *   saved here.
466      * The overhead comes from (ordered from highest to lowest):
467      * - svm_ctxt_switch_to (CPU context switching)
468      * - svm_fpu_enter, svm_fpu_leave (lazy FPU switching)
469      * - emulated CLGI (clears VINTR intercept)
470      * - host clears VINTR intercept
471      * Test results show that the overhead is high enough that the
472      * tracked intercepts bit of the n1vmcb is practically *always* cleared.
473      */
474 
475     n2vmcb->_cr_intercepts =
476         n1vmcb->_cr_intercepts | ns_vmcb->_cr_intercepts;
477     n2vmcb->_dr_intercepts =
478         n1vmcb->_dr_intercepts | ns_vmcb->_dr_intercepts;
479     n2vmcb->_exception_intercepts =
480         n1vmcb->_exception_intercepts | ns_vmcb->_exception_intercepts;
481     n2vmcb->_general1_intercepts =
482         n1vmcb->_general1_intercepts | ns_vmcb->_general1_intercepts;
483     n2vmcb->_general2_intercepts =
484         n1vmcb->_general2_intercepts | ns_vmcb->_general2_intercepts;
485 
486     /* Nested Pause Filter */
487     if (ns_vmcb->_general1_intercepts & GENERAL1_INTERCEPT_PAUSE)
488         n2vmcb->_pause_filter_count =
489             min(n1vmcb->_pause_filter_count, ns_vmcb->_pause_filter_count);
490     else
491         n2vmcb->_pause_filter_count = n1vmcb->_pause_filter_count;
492 
493     /* TSC offset */
494     n2vmcb->_tsc_offset = n1vmcb->_tsc_offset + ns_vmcb->_tsc_offset;
495 
496     /* Nested IO permission bitmaps */
497     rc = nsvm_vmrun_permissionmap(v, vcleanbit_set(iopm));
498     if (rc)
499         return rc;
500 
501     /* ASID - Emulation handled in hvm_asid_handle_vmenter() */
502 
503     /* TLB control */
504     n2vmcb->tlb_control = ns_vmcb->tlb_control;
505 
506     /* Virtual Interrupts */
507     if (!vcleanbit_set(tpr)) {
508         n2vmcb->_vintr = ns_vmcb->_vintr;
509         n2vmcb->_vintr.fields.intr_masking = 1;
510     }
511 
512     /* Shadow Mode */
513     n2vmcb->interrupt_shadow = ns_vmcb->interrupt_shadow;
514 
515     /* Exit codes */
516     n2vmcb->exitcode = ns_vmcb->exitcode;
517     n2vmcb->exitinfo1 = ns_vmcb->exitinfo1;
518     n2vmcb->exitinfo2 = ns_vmcb->exitinfo2;
519     n2vmcb->exitintinfo = ns_vmcb->exitintinfo;
520 
521     /* Pending Interrupts */
522     n2vmcb->eventinj = ns_vmcb->eventinj;
523 
524     /* LBR virtualization */
525     if (!vcleanbit_set(lbr)) {
526         svm->ns_lbr_control = ns_vmcb->lbr_control;
527     }
528     n2vmcb->lbr_control.bytes =
529         n1vmcb->lbr_control.bytes | ns_vmcb->lbr_control.bytes;
530 
531     /* NextRIP - only evaluated on #VMEXIT. */
532 
533     /*
534      * VMCB Save State Area
535      */
536 
537     /* Segments */
538     if (!vcleanbit_set(seg)) {
539         n2vmcb->es = ns_vmcb->es;
540         n2vmcb->cs = ns_vmcb->cs;
541         n2vmcb->ss = ns_vmcb->ss;
542         n2vmcb->ds = ns_vmcb->ds;
543         /* CPL */
544         n2vmcb->_cpl = ns_vmcb->_cpl;
545     }
546     if (!vcleanbit_set(dt)) {
547         n2vmcb->gdtr = ns_vmcb->gdtr;
548         n2vmcb->idtr = ns_vmcb->idtr;
549     }
550 
551     /* EFER */
552     v->arch.hvm_vcpu.guest_efer = ns_vmcb->_efer;
553     rc = hvm_set_efer(ns_vmcb->_efer);
554     if ( rc == X86EMUL_EXCEPTION )
555         hvm_inject_hw_exception(TRAP_gp_fault, 0);
556     if (rc != X86EMUL_OKAY)
557         gdprintk(XENLOG_ERR, "hvm_set_efer failed, rc: %u\n", rc);
558 
559     /* CR4 */
560     v->arch.hvm_vcpu.guest_cr[4] = ns_vmcb->_cr4;
561     rc = hvm_set_cr4(ns_vmcb->_cr4, 1);
562     if ( rc == X86EMUL_EXCEPTION )
563         hvm_inject_hw_exception(TRAP_gp_fault, 0);
564     if (rc != X86EMUL_OKAY)
565         gdprintk(XENLOG_ERR, "hvm_set_cr4 failed, rc: %u\n", rc);
566 
567     /* CR0 */
568     svm->ns_cr0 = v->arch.hvm_vcpu.guest_cr[0];
569     cr0 = nestedsvm_fpu_vmentry(svm->ns_cr0, ns_vmcb, n1vmcb, n2vmcb);
570     v->arch.hvm_vcpu.guest_cr[0] = ns_vmcb->_cr0;
571     rc = hvm_set_cr0(cr0, 1);
572     if ( rc == X86EMUL_EXCEPTION )
573         hvm_inject_hw_exception(TRAP_gp_fault, 0);
574     if (rc != X86EMUL_OKAY)
575         gdprintk(XENLOG_ERR, "hvm_set_cr0 failed, rc: %u\n", rc);
576 
577     /* CR2 */
578     v->arch.hvm_vcpu.guest_cr[2] = ns_vmcb->_cr2;
579     hvm_update_guest_cr(v, 2);
580 
581     /* Nested paging mode */
582     if (nestedhvm_paging_mode_hap(v)) {
583         /* host nested paging + guest nested paging. */
584         n2vmcb->_np_enable = 1;
585 
586         nestedsvm_vmcb_set_nestedp2m(v, ns_vmcb, n2vmcb);
587 
588         /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
589         rc = hvm_set_cr3(ns_vmcb->_cr3, 1);
590         if ( rc == X86EMUL_EXCEPTION )
591             hvm_inject_hw_exception(TRAP_gp_fault, 0);
592         if (rc != X86EMUL_OKAY)
593             gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
594     } else if (paging_mode_hap(v->domain)) {
595         /* host nested paging + guest shadow paging. */
596         n2vmcb->_np_enable = 1;
597         /* Keep h_cr3 as it is. */
598         n2vmcb->_h_cr3 = n1vmcb->_h_cr3;
599         /* When l1 guest does shadow paging
600          * we assume it intercepts page faults.
601          */
602         /* hvm_set_cr3() below sets v->arch.hvm_vcpu.guest_cr[3] for us. */
603         rc = hvm_set_cr3(ns_vmcb->_cr3, 1);
604         if ( rc == X86EMUL_EXCEPTION )
605             hvm_inject_hw_exception(TRAP_gp_fault, 0);
606         if (rc != X86EMUL_OKAY)
607             gdprintk(XENLOG_ERR, "hvm_set_cr3 failed, rc: %u\n", rc);
608     } else {
609         /* host shadow paging + guest shadow paging. */
610         n2vmcb->_np_enable = 0;
611         n2vmcb->_h_cr3 = 0x0;
612 
613         /* TODO: Once shadow-shadow paging is in place come back to here
614          * and set host_vmcb->_cr3 to the shadowed shadow table.
615          */
616     }
617 
618     /* DRn */
619     if (!vcleanbit_set(dr)) {
620         n2vmcb->_dr7 = ns_vmcb->_dr7;
621         n2vmcb->_dr6 = ns_vmcb->_dr6;
622     }
623 
624     /* RFLAGS */
625     n2vmcb->rflags = ns_vmcb->rflags;
626 
627     /* RIP */
628     n2vmcb->rip = ns_vmcb->rip;
629 
630     /* RSP */
631     n2vmcb->rsp = ns_vmcb->rsp;
632 
633     /* RAX */
634     n2vmcb->rax = ns_vmcb->rax;
635 
636     /* Keep the host values of the fs, gs, ldtr, tr, kerngsbase,
637      * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
638      * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
639      */
640 
641     /* Page tables */
642     n2vmcb->pdpe0 = ns_vmcb->pdpe0;
643     n2vmcb->pdpe1 = ns_vmcb->pdpe1;
644     n2vmcb->pdpe2 = ns_vmcb->pdpe2;
645     n2vmcb->pdpe3 = ns_vmcb->pdpe3;
646 
647     /* PAT */
648     if (!vcleanbit_set(np)) {
649         n2vmcb->_g_pat = ns_vmcb->_g_pat;
650     }
651 
652     if (!vcleanbit_set(lbr)) {
653         /* Debug Control MSR */
654         n2vmcb->_debugctlmsr = ns_vmcb->_debugctlmsr;
655 
656         /* LBR MSRs */
657         n2vmcb->_lastbranchfromip = ns_vmcb->_lastbranchfromip;
658         n2vmcb->_lastbranchtoip = ns_vmcb->_lastbranchtoip;
659         n2vmcb->_lastintfromip = ns_vmcb->_lastintfromip;
660         n2vmcb->_lastinttoip = ns_vmcb->_lastinttoip;
661     }
662 
663     /* Cleanbits */
664     n2vmcb->cleanbits.bytes = 0;
665 
666     rc = svm_vmcb_isvalid(__func__, ns_vmcb, v, true);
667     if (rc) {
668         gdprintk(XENLOG_ERR, "virtual vmcb invalid\n");
669         return NSVM_ERROR_VVMCB;
670     }
671 
672     rc = svm_vmcb_isvalid(__func__, n2vmcb, v, true);
673     if (rc) {
674         gdprintk(XENLOG_ERR, "n2vmcb invalid\n");
675         return NSVM_ERROR_VMENTRY;
676     }
677 
678     /* Switch guest registers to l2 guest */
679     regs->rax = ns_vmcb->rax;
680     regs->rip = ns_vmcb->rip;
681     regs->rsp = ns_vmcb->rsp;
682     regs->rflags = ns_vmcb->rflags;
683 
684 #undef vcleanbit_set
685     return 0;
686 }
687 
688 static int
689 nsvm_vcpu_vmentry(struct vcpu *v, struct cpu_user_regs *regs,
690     unsigned int inst_len)
691 {
692     int ret;
693     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
694     struct nestedsvm *svm = &vcpu_nestedsvm(v);
695     struct vmcb_struct *ns_vmcb;
696 
697     ns_vmcb = nv->nv_vvmcx;
698     ASSERT(ns_vmcb != NULL);
699     ASSERT(nv->nv_n2vmcx != NULL);
700     ASSERT(nv->nv_n2vmcx_pa != INVALID_PADDR);
701 
702     /* Save values for later use. Needed for Nested-on-Nested and
703      * Shadow-on-Shadow paging.
704      */
705     svm->ns_vmcb_guestcr3 = ns_vmcb->_cr3;
706     svm->ns_vmcb_hostcr3 = ns_vmcb->_h_cr3;
707 
708     /* Convert explicitly to boolean. Deals with l1 guests
709      * that use flush-by-asid w/o checking the cpuid bits */
710     nv->nv_flushp2m = !!ns_vmcb->tlb_control;
711     if ( svm->ns_guest_asid != ns_vmcb->_guest_asid )
712     {
713         nv->nv_flushp2m = 1;
714         hvm_asid_flush_vcpu_asid(&vcpu_nestedhvm(v).nv_n2asid);
715         svm->ns_guest_asid = ns_vmcb->_guest_asid;
716     }
717 
718     /* nested paging for the guest */
719     svm->ns_hap_enabled = (ns_vmcb->_np_enable) ? 1 : 0;
720 
721     /* Remember the V_INTR_MASK in hostflags */
722     svm->ns_hostflags.fields.vintrmask =
723         (ns_vmcb->_vintr.fields.intr_masking) ? 1 : 0;
724 
725     /* Save l1 guest state (= host state) */
726     ret = nsvm_vcpu_hostsave(v, inst_len);
727     if (ret) {
728         gdprintk(XENLOG_ERR, "hostsave failed, ret = %i\n", ret);
729         return ret;
730     }
731 
732     /* switch vmcb to shadow vmcb */
733     v->arch.hvm_svm.vmcb = nv->nv_n2vmcx;
734     v->arch.hvm_svm.vmcb_pa = nv->nv_n2vmcx_pa;
735 
736     ret = nsvm_vmcb_prepare4vmrun(v, regs);
737     if (ret) {
738         gdprintk(XENLOG_ERR, "prepare4vmrun failed, ret = %i\n", ret);
739         return ret;
740     }
741 
742     nestedsvm_vcpu_stgi(v);
743     return 0;
744 }
745 
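/*
 * Emulate VMRUN on behalf of the l1 guest.  Returns 0 on success (the
 * vcpu is now in l2 guest mode), 1 after injecting #UD on an entry
 * failure, and -1 when a VMEXIT (SHUTDOWN or INVALID) has been set up
 * in ns_vmexit for the caller, nsvm_vcpu_switch(), to deliver.
 */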
746 int
747 nsvm_vcpu_vmrun(struct vcpu *v, struct cpu_user_regs *regs)
748 {
749     int ret;
750     unsigned int inst_len;
751     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
752     struct nestedsvm *svm = &vcpu_nestedsvm(v);
753 
754     inst_len = __get_instruction_length(v, INSTR_VMRUN);
755     if (inst_len == 0) {
756         svm->ns_vmexit.exitcode = VMEXIT_SHUTDOWN;
757         return -1;
758     }
759 
760     nv->nv_vmswitch_in_progress = 1;
761     ASSERT(nv->nv_vvmcx != NULL);
762 
763     /* save host state */
764     ret = nsvm_vcpu_vmentry(v, regs, inst_len);
765 
766     /* Switch vcpu to guest mode. In the error case
767      * this ensures the host mode is restored correctly
768      * and the l1 guest stays alive. */
769     nestedhvm_vcpu_enter_guestmode(v);
770 
771     switch (ret) {
772     case 0:
773         break;
774     case NSVM_ERROR_VVMCB:
775         gdprintk(XENLOG_ERR, "inject VMEXIT(INVALID)\n");
776         svm->ns_vmexit.exitcode = VMEXIT_INVALID;
777         return -1;
778     case NSVM_ERROR_VMENTRY:
779     default:
780         gdprintk(XENLOG_ERR,
781             "nsvm_vcpu_vmentry failed, injecting #UD\n");
782         hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
783         /* Must happen after hvm_inject_hw_exception or it doesn't work right. */
784         nv->nv_vmswitch_in_progress = 0;
785         return 1;
786     }
787 
788     /* If l1 guest uses shadow paging, update the paging mode. */
789     if (!nestedhvm_paging_mode_hap(v))
790         paging_update_paging_modes(v);
791 
792     nv->nv_vmswitch_in_progress = 0;
793     return 0;
794 }
795 
796 static int
797 nsvm_vcpu_vmexit_inject(struct vcpu *v, struct cpu_user_regs *regs,
798     uint64_t exitcode)
799 {
800     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
801     struct nestedsvm *svm = &vcpu_nestedsvm(v);
802     struct vmcb_struct *ns_vmcb;
803 
804     ASSERT(svm->ns_gif == 0);
805     ns_vmcb = nv->nv_vvmcx;
806 
807     if (nv->nv_vmexit_pending) {
808 
809         switch (exitcode) {
810         case VMEXIT_INTR:
811             if ( unlikely(ns_vmcb->eventinj.fields.v)
812                 && nv->nv_vmentry_pending
813                 && hvm_event_needs_reinjection(ns_vmcb->eventinj.fields.type,
814                     ns_vmcb->eventinj.fields.vector) )
815             {
816                 ns_vmcb->exitintinfo.bytes = ns_vmcb->eventinj.bytes;
817             }
818             break;
819         case VMEXIT_EXCEPTION_PF:
820             ns_vmcb->_cr2 = ns_vmcb->exitinfo2;
821             /* fall through */
822         case VMEXIT_NPF:
823             /* PF error code */
824             ns_vmcb->exitinfo1 = svm->ns_vmexit.exitinfo1;
825             /* fault address */
826             ns_vmcb->exitinfo2 = svm->ns_vmexit.exitinfo2;
827             break;
828         case VMEXIT_EXCEPTION_NP:
829         case VMEXIT_EXCEPTION_SS:
830         case VMEXIT_EXCEPTION_GP:
831         case VMEXIT_EXCEPTION_15:
832         case VMEXIT_EXCEPTION_MF:
833         case VMEXIT_EXCEPTION_AC:
834             ns_vmcb->exitinfo1 = svm->ns_vmexit.exitinfo1;
835             break;
836         default:
837             break;
838         }
839     }
840 
841     ns_vmcb->exitcode = exitcode;
842     ns_vmcb->eventinj.bytes = 0;
843     return 0;
844 }
845 
846 int
847 nsvm_vcpu_vmexit_event(struct vcpu *v, const struct x86_event *trap)
848 {
849     ASSERT(vcpu_nestedhvm(v).nv_vvmcx != NULL);
850 
851     nestedsvm_vmexit_defer(v, VMEXIT_EXCEPTION_DE + trap->vector,
852                            trap->error_code, trap->cr2);
853     return NESTEDHVM_VMEXIT_DONE;
854 }
855 
856 uint64_t nsvm_vcpu_hostcr3(struct vcpu *v)
857 {
858     return vcpu_nestedsvm(v).ns_vmcb_hostcr3;
859 }
860 
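/*
 * Check whether the l1 guest wants to intercept an access to 'msr'.
 * Each MSR has two consecutive bits in the permission map: bit 2*n for
 * reads and bit 2*n+1 for writes of the n-th MSR within its block (a
 * sketch of the indexing, assuming svm_msrbit() returns the start of
 * the block covering that MSR).  MSRs not covered by the map are always
 * handed to the l1 guest (NESTEDHVM_VMEXIT_INJECT).
 */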
861 static int
862 nsvm_vmcb_guest_intercepts_msr(unsigned long *msr_bitmap,
863     uint32_t msr, bool_t write)
864 {
865     bool_t enabled;
866     unsigned long *msr_bit;
867 
868     msr_bit = svm_msrbit(msr_bitmap, msr);
869 
870     if (msr_bit == NULL)
871         /* MSR not in the permission map: Let the guest handle it. */
872         return NESTEDHVM_VMEXIT_INJECT;
873 
874     msr &= 0x1fff;
875 
876     if (write)
877         /* write access */
878         enabled = test_bit(msr * 2 + 1, msr_bit);
879     else
880         /* read access */
881         enabled = test_bit(msr * 2, msr_bit);
882 
883     if (!enabled)
884         return NESTEDHVM_VMEXIT_HOST;
885 
886     return NESTEDHVM_VMEXIT_INJECT;
887 }
888 
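/*
 * Check whether the l1 guest's IO permission map intercepts the port
 * access described by exitinfo1.  The map holds one bit per port; the
 * loop below walks the bits covering the access and remaps the next
 * page of the map when the access crosses a page boundary.
 */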
889 static int
890 nsvm_vmcb_guest_intercepts_ioio(paddr_t iopm_pa, uint64_t exitinfo1)
891 {
892     unsigned long gfn = iopm_pa >> PAGE_SHIFT;
893     unsigned long *io_bitmap;
894     ioio_info_t ioinfo;
895     uint16_t port;
896     unsigned int size;
897     bool_t enabled;
898 
899     ioinfo.bytes = exitinfo1;
900     port = ioinfo.fields.port;
901     size = ioinfo.fields.sz32 ? 4 : ioinfo.fields.sz16 ? 2 : 1;
902 
903     switch ( port )
904     {
905     case 0 ... 8 * PAGE_SIZE - 1: /* first 4KB page */
906         break;
907     case 8 * PAGE_SIZE ... 2 * 8 * PAGE_SIZE - 1: /* second 4KB page */
908         port -= 8 * PAGE_SIZE;
909         ++gfn;
910         break;
911     default:
912         BUG();
913         break;
914     }
915 
916     for ( io_bitmap = hvm_map_guest_frame_ro(gfn, 0); ; )
917     {
918         enabled = io_bitmap && test_bit(port, io_bitmap);
919         if ( !enabled || !--size )
920             break;
921         if ( unlikely(++port == 8 * PAGE_SIZE) )
922         {
923             hvm_unmap_guest_frame(io_bitmap, 0);
924             io_bitmap = hvm_map_guest_frame_ro(++gfn, 0);
925             port -= 8 * PAGE_SIZE;
926         }
927     }
928     hvm_unmap_guest_frame(io_bitmap, 0);
929 
930     if ( !enabled )
931         return NESTEDHVM_VMEXIT_HOST;
932 
933     return NESTEDHVM_VMEXIT_INJECT;
934 }
935 
936 static bool_t
937 nsvm_vmcb_guest_intercepts_exitcode(struct vcpu *v,
938     struct cpu_user_regs *regs, uint64_t exitcode)
939 {
940     uint64_t exit_bits;
941     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
942     struct nestedsvm *svm = &vcpu_nestedsvm(v);
943     struct vmcb_struct *ns_vmcb = nv->nv_vvmcx;
944     enum nestedhvm_vmexits vmexits;
945 
946     switch (exitcode) {
947     case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
948     case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
949         exit_bits = 1ULL << (exitcode - VMEXIT_CR0_READ);
950         if (svm->ns_cr_intercepts & exit_bits)
951             break;
952         return 0;
953 
954     case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
955     case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
956         exit_bits = 1ULL << (exitcode - VMEXIT_DR0_READ);
957         if (svm->ns_dr_intercepts & exit_bits)
958             break;
959         return 0;
960 
961     case VMEXIT_EXCEPTION_DE ... VMEXIT_EXCEPTION_XF:
962         exit_bits = 1ULL << (exitcode - VMEXIT_EXCEPTION_DE);
963         if (svm->ns_exception_intercepts & exit_bits)
964             break;
965         return 0;
966 
967     case VMEXIT_INTR ... VMEXIT_SHUTDOWN:
968         exit_bits = 1ULL << (exitcode - VMEXIT_INTR);
969         if (svm->ns_general1_intercepts & exit_bits)
970             break;
971         return 0;
972 
973     case VMEXIT_VMRUN ... VMEXIT_XSETBV:
974         exit_bits = 1ULL << (exitcode - VMEXIT_VMRUN);
975         if (svm->ns_general2_intercepts & exit_bits)
976             break;
977         return 0;
978 
979     case VMEXIT_NPF:
980         if (nestedhvm_paging_mode_hap(v))
981             break;
982         return 0;
983     case VMEXIT_INVALID:
984         /* Always intercepted */
985         break;
986 
987     default:
988         gdprintk(XENLOG_ERR, "Illegal exitcode %#"PRIx64"\n", exitcode);
989         BUG();
990         break;
991     }
992 
993     /* Special cases: Do more detailed checks */
994     switch (exitcode) {
995     case VMEXIT_MSR:
996         ASSERT(regs != NULL);
997         if ( !nestedsvm_vmcb_map(v, nv->nv_vvmcxaddr) )
998             break;
999         ns_vmcb = nv->nv_vvmcx;
1000         vmexits = nsvm_vmcb_guest_intercepts_msr(svm->ns_cached_msrpm,
1001             regs->ecx, ns_vmcb->exitinfo1 != 0);
1002         if (vmexits == NESTEDHVM_VMEXIT_HOST)
1003             return 0;
1004         break;
1005     case VMEXIT_IOIO:
1006         if ( !nestedsvm_vmcb_map(v, nv->nv_vvmcxaddr) )
1007             break;
1008         ns_vmcb = nv->nv_vvmcx;
1009         vmexits = nsvm_vmcb_guest_intercepts_ioio(ns_vmcb->_iopm_base_pa,
1010             ns_vmcb->exitinfo1);
1011         if (vmexits == NESTEDHVM_VMEXIT_HOST)
1012             return 0;
1013         break;
1014     }
1015 
1016     return 1;
1017 }
1018 
1019 bool_t
1020 nsvm_vmcb_guest_intercepts_event(
1021     struct vcpu *v, unsigned int vector, int errcode)
1022 {
1023     return nsvm_vmcb_guest_intercepts_exitcode(v,
1024         guest_cpu_user_regs(), VMEXIT_EXCEPTION_DE + vector);
1025 }
1026 
1027 static int
1028 nsvm_vmcb_prepare4vmexit(struct vcpu *v, struct cpu_user_regs *regs)
1029 {
1030     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1031     struct nestedsvm *svm = &vcpu_nestedsvm(v);
1032     struct vmcb_struct *ns_vmcb = nv->nv_vvmcx;
1033     struct vmcb_struct *n2vmcb = nv->nv_n2vmcx;
1034 
1035     svm_vmsave(nv->nv_n1vmcx);
1036 
1037     /* Cache guest physical address of virtual vmcb
1038      * for VMCB Cleanbit emulation.
1039      */
1040     svm->ns_ovvmcb_pa = nv->nv_vvmcxaddr;
1041 
1042     /* Intercepts - keep them as they are */
1043 
1044     /* Pausefilter - keep it as is */
1045 
1046     /* Nested IO permission bitmap */
1047     /* Just keep the iopm_base_pa and msrpm_base_pa values.
1048      * The guest must not see the virtualized values.
1049      */
1050 
1051     /* TSC offset */
1052     /* Keep it. It's maintained by the l1 guest. */
1053 
1054     /* ASID */
1055     /* ns_vmcb->_guest_asid = n2vmcb->_guest_asid; */
1056 
1057     /* TLB control */
1058     ns_vmcb->tlb_control = 0;
1059 
1060     /* Virtual Interrupts */
1061     ns_vmcb->_vintr = n2vmcb->_vintr;
1062     if (!(svm->ns_hostflags.fields.vintrmask))
1063         ns_vmcb->_vintr.fields.intr_masking = 0;
1064 
1065     /* Shadow mode */
1066     ns_vmcb->interrupt_shadow = n2vmcb->interrupt_shadow;
1067 
1068     /* Exit codes */
1069     ns_vmcb->exitcode = n2vmcb->exitcode;
1070     ns_vmcb->exitinfo1 = n2vmcb->exitinfo1;
1071     ns_vmcb->exitinfo2 = n2vmcb->exitinfo2;
1072     ns_vmcb->exitintinfo = n2vmcb->exitintinfo;
1073 
1074     /* Interrupts */
1075     /* If we emulate a VMRUN/#VMEXIT in the same host #VMEXIT cycle we have
1076      * to make sure that we do not lose injected events. So check eventinj
1077      * here and copy it to exitintinfo if it is valid.
1078      * exitintinfo and eventinj can't be both valid because the case below
1079      * only happens on a VMRUN instruction intercept which has no valid
1080      * exitintinfo set.
1081      */
1082     if ( unlikely(n2vmcb->eventinj.fields.v) &&
1083          hvm_event_needs_reinjection(n2vmcb->eventinj.fields.type,
1084                                      n2vmcb->eventinj.fields.vector) )
1085     {
1086         ns_vmcb->exitintinfo = n2vmcb->eventinj;
1087     }
1088 
1089     ns_vmcb->eventinj.bytes = 0;
1090 
1091     /* Nested paging mode */
1092     if (nestedhvm_paging_mode_hap(v)) {
1093         /* host nested paging + guest nested paging. */
1094         ns_vmcb->_np_enable = n2vmcb->_np_enable;
1095         ns_vmcb->_cr3 = n2vmcb->_cr3;
1096         /* The vmcb->h_cr3 is the shadowed h_cr3. The original
1097          * unshadowed guest h_cr3 is kept in ns_vmcb->h_cr3,
1098          * hence we keep the ns_vmcb->h_cr3 value. */
1099     } else if (paging_mode_hap(v->domain)) {
1100         /* host nested paging + guest shadow paging. */
1101         ns_vmcb->_np_enable = 0;
1102         /* Throw h_cr3 away. Guest is not allowed to set it or
1103          * it can break out, otherwise (security hole!) */
1104         ns_vmcb->_h_cr3 = 0x0;
1105         /* Stop intercepting #PF (already done above
1106          * by restoring cached intercepts). */
1107         ns_vmcb->_cr3 = n2vmcb->_cr3;
1108     } else {
1109         /* host shadow paging + guest shadow paging. */
1110         ns_vmcb->_np_enable = 0;
1111         ns_vmcb->_h_cr3 = 0x0;
1112         /* The vmcb->_cr3 is the shadowed cr3. The original
1113          * unshadowed guest cr3 is kept in ns_vmcb->_cr3,
1114          * hence we keep the ns_vmcb->_cr3 value. */
1115     }
1116 
1117     /* LBR virtualization - keep lbr control as is */
1118 
1119     /* NextRIP */
1120     ns_vmcb->nextrip = n2vmcb->nextrip;
1121 
1122     /* Decode Assist */
1123     ns_vmcb->guest_ins_len = n2vmcb->guest_ins_len;
1124     memcpy(ns_vmcb->guest_ins, n2vmcb->guest_ins, sizeof(ns_vmcb->guest_ins));
1125 
1126     /*
1127      * VMCB Save State Area
1128      */
1129 
1130     /* Segments */
1131     ns_vmcb->es = n2vmcb->es;
1132     ns_vmcb->cs = n2vmcb->cs;
1133     ns_vmcb->ss = n2vmcb->ss;
1134     ns_vmcb->ds = n2vmcb->ds;
1135     ns_vmcb->gdtr = n2vmcb->gdtr;
1136     ns_vmcb->idtr = n2vmcb->idtr;
1137 
1138     /* CPL */
1139     ns_vmcb->_cpl = n2vmcb->_cpl;
1140 
1141     /* EFER */
1142     ns_vmcb->_efer = n2vmcb->_efer;
1143 
1144     /* CRn */
1145     ns_vmcb->_cr4 = n2vmcb->_cr4;
1146     ns_vmcb->_cr0 = n2vmcb->_cr0;
1147 
1148     /* DRn */
1149     ns_vmcb->_dr7 = n2vmcb->_dr7;
1150     ns_vmcb->_dr6 = n2vmcb->_dr6;
1151 
1152     /* Restore registers from regs as those values
1153      * can be newer than in n2vmcb (e.g. due to an
1154      * instruction emulation right before).
1155      */
1156 
1157     /* RFLAGS */
1158     ns_vmcb->rflags = n2vmcb->rflags = regs->rflags;
1159 
1160     /* RIP */
1161     ns_vmcb->rip = n2vmcb->rip = regs->rip;
1162 
1163     /* RSP */
1164     ns_vmcb->rsp = n2vmcb->rsp = regs->rsp;
1165 
1166     /* RAX */
1167     ns_vmcb->rax = n2vmcb->rax = regs->rax;
1168 
1169     /* Keep the l2 guest values of the fs, gs, ldtr, tr, kerngsbase,
1170      * star, lstar, cstar, sfmask, sysenter_cs, sysenter_esp,
1171      * sysenter_eip. These are handled via VMSAVE/VMLOAD emulation.
1172      */
1173 
1174     /* CR2 */
1175     ns_vmcb->_cr2 = n2vmcb->_cr2;
1176 
1177     /* Page tables */
1178     ns_vmcb->pdpe0 = n2vmcb->pdpe0;
1179     ns_vmcb->pdpe1 = n2vmcb->pdpe1;
1180     ns_vmcb->pdpe2 = n2vmcb->pdpe2;
1181     ns_vmcb->pdpe3 = n2vmcb->pdpe3;
1182 
1183     /* PAT */
1184     ns_vmcb->_g_pat = n2vmcb->_g_pat;
1185 
1186     /* Debug Control MSR */
1187     ns_vmcb->_debugctlmsr = n2vmcb->_debugctlmsr;
1188 
1189     /* LBR MSRs */
1190     ns_vmcb->_lastbranchfromip = n2vmcb->_lastbranchfromip;
1191     ns_vmcb->_lastbranchtoip = n2vmcb->_lastbranchtoip;
1192     ns_vmcb->_lastintfromip = n2vmcb->_lastintfromip;
1193     ns_vmcb->_lastinttoip = n2vmcb->_lastinttoip;
1194 
1195     return 0;
1196 }
1197 
1198 bool_t
1199 nsvm_vmcb_hap_enabled(struct vcpu *v)
1200 {
1201     return vcpu_nestedsvm(v).ns_hap_enabled;
1202 }
1203 
1204 /* This function uses L2_gpa to walk the P2M page table in L1. If the
1205  * walk is successful, the translated value is returned in
1206  * L1_gpa. The result value tells what to do next.
1207  */
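/*
 * NESTEDHVM_PAGEFAULT_INJECT is returned when the walk of the l1
 * guest's nested page table faults, NESTEDHVM_PAGEFAULT_DONE when
 * *L1_gpa holds a valid translation; the caller decides how to reflect
 * a failed walk.
 */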
1208 int
1209 nsvm_hap_walk_L1_p2m(struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa,
1210                      unsigned int *page_order, uint8_t *p2m_acc,
1211                      bool_t access_r, bool_t access_w, bool_t access_x)
1212 {
1213     uint32_t pfec;
1214     unsigned long nested_cr3, gfn;
1215 
1216     nested_cr3 = nhvm_vcpu_p2m_base(v);
1217 
1218     pfec = PFEC_user_mode | PFEC_page_present;
1219     if ( access_w )
1220         pfec |= PFEC_write_access;
1221     if ( access_x )
1222         pfec |= PFEC_insn_fetch;
1223 
1224     /* Walk the guest-supplied NPT table, just as if it were a pagetable */
1225     gfn = paging_ga_to_gfn_cr3(v, nested_cr3, L2_gpa, &pfec, page_order);
1226 
1227     if ( gfn == gfn_x(INVALID_GFN) )
1228         return NESTEDHVM_PAGEFAULT_INJECT;
1229 
1230     *L1_gpa = (gfn << PAGE_SHIFT) + (L2_gpa & ~PAGE_MASK);
1231     return NESTEDHVM_PAGEFAULT_DONE;
1232 }
1233 
1234 enum hvm_intblk nsvm_intr_blocked(struct vcpu *v)
1235 {
1236     struct nestedsvm *svm = &vcpu_nestedsvm(v);
1237     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1238 
1239     ASSERT(nestedhvm_enabled(v->domain));
1240 
1241     if ( !nestedsvm_gif_isset(v) )
1242         return hvm_intblk_svm_gif;
1243 
1244     if ( nestedhvm_vcpu_in_guestmode(v) ) {
1245         struct vmcb_struct *n2vmcb = nv->nv_n2vmcx;
1246 
1247         if ( svm->ns_hostflags.fields.vintrmask )
1248             if ( !svm->ns_hostflags.fields.rflagsif )
1249                 return hvm_intblk_rflags_ie;
1250 
1251         /* When the l1 guest passes its devices through to the l2 guest
1252          * and the l2 guest does an MMIO access, we may want to
1253          * inject a VMEXIT(#INTR) exitcode into the l1 guest.
1254          * Delay the injection because this would result in delivering
1255          * an interrupt *within* the execution of an instruction.
1256          */
1257         if ( v->arch.hvm_vcpu.hvm_io.io_req.state != STATE_IOREQ_NONE )
1258             return hvm_intblk_shadow;
1259 
1260         if ( !nv->nv_vmexit_pending && n2vmcb->exitintinfo.bytes != 0 ) {
1261             /* Give the l2 guest a chance to finish the delivery of
1262              * the last injected interrupt or exception before we
1263              * emulate a VMEXIT (e.g. VMEXIT(INTR) ).
1264              */
1265             return hvm_intblk_shadow;
1266         }
1267     }
1268 
1269     if ( nv->nv_vmexit_pending ) {
1270         /* hvm_inject_hw_exception() must have run before.
1271          * exceptions have higher priority than interrupts.
1272          */
1273         return hvm_intblk_rflags_ie;
1274     }
1275 
1276     return hvm_intblk_none;
1277 }
1278 
1279 /* MSR handling */
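/*
 * Both accessors use the same return convention: 1 if the MSR was
 * handled here, 0 if it is not a nested-SVM MSR (the caller falls back
 * to the regular MSR handling), and -1 from the write path to request
 * a #GP injection.
 */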
1280 int nsvm_rdmsr(struct vcpu *v, unsigned int msr, uint64_t *msr_content)
1281 {
1282     struct nestedsvm *svm = &vcpu_nestedsvm(v);
1283     int ret = 1;
1284 
1285     *msr_content = 0;
1286 
1287     switch (msr) {
1288     case MSR_K8_VM_CR:
1289         break;
1290     case MSR_K8_VM_HSAVE_PA:
1291         *msr_content = svm->ns_msr_hsavepa;
1292         break;
1293     case MSR_AMD64_TSC_RATIO:
1294         *msr_content = svm->ns_tscratio;
1295         break;
1296     default:
1297         ret = 0;
1298         break;
1299     }
1300 
1301     return ret;
1302 }
1303 
1304 int nsvm_wrmsr(struct vcpu *v, unsigned int msr, uint64_t msr_content)
1305 {
1306     int ret = 1;
1307     struct nestedsvm *svm = &vcpu_nestedsvm(v);
1308 
1309     switch (msr) {
1310     case MSR_K8_VM_CR:
1311         /* ignore write. handle all bits as read-only. */
1312         break;
1313     case MSR_K8_VM_HSAVE_PA:
1314         if (!nestedsvm_vmcb_isvalid(v, msr_content)) {
1315             gdprintk(XENLOG_ERR,
1316                 "MSR_K8_VM_HSAVE_PA value invalid %#"PRIx64"\n", msr_content);
1317             ret = -1; /* inject #GP */
1318             break;
1319         }
1320         svm->ns_msr_hsavepa = msr_content;
1321         break;
1322     case MSR_AMD64_TSC_RATIO:
1323         if ((msr_content & ~TSC_RATIO_RSVD_BITS) != msr_content) {
1324             gdprintk(XENLOG_ERR,
1325                 "reserved bits set in MSR_AMD64_TSC_RATIO %#"PRIx64"\n",
1326                 msr_content);
1327             ret = -1; /* inject #GP */
1328             break;
1329         }
1330         svm->ns_tscratio = msr_content;
1331         break;
1332     default:
1333         ret = 0;
1334         break;
1335     }
1336 
1337     return ret;
1338 }
1339 
1340 /* VMEXIT emulation */
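/*
 * Record a VMEXIT to be injected into the l1 guest.  The actual switch
 * back to the l1 context happens later in nsvm_vcpu_switch(), which
 * consumes nv_vmexit_pending; nestedsvm_vcpu_clgi() clears the virtual
 * GIF so no further events are taken by the l2 guest in the meantime.
 */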
1341 void
1342 nestedsvm_vmexit_defer(struct vcpu *v,
1343     uint64_t exitcode, uint64_t exitinfo1, uint64_t exitinfo2)
1344 {
1345     struct nestedsvm *svm = &vcpu_nestedsvm(v);
1346 
1347     nestedsvm_vcpu_clgi(v);
1348     svm->ns_vmexit.exitcode = exitcode;
1349     svm->ns_vmexit.exitinfo1 = exitinfo1;
1350     svm->ns_vmexit.exitinfo2 = exitinfo2;
1351     vcpu_nestedhvm(v).nv_vmexit_pending = 1;
1352 }
1353 
1354 enum nestedhvm_vmexits
1355 nestedsvm_check_intercepts(struct vcpu *v, struct cpu_user_regs *regs,
1356     uint64_t exitcode)
1357 {
1358     bool_t is_intercepted;
1359 
1360     ASSERT(vcpu_nestedhvm(v).nv_vmexit_pending == 0);
1361     is_intercepted = nsvm_vmcb_guest_intercepts_exitcode(v, regs, exitcode);
1362 
1363     switch (exitcode) {
1364     case VMEXIT_INVALID:
1365         if (is_intercepted)
1366             return NESTEDHVM_VMEXIT_INJECT;
1367         return NESTEDHVM_VMEXIT_HOST;
1368 
1369     case VMEXIT_INTR:
1370     case VMEXIT_NMI:
1371         return NESTEDHVM_VMEXIT_HOST;
1372     case VMEXIT_EXCEPTION_NM:
1373         /* Host must handle lazy fpu context switching first.
1374          * Then inject the VMEXIT if L1 guest intercepts this.
1375          */
1376         return NESTEDHVM_VMEXIT_HOST;
1377 
1378     case VMEXIT_NPF:
1379         if (nestedhvm_paging_mode_hap(v)) {
1380             if (!is_intercepted)
1381                 return NESTEDHVM_VMEXIT_FATALERROR;
1382             /* host nested paging + guest nested paging */
1383             return NESTEDHVM_VMEXIT_HOST;
1384         }
1385         if (paging_mode_hap(v->domain)) {
1386             if (is_intercepted)
1387                 return NESTEDHVM_VMEXIT_FATALERROR;
1388             /* host nested paging + guest shadow paging */
1389             return NESTEDHVM_VMEXIT_HOST;
1390         }
1391         /* host shadow paging + guest shadow paging */
1392         /* Can this happen? */
1393         BUG();
1394         return NESTEDHVM_VMEXIT_FATALERROR;
1395     case VMEXIT_EXCEPTION_PF:
1396         if (nestedhvm_paging_mode_hap(v)) {
1397             /* host nested paging + guest nested paging */
1398             if (!is_intercepted)
1399                 /* l1 guest intercepts #PF unnecessarily */
1400                 return NESTEDHVM_VMEXIT_HOST;
1401             /* l2 guest intercepts #PF unnecessarily */
1402             return NESTEDHVM_VMEXIT_INJECT;
1403         }
1404         if (!paging_mode_hap(v->domain)) {
1405             /* host shadow paging + guest shadow paging */
1406             return NESTEDHVM_VMEXIT_HOST;
1407         }
1408         /* host nested paging + guest shadow paging */
1409         return NESTEDHVM_VMEXIT_INJECT;
1410     case VMEXIT_VMMCALL:
1411         /* Always let the guest handle VMMCALL/VMCALL */
1412         return NESTEDHVM_VMEXIT_INJECT;
1413     default:
1414         gprintk(XENLOG_ERR, "Unexpected nested vmexit: reason %#"PRIx64"\n",
1415                 exitcode);
1416         break;
1417     }
1418 
1419     if (is_intercepted)
1420         return NESTEDHVM_VMEXIT_INJECT;
1421     return NESTEDHVM_VMEXIT_HOST;
1422 }
1423 
1424 enum nestedhvm_vmexits
1425 nestedsvm_vmexit_n2n1(struct vcpu *v, struct cpu_user_regs *regs)
1426 {
1427     int rc;
1428     enum nestedhvm_vmexits ret = NESTEDHVM_VMEXIT_DONE;
1429 
1430     ASSERT(vcpu_nestedhvm(v).nv_vmswitch_in_progress);
1431     ASSERT(nestedhvm_vcpu_in_guestmode(v));
1432 
1433     rc = nsvm_vmcb_prepare4vmexit(v, regs);
1434     if (rc)
1435         ret = NESTEDHVM_VMEXIT_ERROR;
1436 
1437     rc = nsvm_vcpu_hostrestore(v, regs);
1438     if (rc)
1439         ret = NESTEDHVM_VMEXIT_FATALERROR;
1440 
1441     nestedhvm_vcpu_exit_guestmode(v);
1442     return ret;
1443 }
1444 
1445 /* The exitcode is in native SVM/VMX format. The forced exitcode
1446  * is in generic format.
1447  */
1448 static enum nestedhvm_vmexits
1449 nestedsvm_vcpu_vmexit(struct vcpu *v, struct cpu_user_regs *regs,
1450     uint64_t exitcode)
1451 {
1452     int rc;
1453     struct nestedvcpu *nv = &vcpu_nestedhvm(v);
1454 
1455     nv->nv_vmswitch_in_progress = 1;
1456 
1457     ASSERT(nv->nv_vvmcx != NULL);
1458 
1459     /* On special intercepts that the host has to handle,
1460      * the vcpu is still in guest mode here.
1461      */
1462     if (nestedhvm_vcpu_in_guestmode(v)) {
1463         enum nestedhvm_vmexits ret;
1464 
1465         ret = nestedsvm_vmexit_n2n1(v, regs);
1466         switch (ret) {
1467         case NESTEDHVM_VMEXIT_FATALERROR:
1468             gdprintk(XENLOG_ERR, "VMEXIT: fatal error\n");
1469             return ret;
1470         case NESTEDHVM_VMEXIT_HOST:
1471             BUG();
1472             return ret;
1473         case NESTEDHVM_VMEXIT_ERROR:
1474             exitcode = VMEXIT_INVALID;
1475             break;
1476         default:
1477             ASSERT(!nestedhvm_vcpu_in_guestmode(v));
1478             break;
1479         }
1480 
1481         /* host state has been restored */
1482     }
1483 
1484     ASSERT(!nestedhvm_vcpu_in_guestmode(v));
1485 
1486     /* Prepare for running the l1 guest. Make the actual
1487      * modifications to the virtual VMCB/VMCS.
1488      */
1489     rc = nsvm_vcpu_vmexit_inject(v, regs, exitcode);
1490 
1491     /* If l1 guest uses shadow paging, update the paging mode. */
1492     if (!nestedhvm_paging_mode_hap(v))
1493         paging_update_paging_modes(v);
1494 
1495     nv->nv_vmswitch_in_progress = 0;
1496 
1497     if (rc)
1498         return NESTEDHVM_VMEXIT_FATALERROR;
1499 
1500     return NESTEDHVM_VMEXIT_DONE;
1501 }
1502 
1503 /* VCPU switch */
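/*
 * Deliver any deferred emulated VMEXIT (nv_vmexit_pending) first;
 * otherwise perform a pending emulated VMRUN (nv_vmentry_pending),
 * falling back to the VMEXIT path if that entry fails.  When staying in
 * l2 guest mode with nested paging, make sure a nested p2m is assigned.
 */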
1504 void nsvm_vcpu_switch(struct cpu_user_regs *regs)
1505 {
1506     struct vcpu *v = current;
1507     struct nestedvcpu *nv;
1508     struct nestedsvm *svm;
1509 
1510     if (!nestedhvm_enabled(v->domain))
1511         return;
1512 
1513     nv = &vcpu_nestedhvm(v);
1514     svm = &vcpu_nestedsvm(v);
1515     ASSERT(v->arch.hvm_svm.vmcb != NULL);
1516     ASSERT(nv->nv_n1vmcx != NULL);
1517     ASSERT(nv->nv_n2vmcx != NULL);
1518     ASSERT(nv->nv_n1vmcx_pa != INVALID_PADDR);
1519     ASSERT(nv->nv_n2vmcx_pa != INVALID_PADDR);
1520 
1521     if (nv->nv_vmexit_pending) {
1522  vmexit:
1523         nestedsvm_vcpu_vmexit(v, regs, svm->ns_vmexit.exitcode);
1524         nv->nv_vmexit_pending = 0;
1525         nv->nv_vmentry_pending = 0;
1526         return;
1527     }
1528     if (nv->nv_vmentry_pending) {
1529         int ret;
1530         ASSERT(!nv->nv_vmexit_pending);
1531         ret = nsvm_vcpu_vmrun(v, regs);
1532         if (ret)
1533             goto vmexit;
1534 
1535         ASSERT(nestedhvm_vcpu_in_guestmode(v));
1536         nv->nv_vmentry_pending = 0;
1537     }
1538 
1539     if (nestedhvm_vcpu_in_guestmode(v)
1540        && nestedhvm_paging_mode_hap(v))
1541     {
1542         /* In case we left the l2 guest due to a physical interrupt (e.g. IPI)
1543          * that is not for the l1 guest then we continue running the l2 guest
1544          * but check if the nestedp2m is still valid.
1545          */
1546         if (nv->nv_p2m == NULL)
1547             nestedsvm_vmcb_set_nestedp2m(v, nv->nv_vvmcx, nv->nv_n2vmcx);
1548     }
1549 }
1550 
1551 /* Interrupts, Virtual GIF */
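/*
 * Decide how an interrupt for a vcpu in l2 guest mode is handled:
 * NSVM_INTR_MASKED if delivery is currently blocked,
 * NSVM_INTR_FORCEVMEXIT if the l1 guest intercepts it (a VMEXIT is then
 * deferred), NSVM_INTR_NOTINTERCEPTED if the l2 guest should take it
 * directly, and NSVM_INTR_NOTHANDLED if there is nothing to deliver.
 */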
1552 int
1553 nestedsvm_vcpu_interrupt(struct vcpu *v, const struct hvm_intack intack)
1554 {
1555     int ret;
1556     enum hvm_intblk intr;
1557     uint64_t exitcode = VMEXIT_INTR;
1558     uint64_t exitinfo2 = 0;
1559     ASSERT(nestedhvm_vcpu_in_guestmode(v));
1560 
1561     intr = nhvm_interrupt_blocked(v);
1562     if ( intr != hvm_intblk_none )
1563         return NSVM_INTR_MASKED;
1564 
1565     switch (intack.source) {
1566     case hvm_intsrc_pic:
1567     case hvm_intsrc_lapic:
1568     case hvm_intsrc_vector:
1569         exitcode = VMEXIT_INTR;
1570         exitinfo2 = intack.vector;
1571         break;
1572     case hvm_intsrc_nmi:
1573         exitcode = VMEXIT_NMI;
1574         exitinfo2 = intack.vector;
1575         break;
1576     case hvm_intsrc_mce:
1577         exitcode = VMEXIT_EXCEPTION_MC;
1578         exitinfo2 = intack.vector;
1579         break;
1580     case hvm_intsrc_none:
1581         return NSVM_INTR_NOTHANDLED;
1582     default:
1583         BUG();
1584     }
1585 
1586     ret = nsvm_vmcb_guest_intercepts_exitcode(v,
1587                                      guest_cpu_user_regs(), exitcode);
1588     if (ret) {
1589         nestedsvm_vmexit_defer(v, exitcode, intack.source, exitinfo2);
1590         return NSVM_INTR_FORCEVMEXIT;
1591     }
1592 
1593     return NSVM_INTR_NOTINTERCEPTED;
1594 }
1595 
1596 bool_t
1597 nestedsvm_gif_isset(struct vcpu *v)
1598 {
1599     struct nestedsvm *svm = &vcpu_nestedsvm(v);
1600 
1601     return (!!svm->ns_gif);
1602 }
1603 
1604 void svm_vmexit_do_stgi(struct cpu_user_regs *regs, struct vcpu *v)
1605 {
1606     unsigned int inst_len;
1607 
1608     if ( !nestedhvm_enabled(v->domain) ) {
1609         hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
1610         return;
1611     }
1612 
1613     if ( (inst_len = __get_instruction_length(v, INSTR_STGI)) == 0 )
1614         return;
1615 
1616     nestedsvm_vcpu_stgi(v);
1617 
1618     __update_guest_eip(regs, inst_len);
1619 }
1620 
1621 void svm_vmexit_do_clgi(struct cpu_user_regs *regs, struct vcpu *v)
1622 {
1623     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1624     unsigned int inst_len;
1625     uint32_t general1_intercepts = vmcb_get_general1_intercepts(vmcb);
1626     vintr_t intr;
1627 
1628     if ( !nestedhvm_enabled(v->domain) ) {
1629         hvm_inject_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC);
1630         return;
1631     }
1632 
1633     if ( (inst_len = __get_instruction_length(v, INSTR_CLGI)) == 0 )
1634         return;
1635 
1636     nestedsvm_vcpu_clgi(v);
1637 
1638     /* After a CLGI no interrupts should come */
1639     intr = vmcb_get_vintr(vmcb);
1640     intr.fields.irq = 0;
1641     general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
1642     vmcb_set_vintr(vmcb, intr);
1643     vmcb_set_general1_intercepts(vmcb, general1_intercepts);
1644 
1645     __update_guest_eip(regs, inst_len);
1646 }
1647