/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * intr.c: handling I/O, interrupts related VMX entry/exit
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2004-2007, XenSource Inc.
 */

#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/trace.h>
#include <xen/event.h>
#include <asm/apicdef.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/nestedhvm.h>
#include <public/hvm/ioreq.h>
#include <asm/vm_event.h>

/*
 * A few notes on virtual NMI and INTR delivery, and interactions with
 * interruptibility states:
 *
 * We can only inject an ExtInt if EFLAGS.IF = 1 and there is no blocking by
 * STI or MOV SS. Otherwise the VM entry fails. The 'virtual interrupt
 * pending' control causes a VM exit when all these checks succeed. It will
 * exit immediately after VM entry if the checks succeed at that point.
 *
 * We can only inject an NMI if there is no blocking by MOV SS (and also,
 * depending on the implementation, no blocking by STI). If the pin-based
 * 'virtual NMIs' control is specified then the NMI-blocking
 * interruptibility flag is also checked. The 'virtual NMI pending' control
 * (available only in conjunction with 'virtual NMIs') causes a VM exit when
 * all these checks succeed. It will exit immediately after VM entry if the
 * checks succeed at that point.
 *
 * Because a processor may or may not check blocking-by-STI when injecting
 * a virtual NMI, it is necessary to convert that to blocking-by-MOV-SS
 * before specifying the 'virtual NMI pending' control. Otherwise we could
 * enter an infinite loop where we check blocking-by-STI in software and
 * thus delay delivery of a virtual NMI, but the processor causes an
 * immediate VM exit because it does not check blocking-by-STI.
 *
 * Injecting a virtual NMI sets the NMI-blocking interruptibility flag only
 * if the 'virtual NMIs' control is set. Injecting *any* kind of event clears
 * the STI- and MOV-SS-blocking interruptibility-state flags.
 */

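/*
 * Arm the appropriate "window exiting" execution control so the processor
 * exits to us as soon as the pending event (ExtInt or NMI) can be injected.
 */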
static void vmx_enable_intr_window(struct vcpu *v, struct hvm_intack intack)
{
    u32 ctl = CPU_BASED_VIRTUAL_INTR_PENDING;

    ASSERT(intack.source != hvm_intsrc_none);

    if ( unlikely(tb_init_done) )
    {
        unsigned long intr;

        __vmread(VM_ENTRY_INTR_INFO, &intr);
        TRACE(TRC_HVM_INTR_WINDOW, intack.vector, intack.source,
              (intr & INTR_INFO_VALID_MASK) ? intr & 0xff : -1);
    }

    if ( (intack.source == hvm_intsrc_nmi) && cpu_has_vmx_vnmi )
    {
        /*
         * We set MOV-SS blocking in lieu of STI blocking when delivering an
         * NMI. This is because it is processor-specific whether STI-blocking
         * blocks NMIs. Hence we *must* check for STI-blocking on NMI delivery
         * (otherwise vmentry will fail on processors that check for STI-
         * blocking) but if the processor does not check for STI-blocking then
         * we may immediately vmexit and hence make no progress!
         * (see SDM 3B 21.3, "Other Causes of VM Exits").
         */
        unsigned long intr_shadow;

        __vmread(GUEST_INTERRUPTIBILITY_INFO, &intr_shadow);
        if ( intr_shadow & VMX_INTR_SHADOW_STI )
        {
            /* Having both STI-blocking and MOV-SS-blocking fails vmentry. */
            intr_shadow &= ~VMX_INTR_SHADOW_STI;
            intr_shadow |= VMX_INTR_SHADOW_MOV_SS;
            __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
        }
        ctl = CPU_BASED_VIRTUAL_NMI_PENDING;
    }

    if ( !(v->arch.hvm.vmx.exec_control & ctl) )
    {
        v->arch.hvm.vmx.exec_control |= ctl;
        vmx_update_cpu_exec_control(v);
    }
}

/*
 * Injecting interrupts for nested virtualization
 *
 *  When injecting virtual interrupts (originating from L0), there are
 *  two major possibilities: within L1 context and within L2 context.
 *   1. L1 context (in_nesting == 0)
 *     Everything is the same as without nesting: check RFLAGS.IF to
 *     see if the injection can be done, and use the VMCS to inject the
 *     interrupt.
 *
 *   2. L2 context (in_nesting == 1)
 *     Causes a virtual VMExit; RFLAGS.IF is ignored, and whether to ack
 *     the irq depends on intr_ack_on_exit. Injection shouldn't normally
 *     be blocked, except for:
 *    a. context transition
 *     The interrupt needs to be blocked at virtual VMEntry time.
 *    b. L2 idtv reinjection
 *     If the L2 idtv event is handled within L0 (e.g. an L0 shadow page
 *     fault), it needs to be reinjected without exiting to L1; interrupt
 *     injection must be blocked at this point as well.
 *
 *  Unfortunately, interrupt blocking in L2 won't work with a simple
 *  intr_window_open check (which depends on L2's IF). To solve this,
 *  the following algorithm can be used:
 *   v->arch.hvm.vmx.exec_control.VIRTUAL_INTR_PENDING now denotes
 *   only L0 control; the physical control may differ from it.
 *       - if in L1, it behaves normally: the intr window is written
 *         to the physical control as it is
 *       - if in L2, replace it with MTF (or NMI window) if possible
 *       - if MTF/NMI window is not used, the intr window can still be
 *         used but may have a negative impact on interrupt performance.
 */

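/*
 * Report whether injection of an L0-originated event is currently blocked
 * by nested state: a pending virtual VMExit/VMEntry, an in-progress VMCS
 * switch, or an event already queued for injection while in guest mode.
 */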
enum hvm_intblk cf_check nvmx_intr_blocked(struct vcpu *v)
{
    int r = hvm_intblk_none;
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);

    if ( nestedhvm_vcpu_in_guestmode(v) )
    {
        if ( nvcpu->nv_vmexit_pending ||
             nvcpu->nv_vmswitch_in_progress )
            r = hvm_intblk_rflags_ie;
        else
        {
            unsigned long intr_info;

            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
                r = hvm_intblk_rflags_ie;
        }
    }
    else if ( nvcpu->nv_vmentry_pending )
        r = hvm_intblk_rflags_ie;

    return r;
}

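/*
 * Handle interrupt delivery while the vCPU is in nested context.  Returns 1
 * if the event has been dealt with here (injected, or deferred by arming an
 * interrupt window), 0 if the normal vmx_intr_assist() path should continue.
 */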
static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
{
    u32 ctrl;

    /* If blocked by L1's tpr, then nothing to do. */
    if ( nestedhvm_vcpu_in_guestmode(v) &&
         hvm_interrupt_blocked(v, intack) == hvm_intblk_tpr )
        return 1;

    if ( nvmx_intr_blocked(v) != hvm_intblk_none )
    {
        vmx_enable_intr_window(v, intack);
        return 1;
    }

    if ( nestedhvm_vcpu_in_guestmode(v) )
    {
        ctrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
        if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
            return 0;

        if ( intack.source == hvm_intsrc_pic ||
             intack.source == hvm_intsrc_lapic )
        {
            vmx_inject_extint(intack.vector, intack.source);

            ctrl = get_vvmcs(v, VM_EXIT_CONTROLS);
            if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
            {
                /* For now, duplicate the ack path in vmx_intr_assist(). */
                hvm_vcpu_ack_pending_irq(v, intack);
                pt_intr_post(v, intack);

                intack = hvm_vcpu_has_pending_irq(v);
                if ( unlikely(intack.source != hvm_intsrc_none) )
                    vmx_enable_intr_window(v, intack);
            }
            else if ( !cpu_has_vmx_virtual_intr_delivery )
                vmx_enable_intr_window(v, intack);

            return 1;
        }
        else if ( intack.source == hvm_intsrc_vector )
        {
            vmx_inject_extint(intack.vector, intack.source);
            return 1;
        }
    }

    return 0;
}

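/* Flush any modified EOI-exit bitmap words into the VMCS. */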
void vmx_sync_exit_bitmap(struct vcpu *v)
{
    const unsigned int n = ARRAY_SIZE(v->arch.hvm.vmx.eoi_exit_bitmap);
    unsigned int i;

    while ( (i = find_first_bit(&v->arch.hvm.vmx.eoi_exitmap_changed, n)) < n )
    {
        clear_bit(i, &v->arch.hvm.vmx.eoi_exitmap_changed);
        __vmwrite(EOI_EXIT_BITMAP(i), v->arch.hvm.vmx.eoi_exit_bitmap[i]);
    }
}

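/*
 * Pick up the highest-priority pending event (NMI, MCE or external
 * interrupt) before the next VM entry, and either inject it now or arm an
 * interrupt/NMI window exit so it can be injected later.
 */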
void asmlinkage vmx_intr_assist(void)
{
    struct hvm_intack intack;
    struct vcpu *v = current;
    unsigned int tpr_threshold = 0;
    enum hvm_intblk intblk;
    int pt_vector;

    /* Block event injection when single step with MTF. */
    if ( unlikely(v->arch.hvm.single_step) )
    {
        v->arch.hvm.vmx.exec_control |= CPU_BASED_MONITOR_TRAP_FLAG;
        vmx_update_cpu_exec_control(v);
        return;
    }

    /* Block event injection while handling a sync vm_event. */
    if ( unlikely(v->arch.vm_event) && v->arch.vm_event->sync_event )
        return;

#ifdef CONFIG_MEM_SHARING
    /* Block event injection for VM fork if requested. */
    if ( unlikely(v->domain->arch.hvm.mem_sharing.block_interrupts) )
        return;
#endif

    /* Crank the handle on interrupt state. */
    pt_vector = pt_update_irq(v);

    do {
        unsigned long intr_info;

        intack = hvm_vcpu_has_pending_irq(v);
        if ( likely(intack.source == hvm_intsrc_none) )
            goto out;

        if ( unlikely(nvmx_intr_intercept(v, intack)) )
            goto out;

        intblk = hvm_interrupt_blocked(v, intack);
        if ( cpu_has_vmx_virtual_intr_delivery )
        {
            /* Set "Interrupt-window exiting" for ExtINT and NMI. */
            if ( (intblk != hvm_intblk_none) &&
                 (intack.source == hvm_intsrc_pic ||
                  intack.source == hvm_intsrc_vector ||
                  intack.source == hvm_intsrc_nmi) )
            {
                vmx_enable_intr_window(v, intack);
                goto out;
            }

            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
            {
                if ( (intack.source == hvm_intsrc_pic) ||
                     (intack.source == hvm_intsrc_nmi) ||
                     (intack.source == hvm_intsrc_mce) )
                    vmx_enable_intr_window(v, intack);

                goto out;
            }
        }
        else if ( intblk == hvm_intblk_tpr )
        {
            ASSERT(vlapic_enabled(vcpu_vlapic(v)));
            ASSERT(intack.source == hvm_intsrc_lapic);
            tpr_threshold = intack.vector >> 4;
            goto out;
        }
        else if ( intblk != hvm_intblk_none )
        {
            vmx_enable_intr_window(v, intack);
            goto out;
        }
        else
        {
            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
            {
                vmx_enable_intr_window(v, intack);
                goto out;
            }
        }

        intack = hvm_vcpu_ack_pending_irq(v, intack);
    } while ( intack.source == hvm_intsrc_none );

    if ( intack.source == hvm_intsrc_nmi )
    {
        vmx_inject_nmi();
    }
    else if ( intack.source == hvm_intsrc_mce )
    {
        hvm_inject_hw_exception(X86_EXC_MC, X86_EVENT_NO_EC);
    }
    else if ( cpu_has_vmx_virtual_intr_delivery &&
              intack.source != hvm_intsrc_pic &&
              intack.source != hvm_intsrc_vector )
    {
        unsigned long status;

        /*
         * intack.vector is the highest priority vector, so we set the
         * eoi_exit_bitmap bit for it. This gives periodic timer interrupts
         * a chance to be posted when they become the highest priority
         * pending vector.
         */
        if ( pt_vector != -1 )
        {
#ifndef NDEBUG
            /*
             * We assert that intack.vector is the highest priority vector,
             * because only an interrupt from the vlapic can reach this point
             * and the highest vector is chosen in hvm_vcpu_has_pending_irq().
             * In fact, the assertion has been seen to fail occasionally. It
             * is suspected that the PIR is not synced to the vIRR, which
             * leaves pt_vector behind in the PIR. In order to verify this
             * suspicion, dump some information when the assertion fails.
             */
            if ( unlikely(intack.vector < pt_vector) )
            {
                const struct vlapic *vlapic;
                const struct pi_desc *pi_desc;
                const uint32_t *word;
                unsigned int i;

                printk(XENLOG_ERR "%pv: intack: %u:%02x pt: %02x\n",
                       current, intack.source, intack.vector, pt_vector);

                vlapic = vcpu_vlapic(v);
                if ( vlapic && vlapic->regs )
                {
                    word = (const void *)&vlapic->regs->data[APIC_IRR];
                    printk(XENLOG_ERR "vIRR:");
                    for ( i = X86_IDT_VECTORS / 32; i-- ; )
                        printk(" %08x", word[i*4]);
                    printk("\n");
                }

                pi_desc = &v->arch.hvm.vmx.pi_desc;
                if ( pi_desc )
                {
                    word = (const void *)&pi_desc->pir;
                    printk(XENLOG_ERR " PIR:");
                    for ( i = X86_IDT_VECTORS / 32; i-- ; )
                        printk(" %08x", word[i]);
                    printk("\n");
                }
            }
#endif
            ASSERT(intack.vector >= pt_vector);
            vmx_set_eoi_exit_bitmap(v, intack.vector);
        }

        /* Update the RVI field in the guest interrupt status. */
        __vmread(GUEST_INTR_STATUS, &status);
        status &= ~VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
        status |= VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK &
                    intack.vector;
        __vmwrite(GUEST_INTR_STATUS, status);

        vmx_sync_exit_bitmap(v);

        pt_intr_post(v, intack);
    }
    else
    {
        TRACE(TRC_HVM_INJ_VIRQ, intack.vector, /*fake=*/ 0);
        vmx_inject_extint(intack.vector, intack.source);
        pt_intr_post(v, intack);
    }

    /* Is there another IRQ to queue up behind this one? */
    intack = hvm_vcpu_has_pending_irq(v);
    if ( !cpu_has_vmx_virtual_intr_delivery ||
         intack.source == hvm_intsrc_pic ||
         intack.source == hvm_intsrc_vector )
    {
        if ( unlikely(intack.source != hvm_intsrc_none) )
            vmx_enable_intr_window(v, intack);
    }

 out:
    if ( !nestedhvm_vcpu_in_guestmode(v) &&
         !cpu_has_vmx_virtual_intr_delivery &&
         cpu_has_vmx_tpr_shadow )
        __vmwrite(TPR_THRESHOLD, tpr_threshold);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */