/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * intr.c: handling I/O, interrupts related VMX entry/exit
 * Copyright (c) 2004, Intel Corporation.
 * Copyright (c) 2004-2007, XenSource Inc.
 */

#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/trace.h>
#include <xen/event.h>
#include <asm/apicdef.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/io.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/nestedhvm.h>
#include <public/hvm/ioreq.h>
#include <asm/vm_event.h>

/*
 * A few notes on virtual NMI and INTR delivery, and interactions with
 * interruptibility states:
 *
 * We can only inject an ExtInt if EFLAGS.IF = 1 and no blocking by
 * STI nor MOV SS. Otherwise the VM entry fails. The 'virtual interrupt
 * pending' control causes a VM exit when all these checks succeed. It will
 * exit immediately after VM entry if the checks succeed at that point.
 *
 * We can only inject an NMI if no blocking by MOV SS (also, depending on
 * implementation, if no blocking by STI). If the pin-based 'virtual NMIs'
 * control is specified then the NMI-blocking interruptibility flag is
 * also checked. The 'virtual NMI pending' control (available only in
 * conjunction with 'virtual NMIs') causes a VM exit when all these checks
 * succeed. It will exit immediately after VM entry if the checks succeed
 * at that point.
 *
 * Because a processor may or may not check blocking-by-STI when injecting
 * a virtual NMI, it will be necessary to convert that to blocking-by-MOV-SS
 * before specifying the 'virtual NMI pending' control. Otherwise we could
 * enter an infinite loop where we check blocking-by-STI in software and
 * thus delay delivery of a virtual NMI, but the processor causes immediate
 * VM exit because it does not check blocking-by-STI.
 *
 * Injecting a virtual NMI sets the NMI-blocking interruptibility flag only
 * if the 'virtual NMIs' control is set. Injecting *any* kind of event clears
 * the STI- and MOV-SS-blocking interruptibility-state flags.
 */

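/*
 * Arrange for a VM exit as soon as the guest becomes able to accept the
 * pending event, by enabling the "interrupt-window exiting" (or, for NMIs
 * with virtual-NMI support, "NMI-window exiting") execution control.
 */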
static void vmx_enable_intr_window(struct vcpu *v, struct hvm_intack intack)
{
    u32 ctl = CPU_BASED_VIRTUAL_INTR_PENDING;

    ASSERT(intack.source != hvm_intsrc_none);

    if ( unlikely(tb_init_done) )
    {
        unsigned long intr;

        __vmread(VM_ENTRY_INTR_INFO, &intr);
        TRACE(TRC_HVM_INTR_WINDOW, intack.vector, intack.source,
              (intr & INTR_INFO_VALID_MASK) ? intr & 0xff : -1);
    }

    if ( (intack.source == hvm_intsrc_nmi) && cpu_has_vmx_vnmi )
    {
        /*
         * We set MOV-SS blocking in lieu of STI blocking when delivering an
         * NMI. This is because it is processor-specific whether STI-blocking
         * blocks NMIs. Hence we *must* check for STI-blocking on NMI delivery
         * (otherwise vmentry will fail on processors that check for STI-
         * blocking) but if the processor does not check for STI-blocking then
         * we may immediately vmexit and hence make no progress!
         * (see SDM 3B 21.3, "Other Causes of VM Exits").
         */
        unsigned long intr_shadow;

        __vmread(GUEST_INTERRUPTIBILITY_INFO, &intr_shadow);
        if ( intr_shadow & VMX_INTR_SHADOW_STI )
        {
            /* Having both STI-blocking and MOV-SS-blocking fails vmentry. */
            intr_shadow &= ~VMX_INTR_SHADOW_STI;
            intr_shadow |= VMX_INTR_SHADOW_MOV_SS;
            __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
        }
        ctl = CPU_BASED_VIRTUAL_NMI_PENDING;
    }

    if ( !(v->arch.hvm.vmx.exec_control & ctl) )
    {
        v->arch.hvm.vmx.exec_control |= ctl;
        vmx_update_cpu_exec_control(v);
    }
}

/*
 * Injecting interrupts for nested virtualization
 *
 * When injecting virtual interrupts (originating from L0), there are
 * two major possibilities: within L1 context and within L2 context.
 *
 * 1. L1 context (in_nesting == 0)
 *    Everything is the same as without nesting: check RFLAGS.IF to
 *    see if the injection can be done, and use the VMCS to inject the
 *    interrupt.
 *
 * 2. L2 context (in_nesting == 1)
 *    Cause a virtual VMExit; RFLAGS.IF is ignored, and whether to ack
 *    the irq is determined by intr_ack_on_exit. This normally shouldn't
 *    be blocked, except for:
 *    a. context transition
 *       the interrupt needs to be blocked at virtual VMEntry time
 *    b. L2 idtv reinjection
 *       if the L2 idtv is handled within L0 (e.g. an L0 shadow page
 *       fault), it needs to be reinjected without exiting to L1;
 *       interrupt injection should be blocked at this point as well.
 *
 * Unfortunately, interrupt blocking in L2 won't work with a simple
 * intr_window_open check (which depends on L2's IF). To solve this,
 * the following algorithm can be used:
 *   v->arch.hvm.vmx.exec_control.VIRTUAL_INTR_PENDING now denotes
 *   only L0 control; the physical control may be different from it.
 *   - if in L1, it behaves normally, and the intr window is written
 *     to the physical control as is
 *   - if in L2, replace it with MTF (or the NMI window) if possible
 *   - if the MTF/NMI window is not used, the intr window can still be
 *     used, but may have a negative impact on interrupt performance.
 */

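/*
 * Report whether L0 must refrain from injecting an event right now on
 * behalf of the nested guest: while a virtual VMExit/VMEntry is pending
 * or in progress, or while an event is already queued for injection.
 */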
enum hvm_intblk cf_check nvmx_intr_blocked(struct vcpu *v)
{
    int r = hvm_intblk_none;
    struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);

    if ( nestedhvm_vcpu_in_guestmode(v) )
    {
        if ( nvcpu->nv_vmexit_pending ||
             nvcpu->nv_vmswitch_in_progress )
            r = hvm_intblk_rflags_ie;
        else
        {
            unsigned long intr_info;

            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
                r = hvm_intblk_rflags_ie;
        }
    }
    else if ( nvcpu->nv_vmentry_pending )
        r = hvm_intblk_rflags_ie;

    return r;
}

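/*
 * Give the nested framework first refusal of a pending event.  Returns 1
 * if the event has been fully handled here (delivered via the nested
 * machinery, deferred with an interrupt window, or blocked by L1's TPR);
 * 0 if normal, non-nested handling should continue.
 */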
static int nvmx_intr_intercept(struct vcpu *v, struct hvm_intack intack)
{
    u32 ctrl;

    /* If blocked by L1's tpr, then nothing to do. */
    if ( nestedhvm_vcpu_in_guestmode(v) &&
         hvm_interrupt_blocked(v, intack) == hvm_intblk_tpr )
        return 1;

    if ( nvmx_intr_blocked(v) != hvm_intblk_none )
    {
        vmx_enable_intr_window(v, intack);
        return 1;
    }

    if ( nestedhvm_vcpu_in_guestmode(v) )
    {
        ctrl = get_vvmcs(v, PIN_BASED_VM_EXEC_CONTROL);
        if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
            return 0;

        if ( intack.source == hvm_intsrc_pic ||
             intack.source == hvm_intsrc_lapic )
        {
            vmx_inject_extint(intack.vector, intack.source);

            ctrl = get_vvmcs(v, VM_EXIT_CONTROLS);
            if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
            {
                /* For now, duplicate the ack path in vmx_intr_assist(). */
                hvm_vcpu_ack_pending_irq(v, intack);
                pt_intr_post(v, intack);

                intack = hvm_vcpu_has_pending_irq(v);
                if ( unlikely(intack.source != hvm_intsrc_none) )
                    vmx_enable_intr_window(v, intack);
            }
            else if ( !cpu_has_vmx_virtual_intr_delivery )
                vmx_enable_intr_window(v, intack);

            return 1;
        }
        else if ( intack.source == hvm_intsrc_vector )
        {
            vmx_inject_extint(intack.vector, intack.source);
            return 1;
        }
    }

    return 0;
}

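/* Flush any pending EOI-exit-bitmap updates into the VMCS. */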
void vmx_sync_exit_bitmap(struct vcpu *v)
{
    const unsigned int n = ARRAY_SIZE(v->arch.hvm.vmx.eoi_exit_bitmap);
    unsigned int i;

    while ( (i = find_first_bit(&v->arch.hvm.vmx.eoi_exitmap_changed, n)) < n )
    {
        clear_bit(i, &v->arch.hvm.vmx.eoi_exitmap_changed);
        __vmwrite(EOI_EXIT_BITMAP(i), v->arch.hvm.vmx.eoi_exit_bitmap[i]);
    }
}

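/*
 * Called on the VM entry path: pick the highest-priority pending event
 * (if any) and either inject it immediately or arrange for a VM exit
 * once the guest can accept it.
 */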
void asmlinkage vmx_intr_assist(void)
{
    struct hvm_intack intack;
    struct vcpu *v = current;
    unsigned int tpr_threshold = 0;
    enum hvm_intblk intblk;
    int pt_vector;

    /* Block event injection when single step with MTF. */
    if ( unlikely(v->arch.hvm.single_step) )
    {
        v->arch.hvm.vmx.exec_control |= CPU_BASED_MONITOR_TRAP_FLAG;
        vmx_update_cpu_exec_control(v);
        return;
    }

    /* Block event injection while handling a sync vm_event. */
    if ( unlikely(v->arch.vm_event) && v->arch.vm_event->sync_event )
        return;

#ifdef CONFIG_MEM_SHARING
    /* Block event injection for VM fork if requested */
    if ( unlikely(v->domain->arch.hvm.mem_sharing.block_interrupts) )
        return;
#endif

    /* Crank the handle on interrupt state. */
    pt_vector = pt_update_irq(v);

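    /*
     * Find an event that can be delivered right now: arm an exit window
     * and bail if the highest-priority one is currently blocked; otherwise
     * ack it so it can be injected below.
     */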
    do {
        unsigned long intr_info;

        intack = hvm_vcpu_has_pending_irq(v);
        if ( likely(intack.source == hvm_intsrc_none) )
            goto out;

        if ( unlikely(nvmx_intr_intercept(v, intack)) )
            goto out;

        intblk = hvm_interrupt_blocked(v, intack);
        if ( cpu_has_vmx_virtual_intr_delivery )
        {
            /* Set "Interrupt-window exiting" for ExtINT and NMI. */
            if ( (intblk != hvm_intblk_none) &&
                 (intack.source == hvm_intsrc_pic ||
                  intack.source == hvm_intsrc_vector ||
                  intack.source == hvm_intsrc_nmi) )
            {
                vmx_enable_intr_window(v, intack);
                goto out;
            }

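            /*
             * An event is already queued for injection on this VM entry:
             * don't overwrite it; just open a window for the new one where
             * needed and try again on the next entry.
             */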
            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
            {
                if ( (intack.source == hvm_intsrc_pic) ||
                     (intack.source == hvm_intsrc_nmi) ||
                     (intack.source == hvm_intsrc_mce) )
                    vmx_enable_intr_window(v, intack);

                goto out;
            }
        }
        else if ( intblk == hvm_intblk_tpr )
        {
            ASSERT(vlapic_enabled(vcpu_vlapic(v)));
            ASSERT(intack.source == hvm_intsrc_lapic);
            tpr_threshold = intack.vector >> 4;
            goto out;
        }
        else if ( intblk != hvm_intblk_none )
        {
            vmx_enable_intr_window(v, intack);
            goto out;
        }
        else
        {
            __vmread(VM_ENTRY_INTR_INFO, &intr_info);
            if ( intr_info & INTR_INFO_VALID_MASK )
            {
                vmx_enable_intr_window(v, intack);
                goto out;
            }
        }

        intack = hvm_vcpu_ack_pending_irq(v, intack);
    } while ( intack.source == hvm_intsrc_none );

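    /*
     * intack now refers to an event that has been acked and can be
     * delivered immediately; inject it via the mechanism appropriate to
     * its source.
     */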
    if ( intack.source == hvm_intsrc_nmi )
    {
        vmx_inject_nmi();
    }
    else if ( intack.source == hvm_intsrc_mce )
    {
        hvm_inject_hw_exception(X86_EXC_MC, X86_EVENT_NO_EC);
    }
    else if ( cpu_has_vmx_virtual_intr_delivery &&
              intack.source != hvm_intsrc_pic &&
              intack.source != hvm_intsrc_vector )
    {
        unsigned long status;

        /*
         * intack.vector is the highest priority vector, so we set the
         * eoi_exit_bitmap for intack.vector.  This gives periodic timer
         * interrupts a chance to be posted once they become the highest
         * priority pending vector.
         */
        if ( pt_vector != -1 )
        {
#ifndef NDEBUG
            /*
             * We assert that intack.vector is the highest priority vector,
             * because only an interrupt from the vlapic can reach this
             * point and the highest vector is chosen in
             * hvm_vcpu_has_pending_irq().  However, the assertion has been
             * seen to fail; the suspicion is that the PIR is not synced to
             * the vIRR, leaving pt_vector behind in the PIR.  To help
             * verify this, dump some state when the assertion fails.
             */
            if ( unlikely(intack.vector < pt_vector) )
            {
                const struct vlapic *vlapic;
                const struct pi_desc *pi_desc;
                const uint32_t *word;
                unsigned int i;

                printk(XENLOG_ERR "%pv: intack: %u:%02x pt: %02x\n",
                       current, intack.source, intack.vector, pt_vector);

                vlapic = vcpu_vlapic(v);
                if ( vlapic && vlapic->regs )
                {
                    word = (const void *)&vlapic->regs->data[APIC_IRR];
                    printk(XENLOG_ERR "vIRR:");
                    for ( i = X86_IDT_VECTORS / 32; i-- ; )
                        printk(" %08x", word[i*4]);
                    printk("\n");
                }

                pi_desc = &v->arch.hvm.vmx.pi_desc;
                if ( pi_desc )
                {
                    word = (const void *)&pi_desc->pir;
                    printk(XENLOG_ERR " PIR:");
                    for ( i = X86_IDT_VECTORS / 32; i-- ; )
                        printk(" %08x", word[i]);
                    printk("\n");
                }
            }
#endif
            ASSERT(intack.vector >= pt_vector);
            vmx_set_eoi_exit_bitmap(v, intack.vector);
        }

        /* Update the RVI field in the guest interrupt status. */
        __vmread(GUEST_INTR_STATUS, &status);
        status &= ~VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
        status |= VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK &
                  intack.vector;
        __vmwrite(GUEST_INTR_STATUS, status);

        vmx_sync_exit_bitmap(v);

        pt_intr_post(v, intack);
    }
    else
    {
        TRACE(TRC_HVM_INJ_VIRQ, intack.vector, /*fake=*/ 0);
        vmx_inject_extint(intack.vector, intack.source);
        pt_intr_post(v, intack);
    }

    /* Is there another IRQ to queue up behind this one? */
    intack = hvm_vcpu_has_pending_irq(v);
    if ( !cpu_has_vmx_virtual_intr_delivery ||
         intack.source == hvm_intsrc_pic ||
         intack.source == hvm_intsrc_vector )
    {
        if ( unlikely(intack.source != hvm_intsrc_none) )
            vmx_enable_intr_window(v, intack);
    }

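    /*
     * Without virtual interrupt delivery the TPR threshold is kept up to
     * date by hand, so the guest takes a VM exit when lowering its TPR
     * would unblock the highest-priority pending lapic interrupt.
     */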
 out:
    if ( !nestedhvm_vcpu_in_guestmode(v) &&
         !cpu_has_vmx_virtual_intr_delivery &&
         cpu_has_vmx_tpr_shadow )
        __vmwrite(TPR_THRESHOLD, tpr_threshold);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */