/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) Allen Kay <allen.m.kay@intel.com>
 * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com>
 */

#include <xen/event.h>
#include <xen/iommu.h>
#include <xen/cpu.h>
#include <xen/irq.h>
#include <asm/hvm/irq.h>
#include <asm/hvm/support.h>
#include <asm/io_apic.h>

static DEFINE_PER_CPU(struct list_head, dpci_list);

/*
 * These two bit states help to safely schedule, deschedule, and wait until
 * the softirq has finished.
 *
 * The semantics behind these two bits are as follows:
 *  - STATE_SCHED - whoever modifies it has to ref-count the domain (->dom).
 *  - STATE_RUN - only the softirq is allowed to set and clear it. If it has
 *    been set, hvm_dirq_assist will RUN with a saved value of the
 *    'struct domain' copied from 'pirq_dpci->dom' before STATE_RUN was set.
 *
 * The usual states are: STATE_SCHED(set) -> STATE_RUN(set) ->
 * STATE_SCHED(unset) -> STATE_RUN(unset).
 *
 * However the states can also diverge, such as: STATE_SCHED(set) ->
 * STATE_SCHED(unset) -> STATE_RUN(set) -> STATE_RUN(unset). That means
 * 'hvm_dirq_assist' never ran and that the softirq did not do any
 * ref-counting.
 */

enum {
    STATE_SCHED,
    STATE_RUN
};

/*
 * This can be called multiple times, but the softirq is only raised once.
 * That is, until the STATE_SCHED state has been cleared. The state can be
 * cleared by: the 'dpci_softirq' (when it has executed 'hvm_dirq_assist'),
 * or by 'pt_pirq_softirq_reset' (which will try to clear the state before
 * the softirq had a chance to run).
 */
static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
{
    unsigned long flags;

    if ( test_and_set_bit(STATE_SCHED, &pirq_dpci->state) )
        return;

    get_knownalive_domain(pirq_dpci->dom);

    local_irq_save(flags);
    list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list));
    local_irq_restore(flags);

    raise_softirq(HVM_DPCI_SOFTIRQ);
}

/*
 * If we are racing with softirq_dpci (STATE_SCHED) we return
 * true. Otherwise we return false.
 *
 * If it is false, it is the caller's responsibility to make sure
 * that the softirq (with the event_lock dropped) has run.
 */
bool pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci)
{
    if ( pirq_dpci->state & ((1 << STATE_RUN) | (1 << STATE_SCHED)) )
        return true;

    /*
     * If in the future we were to call 'raise_softirq_for' right away
     * after 'pt_pirq_softirq_active' we would have to reset the list first
     * (otherwise it might contain stale data).
     */
    return false;
}

/*
 * Reset the pirq_dpci->dom parameter to NULL.
 *
 * This function checks the different states to make sure it can do it
 * at the right time. If it unschedules 'hvm_dirq_assist' from running,
 * it also drops the domain refcount (which is what the softirq would have
 * done).
 */
static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci)
{
    struct domain *d = pirq_dpci->dom;

    ASSERT(spin_is_locked(&d->event_lock));

    switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 0) )
    {
    case (1 << STATE_SCHED):
        /*
         * We are going to try to de-schedule the softirq before it goes in
         * STATE_RUN. Whoever clears STATE_SCHED MUST refcount the 'dom'.
         */
        put_domain(d);
        /* fallthrough. */
    case (1 << STATE_RUN):
    case (1 << STATE_RUN) | (1 << STATE_SCHED):
        /*
         * The reason it is OK to reset 'dom' when the STATE_RUN bit is set is
         * due to a shortcut 'dpci_softirq' implements. It stashes the 'dom'
         * in a local variable before it sets STATE_RUN - and therefore will
         * not dereference '->dom', which would crash.
         */
        pirq_dpci->dom = NULL;
        break;
    }
    /*
     * Inhibit 'hvm_dirq_assist' from doing anything useful and at worst
     * calling 'set_timer' which will blow up (as we have called kill_timer
     * or never initialized it). Note that we hold the lock that
     * 'hvm_dirq_assist' could be spinning on.
     */
    pirq_dpci->masked = 0;
}

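/*
 * Only legacy (non-MSI, non-translated) interrupts need the timeout timer
 * that hvm_dirq_assist arms to cope with guests that never EOI the line.
 */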
bool pt_irq_need_timer(uint32_t flags)
{
    return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE));
}

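/*
 * pt_pirq_iterate() callback: EOI any pirq that has been flagged with
 * HVM_IRQ_DPCI_EOI_LATCH, clearing its masked/pending state.
 */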
static int pt_irq_guest_eoi(struct domain *d, struct hvm_pirq_dpci *pirq_dpci,
                            void *arg)
{
    if ( __test_and_clear_bit(_HVM_IRQ_DPCI_EOI_LATCH_SHIFT,
                              &pirq_dpci->flags) )
    {
        pirq_dpci->masked = 0;
        pirq_dpci->pending = 0;
        pirq_guest_eoi(dpci_pirq(pirq_dpci));
    }

    return 0;
}

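/*
 * Timer callback, fired when the guest has not EOI-ed the interrupt within
 * PT_IRQ_TIME_OUT: deassert the guest line(s) and EOI the machine interrupt
 * so the physical line is not left asserted forever.
 */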
static void pt_irq_time_out(void *data)
{
    struct hvm_pirq_dpci *irq_map = data;
    const struct hvm_irq_dpci *dpci;
    const struct dev_intx_gsi_link *digl;

    spin_lock(&irq_map->dom->event_lock);

    if ( irq_map->flags & HVM_IRQ_DPCI_IDENTITY_GSI )
    {
        ASSERT(is_hardware_domain(irq_map->dom));
        /*
         * Identity mapped, no need to iterate over the guest GSI list to find
         * other pirqs sharing the same guest GSI.
         *
         * In the identity mapped case the EOI can also be done now; this way
         * the iteration over the list of domain pirqs is avoided.
         */
        hvm_gsi_deassert(irq_map->dom, dpci_pirq(irq_map)->pirq);
        irq_map->flags |= HVM_IRQ_DPCI_EOI_LATCH;
        pt_irq_guest_eoi(irq_map->dom, irq_map, NULL);
        spin_unlock(&irq_map->dom->event_lock);
        return;
    }

    dpci = domain_get_irq_dpci(irq_map->dom);
    if ( unlikely(!dpci) )
    {
        ASSERT_UNREACHABLE();
        spin_unlock(&irq_map->dom->event_lock);
        return;
    }
    list_for_each_entry ( digl, &irq_map->digl_list, list )
    {
        unsigned int guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx);
        const struct hvm_girq_dpci_mapping *girq;

        list_for_each_entry ( girq, &dpci->girq[guest_gsi], list )
        {
            struct pirq *pirq = pirq_info(irq_map->dom, girq->machine_gsi);

            pirq_dpci(pirq)->flags |= HVM_IRQ_DPCI_EOI_LATCH;
        }
        hvm_pci_intx_deassert(irq_map->dom, digl->device, digl->intx);
    }

    pt_pirq_iterate(irq_map->dom, pt_irq_guest_eoi, NULL);

    spin_unlock(&irq_map->dom->event_lock);
}

struct hvm_irq_dpci *domain_get_irq_dpci(const struct domain *d)
{
    if ( !d || !is_hvm_domain(d) )
        return NULL;

    return hvm_domain_irq(d)->dpci;
}

void free_hvm_irq_dpci(struct hvm_irq_dpci *dpci)
{
    xfree(dpci);
}

/*
 * This routine handles lowest-priority interrupts using the vector-hashing
 * mechanism. As an example, modern Intel CPUs use this method to handle
 * lowest-priority interrupts.
 *
 * Here are the details of the vector-hashing mechanism:
 * 1. For lowest-priority interrupts, store all the possible destination
 *    vCPUs in an array.
 * 2. Use "gvec % max number of destination vCPUs" to find the right
 *    destination vCPU in the array for the lowest-priority interrupt.
 */
static struct vcpu *vector_hashing_dest(const struct domain *d,
                                        uint32_t dest_id,
                                        bool dest_mode,
                                        uint8_t gvec)

{
    unsigned long *dest_vcpu_bitmap;
    unsigned int dest_vcpus = 0;
    struct vcpu *v, *dest = NULL;
    unsigned int i;

    dest_vcpu_bitmap = xzalloc_array(unsigned long,
                                     BITS_TO_LONGS(d->max_vcpus));
    if ( !dest_vcpu_bitmap )
        return NULL;

    for_each_vcpu ( d, v )
    {
        if ( !vlapic_match_dest(vcpu_vlapic(v), NULL, APIC_DEST_NOSHORT,
                                dest_id, dest_mode) )
            continue;

        __set_bit(v->vcpu_id, dest_vcpu_bitmap);
        dest_vcpus++;
    }

    if ( dest_vcpus != 0 )
    {
        unsigned int mod = gvec % dest_vcpus;
        unsigned int idx = 0;

        for ( i = 0; i <= mod; i++ )
        {
            idx = find_next_bit(dest_vcpu_bitmap, d->max_vcpus, idx) + 1;
            BUG_ON(idx > d->max_vcpus);
        }

        dest = d->vcpu[idx - 1];
    }

    xfree(dest_vcpu_bitmap);

    return dest;
}

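/*
 * Bind a machine irq (pirq) to a guest interrupt source, either an MSI
 * (PT_IRQ_TYPE_MSI / PT_IRQ_TYPE_MSI_TRANSLATE) or a PCI INTx/GSI
 * (PT_IRQ_TYPE_PCI), as described by the xen_domctl_bind_pt_irq argument.
 */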
int pt_irq_create_bind(
    struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind)
{
    struct hvm_irq_dpci *hvm_irq_dpci;
    struct hvm_pirq_dpci *pirq_dpci;
    struct pirq *info;
    int rc, pirq = pt_irq_bind->machine_irq;

    if ( pirq < 0 || pirq >= d->nr_pirqs )
        return -EINVAL;

 restart:
    spin_lock(&d->event_lock);

    hvm_irq_dpci = domain_get_irq_dpci(d);
    if ( !hvm_irq_dpci && !is_hardware_domain(d) )
    {
        unsigned int i;

        /*
         * NB: the hardware domain doesn't use an hvm_irq_dpci struct because
         * it's only allowed to identity map GSIs, and so the data contained in
         * that struct (used to map guest GSIs into machine GSIs and perform
         * interrupt routing) is completely useless to it.
         */
        hvm_irq_dpci = xzalloc(struct hvm_irq_dpci);
        if ( hvm_irq_dpci == NULL )
        {
            spin_unlock(&d->event_lock);
            return -ENOMEM;
        }
        for ( i = 0; i < NR_HVM_DOMU_IRQS; i++ )
            INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]);

        hvm_domain_irq(d)->dpci = hvm_irq_dpci;
    }

    info = pirq_get_info(d, pirq);
    if ( !info )
    {
        spin_unlock(&d->event_lock);
        return -ENOMEM;
    }
    pirq_dpci = pirq_dpci(info);

    /*
     * A crude 'while' loop with us dropping the spinlock and giving
     * the softirq_dpci a chance to run.
     * We MUST check for this condition as the softirq could be scheduled
     * and hasn't run yet. Note that this code replaced tasklet_kill, which
     * would have spun forever and would do the same thing (wait to flush out
     * outstanding hvm_dirq_assist calls).
     */
    if ( pt_pirq_softirq_active(pirq_dpci) )
    {
        spin_unlock(&d->event_lock);
        cpu_relax();
        goto restart;
    }

    switch ( pt_irq_bind->irq_type )
    {
    case PT_IRQ_TYPE_MSI:
    {
        uint8_t dest, delivery_mode;
        bool dest_mode;
        int dest_vcpu_id;
        const struct vcpu *vcpu;
        uint32_t gflags = pt_irq_bind->u.msi.gflags &
                          ~XEN_DOMCTL_VMSI_X86_UNMASKED;

        if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
        {
            pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED | HVM_IRQ_DPCI_MACH_MSI |
                               HVM_IRQ_DPCI_GUEST_MSI;
            pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
            pirq_dpci->gmsi.gflags = gflags;
            /*
             * 'pt_irq_create_bind' can be called after 'pt_irq_destroy_bind'.
             * The 'pirq_cleanup_check' which would free the structure is only
             * called if the event channel for the PIRQ is active. However
             * OS-es that use event channels usually bind PIRQs to event
             * channels and unbind them before calling 'pt_irq_destroy_bind' -
             * with the result that we re-use the 'dpci' structure. This can
             * be reproduced by unloading and loading the driver for a device.
             *
             * As such, on every 'pt_irq_create_bind' call we MUST set it.
             */
            pirq_dpci->dom = d;
            /* Bind after hvm_irq_dpci is set up, to avoid racing with the irq handler. */
            rc = pirq_guest_bind(d->vcpu[0], info, 0);
            if ( rc == 0 && pt_irq_bind->u.msi.gtable )
            {
                rc = msixtbl_pt_register(d, info, pt_irq_bind->u.msi.gtable);
                if ( unlikely(rc) )
                {
                    pirq_guest_unbind(d, info);
                    /*
                     * Between 'pirq_guest_bind' and 'pirq_guest_unbind' an
                     * interrupt can be scheduled. No more of them are going
                     * to be scheduled but we must deal with the one that may
                     * be in the queue.
                     */
                    pt_pirq_softirq_reset(pirq_dpci);
                }
            }
            if ( unlikely(rc) )
            {
                pirq_dpci->gmsi.gflags = 0;
                pirq_dpci->gmsi.gvec = 0;
                pirq_dpci->dom = NULL;
                pirq_dpci->flags = 0;
                pirq_cleanup_check(info, d);
                spin_unlock(&d->event_lock);
                return rc;
            }
        }
        else
        {
            uint32_t mask = HVM_IRQ_DPCI_MACH_MSI | HVM_IRQ_DPCI_GUEST_MSI;

            if ( (pirq_dpci->flags & mask) != mask )
            {
                spin_unlock(&d->event_lock);
                return -EBUSY;
            }

            /* If pirq is already mapped as vmsi, update guest data/addr. */
            if ( pirq_dpci->gmsi.gvec != pt_irq_bind->u.msi.gvec ||
                 pirq_dpci->gmsi.gflags != gflags )
            {
                /* Directly clear pending EOIs before enabling new MSI info. */
                pirq_guest_eoi(info);

                pirq_dpci->gmsi.gvec = pt_irq_bind->u.msi.gvec;
                pirq_dpci->gmsi.gflags = gflags;
            }
        }
        /* Calculate dest_vcpu_id for MSI-type pirq migration. */
        dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
                         XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
        dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK;
        delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
                                  XEN_DOMCTL_VMSI_X86_DELIV_MASK);

        dest_vcpu_id = hvm_girq_dest_2_vcpu_id(d, dest, dest_mode);
        pirq_dpci->gmsi.dest_vcpu_id = dest_vcpu_id;
        spin_unlock(&d->event_lock);

        pirq_dpci->gmsi.posted = false;
        vcpu = (dest_vcpu_id >= 0) ? d->vcpu[dest_vcpu_id] : NULL;
        if ( iommu_intpost )
        {
            if ( delivery_mode == dest_LowestPrio )
                vcpu = vector_hashing_dest(d, dest, dest_mode,
                                           pirq_dpci->gmsi.gvec);
            if ( vcpu )
                pirq_dpci->gmsi.posted = true;
        }
        if ( dest_vcpu_id >= 0 )
            hvm_migrate_pirqs(d->vcpu[dest_vcpu_id]);

        /* Use interrupt posting if it is supported. */
        if ( iommu_intpost )
            pi_update_irte(vcpu ? &vcpu->arch.hvm_vmx.pi_desc : NULL,
                           info, pirq_dpci->gmsi.gvec);

        if ( pt_irq_bind->u.msi.gflags & XEN_DOMCTL_VMSI_X86_UNMASKED )
        {
            unsigned long flags;
            struct irq_desc *desc = pirq_spin_lock_irq_desc(info, &flags);

            if ( !desc )
            {
                pt_irq_destroy_bind(d, pt_irq_bind);
                return -EINVAL;
            }

            guest_mask_msi_irq(desc, false);
            spin_unlock_irqrestore(&desc->lock, flags);
        }

        break;
    }

    case PT_IRQ_TYPE_PCI:
    case PT_IRQ_TYPE_MSI_TRANSLATE:
    {
        struct dev_intx_gsi_link *digl = NULL;
        struct hvm_girq_dpci_mapping *girq = NULL;
        unsigned int guest_gsi;

        /*
         * Mapping GSIs for the hardware domain is different from doing it for
         * an unprivileged guest: the hardware domain is only allowed to
         * identity map GSIs, and as such all the data in the u.pci union is
         * discarded.
         */
        if ( hvm_irq_dpci )
        {
            unsigned int link;

            digl = xmalloc(struct dev_intx_gsi_link);
            girq = xmalloc(struct hvm_girq_dpci_mapping);

            if ( !digl || !girq )
            {
                spin_unlock(&d->event_lock);
                xfree(girq);
                xfree(digl);
                return -ENOMEM;
            }

            girq->bus = digl->bus = pt_irq_bind->u.pci.bus;
            girq->device = digl->device = pt_irq_bind->u.pci.device;
            girq->intx = digl->intx = pt_irq_bind->u.pci.intx;
            list_add_tail(&digl->list, &pirq_dpci->digl_list);

            guest_gsi = hvm_pci_intx_gsi(digl->device, digl->intx);
            link = hvm_pci_intx_link(digl->device, digl->intx);

            hvm_irq_dpci->link_cnt[link]++;

            girq->machine_gsi = pirq;
            list_add_tail(&girq->list, &hvm_irq_dpci->girq[guest_gsi]);
        }
        else
        {
            ASSERT(is_hardware_domain(d));

            /* MSI_TRANSLATE is not supported for the hardware domain. */
            if ( pt_irq_bind->irq_type != PT_IRQ_TYPE_PCI ||
                 pirq >= hvm_domain_irq(d)->nr_gsis )
            {
                spin_unlock(&d->event_lock);

                return -EINVAL;
            }
            guest_gsi = pirq;
        }

        /* Bind the same machine irq only once in the same domain. */
        if ( !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
        {
            unsigned int share;

            /* MUST be set, as the pirq_dpci can be re-used. */
            pirq_dpci->dom = d;
            if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_TRANSLATE )
            {
                pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
                                   HVM_IRQ_DPCI_MACH_MSI |
                                   HVM_IRQ_DPCI_GUEST_PCI |
                                   HVM_IRQ_DPCI_TRANSLATE;
                share = 0;
            }
            else    /* PT_IRQ_TYPE_PCI */
            {
                pirq_dpci->flags = HVM_IRQ_DPCI_MAPPED |
                                   HVM_IRQ_DPCI_MACH_PCI |
                                   HVM_IRQ_DPCI_GUEST_PCI;
                if ( !is_hardware_domain(d) )
                    share = BIND_PIRQ__WILL_SHARE;
                else
                {
                    int mask = vioapic_get_mask(d, guest_gsi);
                    int trigger_mode = vioapic_get_trigger_mode(d, guest_gsi);

                    if ( mask < 0 || trigger_mode < 0 )
                    {
                        spin_unlock(&d->event_lock);

                        ASSERT_UNREACHABLE();
                        return -EINVAL;
                    }
                    pirq_dpci->flags |= HVM_IRQ_DPCI_IDENTITY_GSI;
                    /*
                     * Check whether the corresponding vIO-APIC pin is
                     * configured as level or edge triggered; level triggered
                     * interrupts will be marked as shareable.
                     */
                    ASSERT(!mask);
                    share = trigger_mode;
                }
            }

            /* Init timer before binding. */
            if ( pt_irq_need_timer(pirq_dpci->flags) )
                init_timer(&pirq_dpci->timer, pt_irq_time_out, pirq_dpci, 0);
            /* Deal with the GSI for legacy devices. */
            rc = pirq_guest_bind(d->vcpu[0], info, share);
            if ( unlikely(rc) )
            {
                if ( pt_irq_need_timer(pirq_dpci->flags) )
                    kill_timer(&pirq_dpci->timer);
                /*
                 * There is no path for __do_IRQ to schedule a softirq as
                 * IRQ_GUEST is not set. As such we can reset 'dom' directly.
                 */
                pirq_dpci->dom = NULL;
                if ( hvm_irq_dpci )
                {
                    unsigned int link;

                    ASSERT(girq && digl);
                    list_del(&girq->list);
                    list_del(&digl->list);
                    link = hvm_pci_intx_link(digl->device, digl->intx);
                    hvm_irq_dpci->link_cnt[link]--;
                }
                pirq_dpci->flags = 0;
                pirq_cleanup_check(info, d);
                spin_unlock(&d->event_lock);
                xfree(girq);
                xfree(digl);
                return rc;
            }
        }

        spin_unlock(&d->event_lock);

        if ( iommu_verbose )
        {
            char buf[24] = "";

            if ( digl )
                snprintf(buf, ARRAY_SIZE(buf), " dev=%02x.%02x.%u intx=%u",
                         digl->bus, PCI_SLOT(digl->device),
                         PCI_FUNC(digl->device), digl->intx);

            printk(XENLOG_G_INFO "d%d: bind: m_gsi=%u g_gsi=%u%s\n",
                   d->domain_id, pirq, guest_gsi, buf);
        }
        break;
    }

    default:
        spin_unlock(&d->event_lock);
        return -EOPNOTSUPP;
    }

    return 0;
}

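/*
 * Undo a previous pt_irq_create_bind(): tear down the guest GSI / MSI
 * mapping for the given machine irq and, once no users are left, unbind
 * the irq from the guest and release the associated state.
 */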
int pt_irq_destroy_bind(
    struct domain *d, const struct xen_domctl_bind_pt_irq *pt_irq_bind)
{
    struct hvm_irq_dpci *hvm_irq_dpci;
    struct hvm_pirq_dpci *pirq_dpci;
    unsigned int machine_gsi = pt_irq_bind->machine_irq;
    struct pirq *pirq;
    const char *what = NULL;

    switch ( pt_irq_bind->irq_type )
    {
    case PT_IRQ_TYPE_PCI:
    case PT_IRQ_TYPE_MSI_TRANSLATE:
        if ( iommu_verbose )
        {
            unsigned int device = pt_irq_bind->u.pci.device;
            unsigned int intx = pt_irq_bind->u.pci.intx;

            printk(XENLOG_G_INFO
                   "d%d: unbind: m_gsi=%u g_gsi=%u dev=%02x:%02x.%u intx=%u\n",
                   d->domain_id, machine_gsi, hvm_pci_intx_gsi(device, intx),
                   pt_irq_bind->u.pci.bus,
                   PCI_SLOT(device), PCI_FUNC(device), intx);
        }
        break;
    case PT_IRQ_TYPE_MSI:
        break;
    default:
        return -EOPNOTSUPP;
    }

    spin_lock(&d->event_lock);

    hvm_irq_dpci = domain_get_irq_dpci(d);

    if ( !hvm_irq_dpci && !is_hardware_domain(d) )
    {
        spin_unlock(&d->event_lock);
        return -EINVAL;
    }

    pirq = pirq_info(d, machine_gsi);
    pirq_dpci = pirq_dpci(pirq);

    if ( hvm_irq_dpci && pt_irq_bind->irq_type != PT_IRQ_TYPE_MSI )
    {
        unsigned int bus = pt_irq_bind->u.pci.bus;
        unsigned int device = pt_irq_bind->u.pci.device;
        unsigned int intx = pt_irq_bind->u.pci.intx;
        unsigned int guest_gsi = hvm_pci_intx_gsi(device, intx);
        unsigned int link = hvm_pci_intx_link(device, intx);
        struct hvm_girq_dpci_mapping *girq;
        struct dev_intx_gsi_link *digl, *tmp;

        list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list )
        {
            if ( girq->bus == bus &&
                 girq->device == device &&
                 girq->intx == intx &&
                 girq->machine_gsi == machine_gsi )
            {
                list_del(&girq->list);
                xfree(girq);
                girq = NULL;
                break;
            }
        }

        if ( girq )
        {
            spin_unlock(&d->event_lock);
            return -EINVAL;
        }

        hvm_irq_dpci->link_cnt[link]--;

        /* Clear the mirq info. */
        if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
        {
            list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list )
            {
                if ( digl->bus == bus &&
                     digl->device == device &&
                     digl->intx == intx )
                {
                    list_del(&digl->list);
                    xfree(digl);
                }
            }
            what = list_empty(&pirq_dpci->digl_list) ? "final" : "partial";
        }
        else
            what = "bogus";
    }
    else if ( pirq_dpci && pirq_dpci->gmsi.posted )
        pi_update_irte(NULL, pirq, 0);

    if ( pirq_dpci && (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
         list_empty(&pirq_dpci->digl_list) )
    {
        pirq_guest_unbind(d, pirq);
        msixtbl_pt_unregister(d, pirq);
        if ( pt_irq_need_timer(pirq_dpci->flags) )
            kill_timer(&pirq_dpci->timer);
        pirq_dpci->flags = 0;
        /*
         * See the comment in pt_irq_create_bind's PT_IRQ_TYPE_MSI before the
         * call to pt_pirq_softirq_reset.
         */
        pt_pirq_softirq_reset(pirq_dpci);

        pirq_cleanup_check(pirq, d);
    }

    spin_unlock(&d->event_lock);

    if ( what && iommu_verbose )
    {
        unsigned int device = pt_irq_bind->u.pci.device;
        char buf[24] = "";

        if ( hvm_irq_dpci )
            snprintf(buf, ARRAY_SIZE(buf), " dev=%02x.%02x.%u intx=%u",
                     pt_irq_bind->u.pci.bus, PCI_SLOT(device),
                     PCI_FUNC(device), pt_irq_bind->u.pci.intx);

        printk(XENLOG_G_INFO "d%d %s unmap: m_irq=%u%s\n",
               d->domain_id, what, machine_gsi, buf);
    }

    return 0;
}

void pt_pirq_init(struct domain *d, struct hvm_pirq_dpci *dpci)
{
    INIT_LIST_HEAD(&dpci->digl_list);
    dpci->gmsi.dest_vcpu_id = -1;
}

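/*
 * Returns true - and clears the cached domain pointer - when the pirq_dpci
 * is neither mapped (no flags set) nor has a softirq scheduled or running,
 * i.e. the structure is no longer in use.
 */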
bool pt_pirq_cleanup_check(struct hvm_pirq_dpci *dpci)
{
    if ( !dpci->flags && !pt_pirq_softirq_active(dpci) )
    {
        dpci->dom = NULL;
        return true;
    }
    return false;
}

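/*
 * Iterate over the domain's pirqs, looking them up in batches of up to 8 via
 * the radix tree, and invoke 'cb' on each one that has HVM_IRQ_DPCI_MAPPED
 * set. A non-zero return value from 'cb' stops the iteration and is
 * propagated to the caller. Must be called with d->event_lock held.
 */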
int pt_pirq_iterate(struct domain *d,
                    int (*cb)(struct domain *,
                              struct hvm_pirq_dpci *, void *),
                    void *arg)
{
    int rc = 0;
    unsigned int pirq = 0, n, i;
    struct pirq *pirqs[8];

    ASSERT(spin_is_locked(&d->event_lock));

    do {
        n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
                                   ARRAY_SIZE(pirqs));
        for ( i = 0; i < n; ++i )
        {
            struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirqs[i]);

            pirq = pirqs[i]->pirq;
            if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
                rc = cb(d, pirq_dpci, arg);
        }
    } while ( !rc && ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );

    return rc;
}

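/*
 * Mark a machine interrupt that is bound to this HVM domain as pending
 * ('masked') and defer its delivery to dpci_softirq() by raising
 * HVM_DPCI_SOFTIRQ. Returns 1 if the interrupt was handed off, 0 if it is
 * not a passed-through (mapped) interrupt.
 */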
int hvm_do_IRQ_dpci(struct domain *d, struct pirq *pirq)
{
    struct hvm_irq_dpci *dpci = domain_get_irq_dpci(d);
    struct hvm_pirq_dpci *pirq_dpci = pirq_dpci(pirq);

    ASSERT(is_hvm_domain(d));

    if ( !iommu_enabled || (!is_hardware_domain(d) && !dpci) ||
         !pirq_dpci || !(pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) )
        return 0;

    pirq_dpci->masked = 1;
    raise_softirq_for(pirq_dpci);
    return 1;
}

/* Called with d->event_lock held. */
static void __msi_pirq_eoi(struct hvm_pirq_dpci *pirq_dpci)
{
    irq_desc_t *desc;

    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MAPPED) &&
         (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) )
    {
        struct pirq *pirq = dpci_pirq(pirq_dpci);

        BUG_ON(!local_irq_is_enabled());
        desc = pirq_spin_lock_irq_desc(pirq, NULL);
        if ( !desc )
            return;
        desc_guest_eoi(desc, pirq);
    }
}

static int _hvm_dpci_msi_eoi(struct domain *d,
                             struct hvm_pirq_dpci *pirq_dpci, void *arg)
{
    int vector = (long)arg;

    if ( (pirq_dpci->flags & HVM_IRQ_DPCI_MACH_MSI) &&
         (pirq_dpci->gmsi.gvec == vector) )
    {
        unsigned int dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
                                      XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
        bool dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK;

        if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest,
                               dest_mode) )
        {
            __msi_pirq_eoi(pirq_dpci);
            return 1;
        }
    }

    return 0;
}

void hvm_dpci_msi_eoi(struct domain *d, int vector)
{
    if ( !iommu_enabled || !hvm_domain_irq(d)->dpci )
        return;

    spin_lock(&d->event_lock);
    pt_pirq_iterate(d, _hvm_dpci_msi_eoi, (void *)(long)vector);
    spin_unlock(&d->event_lock);
}

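/*
 * Softirq handler body for a single pending pirq: deliver it to the guest
 * via event channel, virtual MSI, or (emulated) INTx/GSI assertion, and arm
 * the timeout timer for legacy interrupts that require a guest EOI.
 */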
static void hvm_dirq_assist(struct domain *d, struct hvm_pirq_dpci *pirq_dpci)
{
    if ( unlikely(!hvm_domain_irq(d)->dpci) && !is_hardware_domain(d) )
    {
        ASSERT_UNREACHABLE();
        return;
    }

    spin_lock(&d->event_lock);
    if ( test_and_clear_bool(pirq_dpci->masked) )
    {
        struct pirq *pirq = dpci_pirq(pirq_dpci);
        const struct dev_intx_gsi_link *digl;

        if ( hvm_domain_use_pirq(d, pirq) )
        {
            send_guest_pirq(d, pirq);

            if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
            {
                spin_unlock(&d->event_lock);
                return;
            }
        }

        if ( pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI )
        {
            vmsi_deliver_pirq(d, pirq_dpci);
            spin_unlock(&d->event_lock);
            return;
        }

        list_for_each_entry ( digl, &pirq_dpci->digl_list, list )
        {
            ASSERT(!(pirq_dpci->flags & HVM_IRQ_DPCI_IDENTITY_GSI));
            hvm_pci_intx_assert(d, digl->device, digl->intx);
            pirq_dpci->pending++;
        }

        if ( pirq_dpci->flags & HVM_IRQ_DPCI_IDENTITY_GSI )
        {
            hvm_gsi_assert(d, pirq->pirq);
            pirq_dpci->pending++;
        }

        if ( pirq_dpci->flags & HVM_IRQ_DPCI_TRANSLATE )
        {
            /* For MSI translated to an INTx interrupt, EOI as early as possible. */
            __msi_pirq_eoi(pirq_dpci);
            spin_unlock(&d->event_lock);
            return;
        }

        /*
         * Set a timer to see if the guest can finish the interrupt or not. For
         * example, the guest OS may unmask the PIC during boot, before the
         * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the
         * guest will never deal with the irq, so the physical interrupt line
         * will never be deasserted.
         */
        ASSERT(pt_irq_need_timer(pirq_dpci->flags));
        set_timer(&pirq_dpci->timer, NOW() + PT_IRQ_TIME_OUT);
    }
    spin_unlock(&d->event_lock);
}

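/*
 * Guest EOI bookkeeping for a line interrupt: drop one pending assertion
 * and, once none are left, the vIO-APIC pin is unmasked and the interrupt
 * uses the timeout timer (i.e. it is a legacy line interrupt), stop the
 * timer and EOI the machine interrupt.
 */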
static void hvm_pirq_eoi(struct pirq *pirq,
                         const union vioapic_redir_entry *ent)
{
    struct hvm_pirq_dpci *pirq_dpci;

    if ( !pirq )
    {
        ASSERT_UNREACHABLE();
        return;
    }

    pirq_dpci = pirq_dpci(pirq);

    /*
     * No need to get the vector lock for the timer
     * since the interrupt is still not EOIed.
     */
    if ( --pirq_dpci->pending ||
         (ent && ent->fields.mask) ||
         !pt_irq_need_timer(pirq_dpci->flags) )
        return;

    stop_timer(&pirq_dpci->timer);
    pirq_guest_eoi(pirq);
}

static void __hvm_dpci_eoi(struct domain *d,
                           const struct hvm_girq_dpci_mapping *girq,
                           const union vioapic_redir_entry *ent)
{
    struct pirq *pirq = pirq_info(d, girq->machine_gsi);

    if ( !hvm_domain_use_pirq(d, pirq) )
        hvm_pci_intx_deassert(d, girq->device, girq->intx);

    hvm_pirq_eoi(pirq, ent);
}

static void hvm_gsi_eoi(struct domain *d, unsigned int gsi,
                        const union vioapic_redir_entry *ent)
{
    struct pirq *pirq = pirq_info(d, gsi);

    /* Check if the GSI is actually mapped. */
    if ( !pirq_dpci(pirq) )
        return;

    hvm_gsi_deassert(d, gsi);
    hvm_pirq_eoi(pirq, ent);
}

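/*
 * Handle a guest EOI of 'guest_gsi': the hardware domain EOIs its identity
 * mapped GSI directly; for other domains ISA IRQs go through
 * hvm_dpci_isairq_eoi(), while for PCI GSIs every machine GSI bound to this
 * guest GSI is deasserted and EOIed.
 */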
void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
                  const union vioapic_redir_entry *ent)
{
    const struct hvm_irq_dpci *hvm_irq_dpci;
    const struct hvm_girq_dpci_mapping *girq;

    if ( !iommu_enabled )
        return;

    if ( is_hardware_domain(d) )
    {
        spin_lock(&d->event_lock);
        hvm_gsi_eoi(d, guest_gsi, ent);
        goto unlock;
    }

    if ( guest_gsi < NR_ISAIRQS )
    {
        hvm_dpci_isairq_eoi(d, guest_gsi);
        return;
    }

    spin_lock(&d->event_lock);
    hvm_irq_dpci = domain_get_irq_dpci(d);

    if ( !hvm_irq_dpci )
        goto unlock;

    list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list )
        __hvm_dpci_eoi(d, girq, ent);

 unlock:
    spin_unlock(&d->event_lock);
}

/*
 * Note: 'pt_pirq_softirq_reset' can clear the STATE_SCHED before we get to
 * doing it. If that is the case we let 'pt_pirq_softirq_reset' do the
 * ref-counting.
 */
static void dpci_softirq(void)
{
    unsigned int cpu = smp_processor_id();
    LIST_HEAD(our_list);

    local_irq_disable();
    list_splice_init(&per_cpu(dpci_list, cpu), &our_list);
    local_irq_enable();

    while ( !list_empty(&our_list) )
    {
        struct hvm_pirq_dpci *pirq_dpci;
        struct domain *d;

        pirq_dpci = list_entry(our_list.next, struct hvm_pirq_dpci, softirq_list);
        list_del(&pirq_dpci->softirq_list);

        d = pirq_dpci->dom;
        smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */
        if ( test_and_set_bit(STATE_RUN, &pirq_dpci->state) )
        {
            unsigned long flags;

            /* Put back on the list and retry. */
            local_irq_save(flags);
            list_add_tail(&pirq_dpci->softirq_list, &this_cpu(dpci_list));
            local_irq_restore(flags);

            raise_softirq(HVM_DPCI_SOFTIRQ);
            continue;
        }
        /*
         * The one who clears STATE_SCHED MUST refcount the domain.
         */
        if ( test_and_clear_bit(STATE_SCHED, &pirq_dpci->state) )
        {
            hvm_dirq_assist(d, pirq_dpci);
            put_domain(d);
        }
        clear_bit(STATE_RUN, &pirq_dpci->state);
    }
}

static int cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;

    switch ( action )
    {
    case CPU_UP_PREPARE:
        INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
        break;
    case CPU_UP_CANCELED:
    case CPU_DEAD:
        /*
         * On CPU_DYING this callback is called (on the CPU that is dying)
         * with a possible HVM_DPCI_SOFTIRQ pending - at which point we can
         * clear out any outstanding domains (by virtue of the idle loop
         * calling the softirq later). In the CPU_DEAD case the CPU is deaf
         * and there are no pending softirqs for us to handle, so we can
         * chill.
         */
        ASSERT(list_empty(&per_cpu(dpci_list, cpu)));
        break;
    }

    return NOTIFY_DONE;
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback,
};

static int __init setup_dpci_softirq(void)
{
    unsigned int cpu;

    for_each_online_cpu(cpu)
        INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));

    open_softirq(HVM_DPCI_SOFTIRQ, dpci_softirq);
    register_cpu_notifier(&cpu_nfb);
    return 0;
}
__initcall(setup_dpci_softirq);