1 /******************************************************************************
2  * arch/x86/irq.c
3  *
4  * Portions of this file are:
5  *  Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
6  */
7 
8 #include <xen/init.h>
9 #include <xen/delay.h>
10 #include <xen/errno.h>
11 #include <xen/event.h>
12 #include <xen/irq.h>
13 #include <xen/perfc.h>
14 #include <xen/sched.h>
15 #include <xen/keyhandler.h>
16 #include <xen/compat.h>
17 #include <xen/iocap.h>
18 #include <xen/iommu.h>
19 #include <xen/symbols.h>
20 #include <xen/trace.h>
21 #include <xen/softirq.h>
22 #include <xsm/xsm.h>
23 #include <asm/msi.h>
24 #include <asm/current.h>
25 #include <asm/flushtlb.h>
26 #include <asm/mach-generic/mach_apic.h>
27 #include <public/physdev.h>
28 
29 static int parse_irq_vector_map_param(const char *s);
30 
31 /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
32 bool __read_mostly opt_noirqbalance;
33 boolean_param("noirqbalance", opt_noirqbalance);
34 
35 unsigned int __read_mostly nr_irqs_gsi = 16;
36 unsigned int __read_mostly nr_irqs;
37 integer_param("nr_irqs", nr_irqs);
38 
39 /* This default may be changed by the AMD IOMMU code */
40 int __read_mostly opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_DEFAULT;
41 custom_param("irq_vector_map", parse_irq_vector_map_param);
42 
43 vmask_t global_used_vector_map;
44 
45 struct irq_desc __read_mostly *irq_desc = NULL;
46 
47 static DECLARE_BITMAP(used_vectors, NR_VECTORS);
48 
49 static DEFINE_SPINLOCK(vector_lock);
50 
51 DEFINE_PER_CPU(vector_irq_t, vector_irq);
52 
53 DEFINE_PER_CPU(struct cpu_user_regs *, __irq_regs);
54 
55 static LIST_HEAD(irq_ratelimit_list);
56 static DEFINE_SPINLOCK(irq_ratelimit_lock);
57 static struct timer irq_ratelimit_timer;
58 
59 /* irq_ratelimit: the maximum IRQ rate allowed per 10ms window; set to 0 to disable */
60 static unsigned int __read_mostly irq_ratelimit_threshold = 10000;
61 integer_param("irq_ratelimit", irq_ratelimit_threshold);
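/*
 * Illustrative note (not part of the original source): with the default
 * threshold above, a guest-bound line firing more than 10000 times within
 * one 10ms quantum gets its handler disabled until irq_ratelimit_timer_fn()
 * re-enables it.  A hypothetical Xen command line tightening the limit
 * could look like:
 *
 *     irq_ratelimit=5000
 *
 * while irq_ratelimit=0 disables rate limiting entirely (the timer is then
 * never initialised; see irq_ratelimit_init() below).
 */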
62 
63 static int __init parse_irq_vector_map_param(const char *s)
64 {
65     const char *ss;
66     int rc = 0;
67 
68     do {
69         ss = strchr(s, ',');
70         if ( !ss )
71             ss = strchr(s, '\0');
72 
73         if ( !strncmp(s, "none", ss - s))
74             opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_NONE;
75         else if ( !strncmp(s, "global", ss - s))
76             opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_GLOBAL;
77         else if ( !strncmp(s, "per-device", ss - s))
78             opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_PERDEV;
79         else
80             rc = -EINVAL;
81 
82         s = ss + 1;
83     } while ( *ss );
84 
85     return rc;
86 }
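/*
 * Example (added for illustration): the parser above accepts a comma-
 * separated list of tokens, so a command line option such as
 *
 *     irq_vector_map=per-device
 *
 * selects OPT_IRQ_VECTOR_MAP_PERDEV, while "none" and "global" select the
 * other two modes; any unrecognised token makes the parser return -EINVAL.
 */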
87 
88 /* Must be called with interrupts disabled */
89 void lock_vector_lock(void)
90 {
91     /* Used to ensure the online set of CPUs does not change
92      * during assign_irq_vector.
93      */
94     spin_lock(&vector_lock);
95 }
96 
97 void unlock_vector_lock(void)
98 {
99     spin_unlock(&vector_lock);
100 }
101 
102 static void trace_irq_mask(u32 event, int irq, int vector, cpumask_t *mask)
103 {
104     struct {
105         unsigned int irq:16, vec:16;
106         unsigned int mask[6];
107     } d;
108     d.irq = irq;
109     d.vec = vector;
110     memset(d.mask, 0, sizeof(d.mask));
111     memcpy(d.mask, mask, min(sizeof(d.mask), sizeof(cpumask_t)));
112     trace_var(event, 1, sizeof(d), &d);
113 }
114 
115 static int __init __bind_irq_vector(int irq, int vector, const cpumask_t *cpu_mask)
116 {
117     cpumask_t online_mask;
118     int cpu;
119     struct irq_desc *desc = irq_to_desc(irq);
120 
121     BUG_ON((unsigned)irq >= nr_irqs);
122     BUG_ON((unsigned)vector >= NR_VECTORS);
123 
124     cpumask_and(&online_mask, cpu_mask, &cpu_online_map);
125     if (cpumask_empty(&online_mask))
126         return -EINVAL;
127     if ( (desc->arch.vector == vector) &&
128          cpumask_equal(desc->arch.cpu_mask, &online_mask) )
129         return 0;
130     if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
131         return -EBUSY;
132     trace_irq_mask(TRC_HW_IRQ_BIND_VECTOR, irq, vector, &online_mask);
133     for_each_cpu(cpu, &online_mask)
134         per_cpu(vector_irq, cpu)[vector] = irq;
135     desc->arch.vector = vector;
136     cpumask_copy(desc->arch.cpu_mask, &online_mask);
137     if ( desc->arch.used_vectors )
138     {
139         ASSERT(!test_bit(vector, desc->arch.used_vectors));
140         set_bit(vector, desc->arch.used_vectors);
141     }
142     desc->arch.used = IRQ_USED;
143     return 0;
144 }
145 
146 int __init bind_irq_vector(int irq, int vector, const cpumask_t *cpu_mask)
147 {
148     unsigned long flags;
149     int ret;
150 
151     spin_lock_irqsave(&vector_lock, flags);
152     ret = __bind_irq_vector(irq, vector, cpu_mask);
153     spin_unlock_irqrestore(&vector_lock, flags);
154     return ret;
155 }
156 
157 /*
158  * Dynamic IRQ allocation and deallocation for MSI
159  */
160 int create_irq(nodeid_t node)
161 {
162     int irq, ret;
163     struct irq_desc *desc;
164 
165     for (irq = nr_irqs_gsi; irq < nr_irqs; irq++)
166     {
167         desc = irq_to_desc(irq);
168         if (cmpxchg(&desc->arch.used, IRQ_UNUSED, IRQ_RESERVED) == IRQ_UNUSED)
169            break;
170     }
171 
172     if (irq >= nr_irqs)
173          return -ENOSPC;
174 
175     ret = init_one_irq_desc(desc);
176     if (!ret)
177     {
178         cpumask_t *mask = NULL;
179 
180         if ( node != NUMA_NO_NODE )
181         {
182             mask = &node_to_cpumask(node);
183             if (cpumask_empty(mask))
184                 mask = NULL;
185         }
186         ret = assign_irq_vector(irq, mask);
187     }
188     if (ret < 0)
189     {
190         desc->arch.used = IRQ_UNUSED;
191         irq = ret;
192     }
193     else if ( hardware_domain )
194     {
195         ret = irq_permit_access(hardware_domain, irq);
196         if ( ret )
197             printk(XENLOG_G_ERR
198                    "Could not grant Dom0 access to IRQ%d (error %d)\n",
199                    irq, ret);
200     }
201 
202     return irq;
203 }
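/*
 * Usage sketch (illustrative only; "dev_node" is a made-up name for the
 * device's NUMA node): an MSI setup path would typically pair the two
 * helpers along these lines:
 *
 *     int irq = create_irq(dev_node);
 *
 *     if ( irq < 0 )
 *         return irq;            // -ENOSPC, or a vector assignment error
 *     // ... bind the MSI descriptor to "irq" ...
 *     destroy_irq(irq);          // on teardown or error
 *
 * create_irq() hands back a dynamic IRQ in [nr_irqs_gsi, nr_irqs) with a
 * vector already assigned; destroy_irq() below reverses both steps.
 */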
204 
205 void destroy_irq(unsigned int irq)
206 {
207     struct irq_desc *desc = irq_to_desc(irq);
208     unsigned long flags;
209     struct irqaction *action;
210 
211     BUG_ON(!MSI_IRQ(irq));
212 
213     if ( hardware_domain )
214     {
215         int err = irq_deny_access(hardware_domain, irq);
216 
217         if ( err )
218             printk(XENLOG_G_ERR
219                    "Could not revoke Dom0 access to IRQ%u (error %d)\n",
220                    irq, err);
221     }
222 
223     spin_lock_irqsave(&desc->lock, flags);
224     desc->status  &= ~IRQ_GUEST;
225     desc->handler->shutdown(desc);
226     desc->status |= IRQ_DISABLED;
227     action = desc->action;
228     desc->action  = NULL;
229     desc->msi_desc = NULL;
230     cpumask_setall(desc->affinity);
231     spin_unlock_irqrestore(&desc->lock, flags);
232 
233     /* Wait to make sure it's not being used on another CPU */
234     do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );
235 
236     spin_lock_irqsave(&desc->lock, flags);
237     desc->handler = &no_irq_type;
238     clear_irq_vector(irq);
239     desc->arch.used_vectors = NULL;
240     spin_unlock_irqrestore(&desc->lock, flags);
241 
242     xfree(action);
243 }
244 
245 static void __clear_irq_vector(int irq)
246 {
247     int cpu, vector, old_vector;
248     cpumask_t tmp_mask;
249     struct irq_desc *desc = irq_to_desc(irq);
250 
251     BUG_ON(!desc->arch.vector);
252 
253     /* Always clear desc->arch.vector */
254     vector = desc->arch.vector;
255     cpumask_and(&tmp_mask, desc->arch.cpu_mask, &cpu_online_map);
256 
257     for_each_cpu(cpu, &tmp_mask) {
258         ASSERT( per_cpu(vector_irq, cpu)[vector] == irq );
259         per_cpu(vector_irq, cpu)[vector] = ~irq;
260     }
261 
262     desc->arch.vector = IRQ_VECTOR_UNASSIGNED;
263     cpumask_clear(desc->arch.cpu_mask);
264 
265     if ( desc->arch.used_vectors )
266     {
267         ASSERT(test_bit(vector, desc->arch.used_vectors));
268         clear_bit(vector, desc->arch.used_vectors);
269     }
270 
271     desc->arch.used = IRQ_UNUSED;
272 
273     trace_irq_mask(TRC_HW_IRQ_CLEAR_VECTOR, irq, vector, &tmp_mask);
274 
275     if ( likely(!desc->arch.move_in_progress) )
276         return;
277 
278     /* If we were in motion, also clear desc->arch.old_vector */
279     old_vector = desc->arch.old_vector;
280     cpumask_and(&tmp_mask, desc->arch.old_cpu_mask, &cpu_online_map);
281 
282     for_each_cpu(cpu, &tmp_mask) {
283         ASSERT( per_cpu(vector_irq, cpu)[old_vector] == irq );
284         TRACE_3D(TRC_HW_IRQ_MOVE_FINISH, irq, old_vector, cpu);
285         per_cpu(vector_irq, cpu)[old_vector] = ~irq;
286     }
287 
288     desc->arch.old_vector = IRQ_VECTOR_UNASSIGNED;
289     cpumask_clear(desc->arch.old_cpu_mask);
290 
291     if ( desc->arch.used_vectors )
292     {
293         ASSERT(test_bit(old_vector, desc->arch.used_vectors));
294         clear_bit(old_vector, desc->arch.used_vectors);
295     }
296 
297     desc->arch.move_in_progress = 0;
298 }
299 
300 void clear_irq_vector(int irq)
301 {
302     unsigned long flags;
303 
304     spin_lock_irqsave(&vector_lock, flags);
305     __clear_irq_vector(irq);
306     spin_unlock_irqrestore(&vector_lock, flags);
307 }
308 
309 int irq_to_vector(int irq)
310 {
311     int vector = -1;
312 
313     BUG_ON(irq >= nr_irqs || irq < 0);
314 
315     if (IO_APIC_IRQ(irq))
316     {
317         vector = irq_to_desc(irq)->arch.vector;
318         if (vector >= FIRST_LEGACY_VECTOR && vector <= LAST_LEGACY_VECTOR)
319             vector = 0;
320     }
321     else if (MSI_IRQ(irq))
322         vector = irq_to_desc(irq)->arch.vector;
323     else
324         vector = LEGACY_VECTOR(irq);
325 
326     return vector;
327 }
328 
329 int arch_init_one_irq_desc(struct irq_desc *desc)
330 {
331     if ( !zalloc_cpumask_var(&desc->arch.cpu_mask) )
332         return -ENOMEM;
333 
334     if ( !alloc_cpumask_var(&desc->arch.old_cpu_mask) )
335     {
336         free_cpumask_var(desc->arch.cpu_mask);
337         return -ENOMEM;
338     }
339 
340     if ( !alloc_cpumask_var(&desc->arch.pending_mask) )
341     {
342         free_cpumask_var(desc->arch.old_cpu_mask);
343         free_cpumask_var(desc->arch.cpu_mask);
344         return -ENOMEM;
345     }
346 
347     desc->arch.vector = IRQ_VECTOR_UNASSIGNED;
348     desc->arch.old_vector = IRQ_VECTOR_UNASSIGNED;
349 
350     return 0;
351 }
352 
353 int __init init_irq_data(void)
354 {
355     struct irq_desc *desc;
356     int irq, vector;
357 
358     for ( vector = 0; vector < NR_VECTORS; ++vector )
359         this_cpu(vector_irq)[vector] = INT_MIN;
360 
361     irq_desc = xzalloc_array(struct irq_desc, nr_irqs);
362 
363     if ( !irq_desc )
364         return -ENOMEM;
365 
366     for ( irq = 0; irq < nr_irqs_gsi; irq++ )
367     {
368         desc = irq_to_desc(irq);
369         desc->irq = irq;
370         init_one_irq_desc(desc);
371     }
372     for ( ; irq < nr_irqs; irq++ )
373         irq_to_desc(irq)->irq = irq;
374 
375 #ifdef CONFIG_PV
376     /* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */
377     set_bit(LEGACY_SYSCALL_VECTOR, used_vectors);
378     set_bit(HYPERCALL_VECTOR, used_vectors);
379 #endif
380 
381     /* IRQ_MOVE_CLEANUP_VECTOR is used to clean up moved vectors */
382     set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
383 
384     return 0;
385 }
386 
387 static void __do_IRQ_guest(int vector);
388 
389 static void ack_none(struct irq_desc *desc)
390 {
391     ack_bad_irq(desc->irq);
392 }
393 
394 hw_irq_controller no_irq_type = {
395     "none",
396     irq_startup_none,
397     irq_shutdown_none,
398     irq_enable_none,
399     irq_disable_none,
400     ack_none,
401 };
402 
403 static vmask_t *irq_get_used_vector_mask(int irq)
404 {
405     vmask_t *ret = NULL;
406 
407     if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_GLOBAL )
408     {
409         struct irq_desc *desc = irq_to_desc(irq);
410 
411         ret = &global_used_vector_map;
412 
413         if ( desc->arch.used_vectors )
414             printk(XENLOG_INFO "Unassigned IRQ %d already has used_vectors\n",
415                    irq);
416         else
417         {
418             int vector;
419 
420             vector = irq_to_vector(irq);
421             if ( vector > 0 )
422             {
423                 printk(XENLOG_INFO "IRQ %d already assigned vector %d\n",
424                        irq, vector);
425 
426                 ASSERT(!test_bit(vector, ret));
427 
428                 set_bit(vector, ret);
429             }
430         }
431     }
432     else if ( IO_APIC_IRQ(irq) &&
433               opt_irq_vector_map != OPT_IRQ_VECTOR_MAP_NONE )
434     {
435         ret = io_apic_get_used_vector_map(irq);
436     }
437 
438     return ret;
439 }
440 
441 static int __assign_irq_vector(
442     int irq, struct irq_desc *desc, const cpumask_t *mask)
443 {
444     /*
445      * NOTE! The local APIC isn't very good at handling
446      * multiple interrupts at the same interrupt level.
447      * As the interrupt level is determined by taking the
448      * vector number and shifting that right by 4, we
449      * want to spread these out a bit so that they don't
450      * all fall in the same interrupt level.
451      *
452      * Also, we've got to be careful not to trash gate
453      * 0x80, because int 0x80 is hm, kind of importantish. ;)
454      */
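    /*
     * Worked example (added for clarity): the search below advances the
     * candidate vector in steps of 8, so consecutive allocations land in
     * different priority levels (level == vector >> 4).  Once the candidate
     * exceeds LAST_DYNAMIC_VECTOR, "offset" is bumped (mod 8) and the scan
     * restarts from FIRST_DYNAMIC_VECTOR + offset, so every dynamic vector
     * is eventually considered before -ENOSPC is returned.
     */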
455     static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
456     int cpu, err, old_vector;
457     cpumask_t tmp_mask;
458     vmask_t *irq_used_vectors = NULL;
459 
460     old_vector = irq_to_vector(irq);
461     if (old_vector > 0) {
462         cpumask_and(&tmp_mask, mask, &cpu_online_map);
463         if (cpumask_intersects(&tmp_mask, desc->arch.cpu_mask)) {
464             desc->arch.vector = old_vector;
465             return 0;
466         }
467     }
468 
469     if ( desc->arch.move_in_progress || desc->arch.move_cleanup_count )
470         return -EAGAIN;
471 
472     err = -ENOSPC;
473 
474     /* This is the only place normal IRQs are ever marked
475      * as "in use".  If they're not in use yet, check to see
476      * if we need to assign a global vector mask. */
477     if ( desc->arch.used == IRQ_USED )
478     {
479         irq_used_vectors = desc->arch.used_vectors;
480     }
481     else
482         irq_used_vectors = irq_get_used_vector_mask(irq);
483 
484     for_each_cpu(cpu, mask) {
485         int new_cpu;
486         int vector, offset;
487 
488         /* Only try to allocate irqs on cpus that are online. */
489         if (!cpu_online(cpu))
490             continue;
491 
492         cpumask_and(&tmp_mask, vector_allocation_cpumask(cpu),
493                     &cpu_online_map);
494 
495         vector = current_vector;
496         offset = current_offset;
497 next:
498         vector += 8;
499         if (vector > LAST_DYNAMIC_VECTOR) {
500             /* If out of vectors on large boxen, must share them. */
501             offset = (offset + 1) % 8;
502             vector = FIRST_DYNAMIC_VECTOR + offset;
503         }
504         if (unlikely(current_vector == vector))
505             continue;
506 
507         if (test_bit(vector, used_vectors))
508             goto next;
509 
510         if (irq_used_vectors
511             && test_bit(vector, irq_used_vectors) )
512             goto next;
513 
514         for_each_cpu(new_cpu, &tmp_mask)
515             if (per_cpu(vector_irq, new_cpu)[vector] >= 0)
516                 goto next;
517         /* Found one! */
518         current_vector = vector;
519         current_offset = offset;
520         if (old_vector > 0) {
521             desc->arch.move_in_progress = 1;
522             cpumask_copy(desc->arch.old_cpu_mask, desc->arch.cpu_mask);
523             desc->arch.old_vector = desc->arch.vector;
524         }
525         trace_irq_mask(TRC_HW_IRQ_ASSIGN_VECTOR, irq, vector, &tmp_mask);
526         for_each_cpu(new_cpu, &tmp_mask)
527             per_cpu(vector_irq, new_cpu)[vector] = irq;
528         desc->arch.vector = vector;
529         cpumask_copy(desc->arch.cpu_mask, &tmp_mask);
530 
531         desc->arch.used = IRQ_USED;
532         ASSERT((desc->arch.used_vectors == NULL)
533                || (desc->arch.used_vectors == irq_used_vectors));
534         desc->arch.used_vectors = irq_used_vectors;
535 
536         if ( desc->arch.used_vectors )
537         {
538             ASSERT(!test_bit(vector, desc->arch.used_vectors));
539 
540             set_bit(vector, desc->arch.used_vectors);
541         }
542 
543         err = 0;
544         break;
545     }
546     return err;
547 }
548 
549 int assign_irq_vector(int irq, const cpumask_t *mask)
550 {
551     int ret;
552     unsigned long flags;
553     struct irq_desc *desc = irq_to_desc(irq);
554 
555     BUG_ON(irq >= nr_irqs || irq < 0);
556 
557     spin_lock_irqsave(&vector_lock, flags);
558     ret = __assign_irq_vector(irq, desc, mask ?: TARGET_CPUS);
559     if (!ret) {
560         ret = desc->arch.vector;
561         cpumask_copy(desc->affinity, desc->arch.cpu_mask);
562     }
563     spin_unlock_irqrestore(&vector_lock, flags);
564     return ret;
565 }
566 
567 /*
568  * Initialize vector_irq on a new cpu. This function must be called
569  * with vector_lock held.
570  */
571 void setup_vector_irq(unsigned int cpu)
572 {
573     unsigned int irq, vector;
574 
575     /* Clear vector_irq */
576     for ( vector = 0; vector < NR_VECTORS; ++vector )
577         per_cpu(vector_irq, cpu)[vector] = INT_MIN;
578     /* Mark the in-use vectors */
579     for ( irq = 0; irq < nr_irqs; ++irq )
580     {
581         struct irq_desc *desc = irq_to_desc(irq);
582 
583         if ( !irq_desc_initialized(desc) )
584             continue;
585         vector = irq_to_vector(irq);
586         if ( vector >= FIRST_HIPRIORITY_VECTOR &&
587              vector <= LAST_HIPRIORITY_VECTOR )
588             cpumask_set_cpu(cpu, desc->arch.cpu_mask);
589         else if ( !cpumask_test_cpu(cpu, desc->arch.cpu_mask) )
590             continue;
591         per_cpu(vector_irq, cpu)[vector] = irq;
592     }
593 }
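/*
 * Caller-side sketch (illustrative; the actual CPU bringup code lives
 * elsewhere): the lock requirement above is what lock_vector_lock() and
 * unlock_vector_lock() provide, so a caller onlining CPU "cpu" would do
 * something like:
 *
 *     lock_vector_lock();
 *     setup_vector_irq(cpu);
 *     // ... mark the CPU online ...
 *     unlock_vector_lock();
 */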
594 
595 void move_masked_irq(struct irq_desc *desc)
596 {
597     cpumask_t *pending_mask = desc->arch.pending_mask;
598 
599     if (likely(!(desc->status & IRQ_MOVE_PENDING)))
600         return;
601 
602     desc->status &= ~IRQ_MOVE_PENDING;
603 
604     if (unlikely(cpumask_empty(pending_mask)))
605         return;
606 
607     if (!desc->handler->set_affinity)
608         return;
609 
610     /*
611      * If there was a valid mask to work with, please do the disable,
612      * re-program, enable sequence. This is *not* particularly important for
613      * level-triggered interrupts, but in an edge-triggered case we might be
614      * setting the RTE while an active trigger is coming in. This could cause
615      * some IOAPICs to malfunction. Being paranoid, I guess!
616      *
617      * For correct operation this depends on the caller masking the irqs.
618      */
619     if ( likely(cpumask_intersects(pending_mask, &cpu_online_map)) )
620         desc->handler->set_affinity(desc, pending_mask);
621 
622     cpumask_clear(pending_mask);
623 }
624 
625 void move_native_irq(struct irq_desc *desc)
626 {
627     if (likely(!(desc->status & IRQ_MOVE_PENDING)))
628         return;
629 
630     if (unlikely(desc->status & IRQ_DISABLED))
631         return;
632 
633     desc->handler->disable(desc);
634     move_masked_irq(desc);
635     desc->handler->enable(desc);
636 }
637 
638 void irq_move_cleanup_interrupt(struct cpu_user_regs *regs)
639 {
640     unsigned vector, me;
641 
642     ack_APIC_irq();
643 
644     me = smp_processor_id();
645     for ( vector = FIRST_DYNAMIC_VECTOR;
646           vector <= LAST_HIPRIORITY_VECTOR; vector++)
647     {
648         unsigned int irq;
649         unsigned int irr;
650         struct irq_desc *desc;
651         irq = __get_cpu_var(vector_irq)[vector];
652 
653         if ((int)irq < 0)
654             continue;
655 
656         if ( vector >= FIRST_LEGACY_VECTOR && vector <= LAST_LEGACY_VECTOR )
657             continue;
658 
659         desc = irq_to_desc(irq);
660         if (!desc)
661             continue;
662 
663         spin_lock(&desc->lock);
664         if (!desc->arch.move_cleanup_count)
665             goto unlock;
666 
667         if ( vector == desc->arch.vector &&
668              cpumask_test_cpu(me, desc->arch.cpu_mask) )
669             goto unlock;
670 
671         irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
672         /*
673          * Check if the vector that needs to be cleaned up is
674          * registered in the CPU's IRR. If so, then this is not
675          * the best time to clean it up. Let's clean it up on the
676          * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
677          * to ourselves.
678          */
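        /*
         * Added note: APIC_IRR + (vector / 32 * 0x10) selects the 32-bit
         * IRR word covering this vector (the IRR registers are spaced 0x10
         * apart), and bit (vector % 32) within it is the pending flag.
         * For example, vector 0x5a lives in the word at APIC_IRR + 0x20,
         * bit 26.
         */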
679         if (irr  & (1 << (vector % 32))) {
680             send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
681             TRACE_3D(TRC_HW_IRQ_MOVE_CLEANUP_DELAY,
682                      irq, vector, smp_processor_id());
683             goto unlock;
684         }
685 
686         TRACE_3D(TRC_HW_IRQ_MOVE_CLEANUP,
687                  irq, vector, smp_processor_id());
688 
689         __get_cpu_var(vector_irq)[vector] = ~irq;
690         desc->arch.move_cleanup_count--;
691 
692         if ( desc->arch.move_cleanup_count == 0 )
693         {
694             desc->arch.old_vector = IRQ_VECTOR_UNASSIGNED;
695             cpumask_clear(desc->arch.old_cpu_mask);
696 
697             if ( desc->arch.used_vectors )
698             {
699                 ASSERT(test_bit(vector, desc->arch.used_vectors));
700                 clear_bit(vector, desc->arch.used_vectors);
701             }
702         }
703 unlock:
704         spin_unlock(&desc->lock);
705     }
706 }
707 
708 static void send_cleanup_vector(struct irq_desc *desc)
709 {
710     cpumask_t cleanup_mask;
711 
712     cpumask_and(&cleanup_mask, desc->arch.old_cpu_mask, &cpu_online_map);
713     desc->arch.move_cleanup_count = cpumask_weight(&cleanup_mask);
714     send_IPI_mask(&cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
715 
716     desc->arch.move_in_progress = 0;
717 }
718 
719 void irq_complete_move(struct irq_desc *desc)
720 {
721     unsigned vector, me;
722 
723     if (likely(!desc->arch.move_in_progress))
724         return;
725 
726     vector = (u8)get_irq_regs()->entry_vector;
727     me = smp_processor_id();
728 
729     if ( vector == desc->arch.vector &&
730          cpumask_test_cpu(me, desc->arch.cpu_mask) )
731         send_cleanup_vector(desc);
732 }
733 
734 unsigned int set_desc_affinity(struct irq_desc *desc, const cpumask_t *mask)
735 {
736     unsigned int irq;
737     int ret;
738     unsigned long flags;
739     cpumask_t dest_mask;
740 
741     if (!cpumask_intersects(mask, &cpu_online_map))
742         return BAD_APICID;
743 
744     irq = desc->irq;
745 
746     spin_lock_irqsave(&vector_lock, flags);
747     ret = __assign_irq_vector(irq, desc, mask);
748     spin_unlock_irqrestore(&vector_lock, flags);
749 
750     if (ret < 0)
751         return BAD_APICID;
752 
753     cpumask_copy(desc->affinity, mask);
754     cpumask_and(&dest_mask, mask, desc->arch.cpu_mask);
755 
756     return cpu_mask_to_apicid(&dest_mask);
757 }
758 
759 /* For re-setting irq interrupt affinity for specific irq */
760 void irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
761 {
762     if (!desc->handler->set_affinity)
763         return;
764 
765     ASSERT(spin_is_locked(&desc->lock));
766     desc->status &= ~IRQ_MOVE_PENDING;
767     wmb();
768     cpumask_copy(desc->arch.pending_mask, mask);
769     wmb();
770     desc->status |= IRQ_MOVE_PENDING;
771 }
772 
773 void pirq_set_affinity(struct domain *d, int pirq, const cpumask_t *mask)
774 {
775     unsigned long flags;
776     struct irq_desc *desc = domain_spin_lock_irq_desc(d, pirq, &flags);
777 
778     if ( !desc )
779         return;
780     irq_set_affinity(desc, mask);
781     spin_unlock_irqrestore(&desc->lock, flags);
782 }
783 
784 DEFINE_PER_CPU(unsigned int, irq_count);
785 
786 uint8_t alloc_hipriority_vector(void)
787 {
788     static uint8_t next = FIRST_HIPRIORITY_VECTOR;
789     BUG_ON(next < FIRST_HIPRIORITY_VECTOR);
790     BUG_ON(next > LAST_HIPRIORITY_VECTOR);
791     return next++;
792 }
793 
794 static void (*direct_apic_vector[NR_VECTORS])(struct cpu_user_regs *);
795 void set_direct_apic_vector(
796     uint8_t vector, void (*handler)(struct cpu_user_regs *))
797 {
798     BUG_ON(direct_apic_vector[vector] != NULL);
799     direct_apic_vector[vector] = handler;
800 }
801 
802 void alloc_direct_apic_vector(
803     uint8_t *vector, void (*handler)(struct cpu_user_regs *))
804 {
805     static DEFINE_SPINLOCK(lock);
806 
807     spin_lock(&lock);
808     if (*vector == 0) {
809         *vector = alloc_hipriority_vector();
810         set_direct_apic_vector(*vector, handler);
811     }
812     spin_unlock(&lock);
813 }
814 
815 void do_IRQ(struct cpu_user_regs *regs)
816 {
817     struct irqaction *action;
818     uint32_t          tsc_in;
819     struct irq_desc  *desc;
820     unsigned int      vector = (u8)regs->entry_vector;
821     int irq = __get_cpu_var(vector_irq[vector]);
822     struct cpu_user_regs *old_regs = set_irq_regs(regs);
823 
824     perfc_incr(irqs);
825     this_cpu(irq_count)++;
826     irq_enter();
827 
828     if (irq < 0) {
829         if (direct_apic_vector[vector] != NULL) {
830             (*direct_apic_vector[vector])(regs);
831         } else {
832             const char *kind = ", LAPIC";
833 
834             if ( apic_isr_read(vector) )
835                 ack_APIC_irq();
836             else
837                 kind = "";
838             if ( ! ( vector >= FIRST_LEGACY_VECTOR &&
839                      vector <= LAST_LEGACY_VECTOR &&
840                      bogus_8259A_irq(vector - FIRST_LEGACY_VECTOR) ) )
841             {
842                 printk("CPU%u: No irq handler for vector %02x (IRQ %d%s)\n",
843                        smp_processor_id(), vector, irq, kind);
844                 desc = irq_to_desc(~irq);
845                 if ( ~irq < nr_irqs && irq_desc_initialized(desc) )
846                 {
847                     spin_lock(&desc->lock);
848                     printk("IRQ%d a=%04lx[%04lx,%04lx] v=%02x[%02x] t=%s s=%08x\n",
849                            ~irq, *cpumask_bits(desc->affinity),
850                            *cpumask_bits(desc->arch.cpu_mask),
851                            *cpumask_bits(desc->arch.old_cpu_mask),
852                            desc->arch.vector, desc->arch.old_vector,
853                            desc->handler->typename, desc->status);
854                     spin_unlock(&desc->lock);
855                 }
856             }
857             TRACE_1D(TRC_HW_IRQ_UNMAPPED_VECTOR, vector);
858         }
859         goto out_no_unlock;
860     }
861 
862     desc = irq_to_desc(irq);
863 
864     spin_lock(&desc->lock);
865     desc->handler->ack(desc);
866 
867     if ( likely(desc->status & IRQ_GUEST) )
868     {
869         if ( irq_ratelimit_timer.function && /* irq rate limiting enabled? */
870              unlikely(desc->rl_cnt++ >= irq_ratelimit_threshold) )
871         {
872             s_time_t now = NOW();
873             if ( now < (desc->rl_quantum_start + MILLISECS(10)) )
874             {
875                 desc->handler->disable(desc);
876                 /*
877                  * If handler->disable doesn't actually mask the interrupt, a
878                  * disabled irq still can fire. This check also avoids possible
879                  * deadlocks if ratelimit_timer_fn runs at the same time.
880                  */
881                 if ( likely(list_empty(&desc->rl_link)) )
882                 {
883                     spin_lock(&irq_ratelimit_lock);
884                     if ( list_empty(&irq_ratelimit_list) )
885                         set_timer(&irq_ratelimit_timer, now + MILLISECS(10));
886                     list_add(&desc->rl_link, &irq_ratelimit_list);
887                     spin_unlock(&irq_ratelimit_lock);
888                 }
889                 goto out;
890             }
891             desc->rl_cnt = 0;
892             desc->rl_quantum_start = now;
893         }
894 
895         tsc_in = tb_init_done ? get_cycles() : 0;
896         __do_IRQ_guest(irq);
897         TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
898         goto out_no_end;
899     }
900 
901     desc->status &= ~IRQ_REPLAY;
902     desc->status |= IRQ_PENDING;
903 
904     /*
905      * Since we set PENDING, if another processor is handling a different
906      * instance of this same irq, the other processor will take care of it.
907      */
908     if ( desc->status & (IRQ_DISABLED | IRQ_INPROGRESS) )
909         goto out;
910 
911     desc->status |= IRQ_INPROGRESS;
912 
913     action = desc->action;
914     while ( desc->status & IRQ_PENDING )
915     {
916         desc->status &= ~IRQ_PENDING;
917         spin_unlock_irq(&desc->lock);
918         tsc_in = tb_init_done ? get_cycles() : 0;
919         action->handler(irq, action->dev_id, regs);
920         TRACE_3D(TRC_HW_IRQ_HANDLED, irq, tsc_in, get_cycles());
921         spin_lock_irq(&desc->lock);
922     }
923 
924     desc->status &= ~IRQ_INPROGRESS;
925 
926  out:
927     if ( desc->handler->end )
928         desc->handler->end(desc, vector);
929  out_no_end:
930     spin_unlock(&desc->lock);
931  out_no_unlock:
932     irq_exit();
933     set_irq_regs(old_regs);
934 }
935 
936 static void irq_ratelimit_timer_fn(void *data)
937 {
938     struct irq_desc *desc, *tmp;
939     unsigned long flags;
940 
941     spin_lock_irqsave(&irq_ratelimit_lock, flags);
942 
943     list_for_each_entry_safe ( desc, tmp, &irq_ratelimit_list, rl_link )
944     {
945         spin_lock(&desc->lock);
946         desc->handler->enable(desc);
947         list_del(&desc->rl_link);
948         INIT_LIST_HEAD(&desc->rl_link);
949         spin_unlock(&desc->lock);
950     }
951 
952     spin_unlock_irqrestore(&irq_ratelimit_lock, flags);
953 }
954 
955 static int __init irq_ratelimit_init(void)
956 {
957     if ( irq_ratelimit_threshold )
958         init_timer(&irq_ratelimit_timer, irq_ratelimit_timer_fn, NULL, 0);
959     return 0;
960 }
961 __initcall(irq_ratelimit_init);
962 
963 int __init request_irq(unsigned int irq, unsigned int irqflags,
964         void (*handler)(int, void *, struct cpu_user_regs *),
965         const char * devname, void *dev_id)
966 {
967     struct irqaction * action;
968     int retval;
969 
970     /*
971      * Sanity-check: shared interrupts must pass in a real dev-ID,
972      * otherwise we'll have trouble later trying to figure out
973      * which interrupt is which (messes up the interrupt freeing
974      * logic etc).
975      */
976     if (irq >= nr_irqs)
977         return -EINVAL;
978     if (!handler)
979         return -EINVAL;
980 
981     action = xmalloc(struct irqaction);
982     if (!action)
983         return -ENOMEM;
984 
985     action->handler = handler;
986     action->name = devname;
987     action->dev_id = dev_id;
988     action->free_on_release = 1;
989 
990     retval = setup_irq(irq, irqflags, action);
991     if (retval)
992         xfree(action);
993 
994     return retval;
995 }
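/*
 * Usage sketch (illustrative only; all names below are made up): a
 * boot-time caller wanting a Xen-internal handler on "my_irq" would do
 * something like:
 *
 *     static void my_handler(int irq, void *dev_id,
 *                            struct cpu_user_regs *regs)
 *     {
 *         // ... service the interrupt ...
 *     }
 *
 *     rc = request_irq(my_irq, 0, my_handler, "my-device", NULL);
 *
 * request_irq() allocates the irqaction and hands it to setup_irq(); on
 * failure the action is freed and the error code is returned.
 */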
996 
997 void __init release_irq(unsigned int irq, const void *dev_id)
998 {
999     struct irq_desc *desc;
1000     unsigned long flags;
1001     struct irqaction *action;
1002 
1003     desc = irq_to_desc(irq);
1004 
1005     spin_lock_irqsave(&desc->lock,flags);
1006     action = desc->action;
1007     desc->action  = NULL;
1008     desc->handler->shutdown(desc);
1009     desc->status |= IRQ_DISABLED;
1010     spin_unlock_irqrestore(&desc->lock,flags);
1011 
1012     /* Wait to make sure it's not being used on another CPU */
1013     do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );
1014 
1015     if (action && action->free_on_release)
1016         xfree(action);
1017 }
1018 
1019 int __init setup_irq(unsigned int irq, unsigned int irqflags,
1020                      struct irqaction *new)
1021 {
1022     struct irq_desc *desc;
1023     unsigned long flags;
1024 
1025     ASSERT(irqflags == 0);
1026 
1027     desc = irq_to_desc(irq);
1028 
1029     spin_lock_irqsave(&desc->lock,flags);
1030 
1031     if ( desc->action != NULL )
1032     {
1033         spin_unlock_irqrestore(&desc->lock,flags);
1034         return -EBUSY;
1035     }
1036 
1037     desc->action  = new;
1038     desc->status &= ~IRQ_DISABLED;
1039     desc->handler->startup(desc);
1040 
1041     spin_unlock_irqrestore(&desc->lock,flags);
1042 
1043     return 0;
1044 }
1045 
1046 
1047 /*
1048  * HANDLING OF GUEST-BOUND PHYSICAL IRQS
1049  */
1050 
1051 #define IRQ_MAX_GUESTS 7
1052 typedef struct {
1053     u8 nr_guests;
1054     u8 in_flight;
1055     u8 shareable;
1056     u8 ack_type;
1057 #define ACKTYPE_NONE   0     /* No final acknowledgement is required */
1058 #define ACKTYPE_UNMASK 1     /* Unmask PIC hardware (from any CPU)   */
1059 #define ACKTYPE_EOI    2     /* EOI on the CPU that was interrupted  */
1060     cpumask_var_t cpu_eoi_map; /* CPUs that need to EOI this interrupt */
1061     struct timer eoi_timer;
1062     struct domain *guest[IRQ_MAX_GUESTS];
1063 } irq_guest_action_t;
1064 
1065 /*
1066  * Stack of interrupts awaiting EOI on each CPU. These must be popped in
1067  * order, as only the current highest-priority pending irq can be EOIed.
1068  */
1069 struct pending_eoi {
1070     u32 ready:1;  /* Ready for EOI now?  */
1071     u32 irq:23;   /* irq of the vector */
1072     u32 vector:8; /* vector awaiting EOI */
1073 };
1074 
1075 static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_DYNAMIC_VECTORS]);
1076 #define pending_eoi_sp(p) ((p)[NR_DYNAMIC_VECTORS-1].vector)
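/*
 * Added illustration of the stack discipline described above: the stack
 * pointer lives in the "vector" field of the final slot (pending_eoi_sp).
 * __do_IRQ_guest() pushes {irq, vector, ready=0} in strictly increasing
 * vector order, set_eoi_ready()/__set_eoi_ready() flip the matching
 * entry's "ready" bit, and flush_ready_eoi() pops and EOIs entries from
 * the top for as long as they are marked ready.  Conceptually:
 *
 *     push v=0x30, push v=0x38, push v=0x40   (sp: 0 -> 3)
 *     mark v=0x38 ready                       (top not ready, no flush)
 *     mark v=0x40 ready, flush                (pops 0x40, 0x38; 0x30 stays)
 */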
1077 
1078 bool cpu_has_pending_apic_eoi(void)
1079 {
1080     return pending_eoi_sp(this_cpu(pending_eoi)) != 0;
1081 }
1082 
1083 static inline void set_pirq_eoi(struct domain *d, unsigned int irq)
1084 {
1085     if ( d->arch.pirq_eoi_map )
1086     {
1087         ASSERT(irq < PAGE_SIZE * BITS_PER_BYTE);
1088         set_bit(irq, d->arch.pirq_eoi_map);
1089     }
1090 }
1091 
1092 static inline void clear_pirq_eoi(struct domain *d, unsigned int irq)
1093 {
1094     if ( d->arch.pirq_eoi_map )
1095     {
1096         ASSERT(irq < PAGE_SIZE * BITS_PER_BYTE);
1097         clear_bit(irq, d->arch.pirq_eoi_map);
1098     }
1099 }
1100 
1101 static void set_eoi_ready(void *data);
1102 
1103 static void irq_guest_eoi_timer_fn(void *data)
1104 {
1105     struct irq_desc *desc = data;
1106     unsigned int irq = desc - irq_desc;
1107     irq_guest_action_t *action;
1108     cpumask_t cpu_eoi_map;
1109     unsigned long flags;
1110 
1111     spin_lock_irqsave(&desc->lock, flags);
1112 
1113     if ( !(desc->status & IRQ_GUEST) )
1114         goto out;
1115 
1116     action = (irq_guest_action_t *)desc->action;
1117 
1118     if ( action->ack_type != ACKTYPE_NONE )
1119     {
1120         unsigned int i;
1121         for ( i = 0; i < action->nr_guests; i++ )
1122         {
1123             struct domain *d = action->guest[i];
1124             unsigned int pirq = domain_irq_to_pirq(d, irq);
1125             if ( test_and_clear_bool(pirq_info(d, pirq)->masked) )
1126                 action->in_flight--;
1127         }
1128     }
1129 
1130     if ( action->in_flight != 0 )
1131         goto out;
1132 
1133     switch ( action->ack_type )
1134     {
1135     case ACKTYPE_UNMASK:
1136         if ( desc->handler->end )
1137             desc->handler->end(desc, 0);
1138         break;
1139     case ACKTYPE_EOI:
1140         cpumask_copy(&cpu_eoi_map, action->cpu_eoi_map);
1141         spin_unlock_irq(&desc->lock);
1142         on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
1143         spin_lock_irq(&desc->lock);
1144         break;
1145     }
1146 
1147  out:
1148     spin_unlock_irqrestore(&desc->lock, flags);
1149 }
1150 
1151 static void __do_IRQ_guest(int irq)
1152 {
1153     struct irq_desc         *desc = irq_to_desc(irq);
1154     irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
1155     struct domain      *d;
1156     int                 i, sp;
1157     struct pending_eoi *peoi = this_cpu(pending_eoi);
1158     unsigned int        vector = (u8)get_irq_regs()->entry_vector;
1159 
1160     if ( unlikely(action->nr_guests == 0) )
1161     {
1162         /* An interrupt may slip through while freeing an ACKTYPE_EOI irq. */
1163         ASSERT(action->ack_type == ACKTYPE_EOI);
1164         ASSERT(desc->status & IRQ_DISABLED);
1165         if ( desc->handler->end )
1166             desc->handler->end(desc, vector);
1167         return;
1168     }
1169 
1170     if ( action->ack_type == ACKTYPE_EOI )
1171     {
1172         sp = pending_eoi_sp(peoi);
1173         ASSERT((sp == 0) || (peoi[sp-1].vector < vector));
1174         ASSERT(sp < (NR_DYNAMIC_VECTORS-1));
1175         peoi[sp].irq = irq;
1176         peoi[sp].vector = vector;
1177         peoi[sp].ready = 0;
1178         pending_eoi_sp(peoi) = sp+1;
1179         cpumask_set_cpu(smp_processor_id(), action->cpu_eoi_map);
1180     }
1181 
1182     for ( i = 0; i < action->nr_guests; i++ )
1183     {
1184         struct pirq *pirq;
1185 
1186         d = action->guest[i];
1187         pirq = pirq_info(d, domain_irq_to_pirq(d, irq));
1188         if ( (action->ack_type != ACKTYPE_NONE) &&
1189              !test_and_set_bool(pirq->masked) )
1190             action->in_flight++;
1191         if ( !is_hvm_domain(d) || !hvm_do_IRQ_dpci(d, pirq) )
1192             send_guest_pirq(d, pirq);
1193     }
1194 
1195     if ( action->ack_type != ACKTYPE_NONE )
1196     {
1197         stop_timer(&action->eoi_timer);
1198         migrate_timer(&action->eoi_timer, smp_processor_id());
1199         set_timer(&action->eoi_timer, NOW() + MILLISECS(1));
1200     }
1201 }
1202 
1203 /*
1204  * Retrieve Xen irq-descriptor corresponding to a domain-specific irq.
1205  * The descriptor is returned locked. This function is safe against changes
1206  * to the per-domain irq-to-vector mapping.
1207  */
1208 struct irq_desc *domain_spin_lock_irq_desc(
1209     struct domain *d, int pirq, unsigned long *pflags)
1210 {
1211     const struct pirq *info = pirq_info(d, pirq);
1212 
1213     return info ? pirq_spin_lock_irq_desc(info, pflags) : NULL;
1214 }
1215 
1216 /*
1217  * Same with struct pirq already looked up.
1218  */
1219 struct irq_desc *pirq_spin_lock_irq_desc(
1220     const struct pirq *pirq, unsigned long *pflags)
1221 {
1222     struct irq_desc *desc;
1223     unsigned long flags;
1224 
1225     for ( ; ; )
1226     {
1227         int irq = pirq->arch.irq;
1228 
1229         if ( irq <= 0 )
1230             return NULL;
1231 
1232         desc = irq_to_desc(irq);
1233         spin_lock_irqsave(&desc->lock, flags);
1234         if ( irq == pirq->arch.irq )
1235             break;
1236         spin_unlock_irqrestore(&desc->lock, flags);
1237     }
1238 
1239     if ( pflags )
1240         *pflags = flags;
1241 
1242     return desc;
1243 }
1244 
1245 static int prepare_domain_irq_pirq(struct domain *d, int irq, int pirq,
1246                                 struct pirq **pinfo)
1247 {
1248     int err = radix_tree_insert(&d->arch.irq_pirq, irq,
1249                                 radix_tree_int_to_ptr(0));
1250     struct pirq *info;
1251 
1252     if ( err && err != -EEXIST )
1253         return err;
1254     info = pirq_get_info(d, pirq);
1255     if ( !info )
1256     {
1257         if ( !err )
1258             radix_tree_delete(&d->arch.irq_pirq, irq);
1259         return -ENOMEM;
1260     }
1261     *pinfo = info;
1262 
1263     return !!err;
1264 }
1265 
1266 static void set_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq)
1267 {
1268     radix_tree_replace_slot(
1269         radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
1270         radix_tree_int_to_ptr(pirq->pirq));
1271     pirq->arch.irq = irq;
1272 }
1273 
1274 static void clear_domain_irq_pirq(struct domain *d, int irq, struct pirq *pirq)
1275 {
1276     pirq->arch.irq = 0;
1277     radix_tree_replace_slot(
1278         radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
1279         radix_tree_int_to_ptr(0));
1280 }
1281 
1282 static void cleanup_domain_irq_pirq(struct domain *d, int irq,
1283                                     struct pirq *pirq)
1284 {
1285     pirq_cleanup_check(pirq, d);
1286     radix_tree_delete(&d->arch.irq_pirq, irq);
1287 }
1288 
1289 int init_domain_irq_mapping(struct domain *d)
1290 {
1291     unsigned int i;
1292     int err = 0;
1293 
1294     radix_tree_init(&d->arch.irq_pirq);
1295     if ( is_hvm_domain(d) )
1296         radix_tree_init(&d->arch.hvm_domain.emuirq_pirq);
1297 
1298     for ( i = 1; platform_legacy_irq(i); ++i )
1299     {
1300         struct pirq *info;
1301 
1302         if ( IO_APIC_IRQ(i) )
1303             continue;
1304         err = prepare_domain_irq_pirq(d, i, i, &info);
1305         if ( err )
1306         {
1307             ASSERT(err < 0);
1308             break;
1309         }
1310         set_domain_irq_pirq(d, i, info);
1311     }
1312 
1313     if ( err )
1314         cleanup_domain_irq_mapping(d);
1315     return err;
1316 }
1317 
1318 void cleanup_domain_irq_mapping(struct domain *d)
1319 {
1320     radix_tree_destroy(&d->arch.irq_pirq, NULL);
1321     if ( is_hvm_domain(d) )
1322         radix_tree_destroy(&d->arch.hvm_domain.emuirq_pirq, NULL);
1323 }
1324 
1325 struct pirq *alloc_pirq_struct(struct domain *d)
1326 {
1327     size_t sz = is_hvm_domain(d) ? sizeof(struct pirq) :
1328                                    offsetof(struct pirq, arch.hvm);
1329     struct pirq *pirq = xzalloc_bytes(sz);
1330 
1331     if ( pirq )
1332     {
1333         if ( is_hvm_domain(d) )
1334         {
1335             pirq->arch.hvm.emuirq = IRQ_UNBOUND;
1336             pt_pirq_init(d, &pirq->arch.hvm.dpci);
1337         }
1338     }
1339 
1340     return pirq;
1341 }
1342 
1343 void (pirq_cleanup_check)(struct pirq *pirq, struct domain *d)
1344 {
1345     /*
1346      * Check whether all fields have their default values, and delete
1347      * the entry from the tree if so.
1348      *
1349      * NB: Common parts were already checked.
1350      */
1351     if ( pirq->arch.irq )
1352         return;
1353 
1354     if ( is_hvm_domain(d) )
1355     {
1356         if ( pirq->arch.hvm.emuirq != IRQ_UNBOUND )
1357             return;
1358         if ( !pt_pirq_cleanup_check(&pirq->arch.hvm.dpci) )
1359             return;
1360     }
1361 
1362     if ( radix_tree_delete(&d->pirq_tree, pirq->pirq) != pirq )
1363         BUG();
1364 }
1365 
1366 /* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
1367 static void flush_ready_eoi(void)
1368 {
1369     struct pending_eoi *peoi = this_cpu(pending_eoi);
1370     struct irq_desc         *desc;
1371     int                irq, sp;
1372 
1373     ASSERT(!local_irq_is_enabled());
1374 
1375     sp = pending_eoi_sp(peoi);
1376 
1377     while ( (--sp >= 0) && peoi[sp].ready )
1378     {
1379         irq = peoi[sp].irq;
1380         ASSERT(irq > 0);
1381         desc = irq_to_desc(irq);
1382         spin_lock(&desc->lock);
1383         if ( desc->handler->end )
1384             desc->handler->end(desc, peoi[sp].vector);
1385         spin_unlock(&desc->lock);
1386     }
1387 
1388     pending_eoi_sp(peoi) = sp+1;
1389 }
1390 
1391 static void __set_eoi_ready(struct irq_desc *desc)
1392 {
1393     irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
1394     struct pending_eoi *peoi = this_cpu(pending_eoi);
1395     int                 irq, sp;
1396 
1397     irq = desc - irq_desc;
1398 
1399     if ( !(desc->status & IRQ_GUEST) ||
1400          (action->in_flight != 0) ||
1401          !cpumask_test_and_clear_cpu(smp_processor_id(),
1402                                      action->cpu_eoi_map) )
1403         return;
1404 
1405     sp = pending_eoi_sp(peoi);
1406 
1407     do {
1408         ASSERT(sp > 0);
1409     } while ( peoi[--sp].irq != irq );
1410     ASSERT(!peoi[sp].ready);
1411     peoi[sp].ready = 1;
1412 }
1413 
1414 /* Mark specified IRQ as ready-for-EOI (if it really is) and attempt to EOI. */
1415 static void set_eoi_ready(void *data)
1416 {
1417     struct irq_desc *desc = data;
1418 
1419     ASSERT(!local_irq_is_enabled());
1420 
1421     spin_lock(&desc->lock);
1422     __set_eoi_ready(desc);
1423     spin_unlock(&desc->lock);
1424 
1425     flush_ready_eoi();
1426 }
1427 
1428 void pirq_guest_eoi(struct pirq *pirq)
1429 {
1430     struct irq_desc *desc;
1431 
1432     ASSERT(local_irq_is_enabled());
1433     desc = pirq_spin_lock_irq_desc(pirq, NULL);
1434     if ( desc )
1435         desc_guest_eoi(desc, pirq);
1436 }
1437 
1438 void desc_guest_eoi(struct irq_desc *desc, struct pirq *pirq)
1439 {
1440     irq_guest_action_t *action;
1441     cpumask_t           cpu_eoi_map;
1442     int                 irq;
1443 
1444     if ( !(desc->status & IRQ_GUEST) )
1445     {
1446         spin_unlock_irq(&desc->lock);
1447         return;
1448     }
1449 
1450     action = (irq_guest_action_t *)desc->action;
1451     irq = desc - irq_desc;
1452 
1453     if ( unlikely(!test_and_clear_bool(pirq->masked)) ||
1454          unlikely(--action->in_flight != 0) )
1455     {
1456         spin_unlock_irq(&desc->lock);
1457         return;
1458     }
1459 
1460     if ( action->ack_type == ACKTYPE_UNMASK )
1461     {
1462         ASSERT(cpumask_empty(action->cpu_eoi_map));
1463         if ( desc->handler->end )
1464             desc->handler->end(desc, 0);
1465         spin_unlock_irq(&desc->lock);
1466         return;
1467     }
1468 
1469     ASSERT(action->ack_type == ACKTYPE_EOI);
1470 
1471     cpumask_copy(&cpu_eoi_map, action->cpu_eoi_map);
1472 
1473     if ( __cpumask_test_and_clear_cpu(smp_processor_id(), &cpu_eoi_map) )
1474     {
1475         __set_eoi_ready(desc);
1476         spin_unlock(&desc->lock);
1477         flush_ready_eoi();
1478         local_irq_enable();
1479     }
1480     else
1481     {
1482         spin_unlock_irq(&desc->lock);
1483     }
1484 
1485     if ( !cpumask_empty(&cpu_eoi_map) )
1486         on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
1487 }
1488 
1489 int pirq_guest_unmask(struct domain *d)
1490 {
1491     unsigned int pirq = 0, n, i;
1492     struct pirq *pirqs[16];
1493 
1494     do {
1495         n = radix_tree_gang_lookup(&d->pirq_tree, (void **)pirqs, pirq,
1496                                    ARRAY_SIZE(pirqs));
1497         for ( i = 0; i < n; ++i )
1498         {
1499             pirq = pirqs[i]->pirq;
1500             if ( pirqs[i]->masked &&
1501                  !evtchn_port_is_masked(d, pirqs[i]->evtchn) )
1502                 pirq_guest_eoi(pirqs[i]);
1503         }
1504     } while ( ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
1505 
1506     return 0;
1507 }
1508 
1509 static int pirq_acktype(struct domain *d, int pirq)
1510 {
1511     struct irq_desc  *desc;
1512     int irq;
1513 
1514     irq = domain_pirq_to_irq(d, pirq);
1515     if ( irq <= 0 )
1516         return ACKTYPE_NONE;
1517 
1518     desc = irq_to_desc(irq);
1519 
1520     if ( desc->handler == &no_irq_type )
1521         return ACKTYPE_NONE;
1522 
1523     /*
1524      * Edge-triggered IO-APIC and LAPIC interrupts need no final
1525      * acknowledgement: we ACK early during interrupt processing.
1526      */
1527     if ( !strcmp(desc->handler->typename, "IO-APIC-edge") ||
1528          !strcmp(desc->handler->typename, "local-APIC-edge") )
1529         return ACKTYPE_NONE;
1530 
1531     /*
1532      * MSIs are treated as edge-triggered interrupts, except
1533      * when there is no proper way to mask them.
1534      */
1535     if ( desc->msi_desc )
1536         return msi_maskable_irq(desc->msi_desc) ? ACKTYPE_NONE : ACKTYPE_EOI;
1537 
1538     /*
1539      * Level-triggered IO-APIC interrupts need to be acknowledged on the CPU
1540      * on which they were received. This is because we tickle the LAPIC to EOI.
1541      */
1542     if ( !strcmp(desc->handler->typename, "IO-APIC-level") )
1543         return desc->handler->ack == irq_complete_move ?
1544                ACKTYPE_EOI : ACKTYPE_UNMASK;
1545 
1546     /* Legacy PIC interrupts can be acknowledged from any CPU. */
1547     if ( !strcmp(desc->handler->typename, "XT-PIC") )
1548         return ACKTYPE_UNMASK;
1549 
1550     printk("Unknown PIC type '%s' for IRQ %d\n", desc->handler->typename, irq);
1551     BUG();
1552 
1553     return 0;
1554 }
1555 
1556 int pirq_shared(struct domain *d, int pirq)
1557 {
1558     struct irq_desc         *desc;
1559     irq_guest_action_t *action;
1560     unsigned long       flags;
1561     int                 shared;
1562 
1563     desc = domain_spin_lock_irq_desc(d, pirq, &flags);
1564     if ( desc == NULL )
1565         return 0;
1566 
1567     action = (irq_guest_action_t *)desc->action;
1568     shared = ((desc->status & IRQ_GUEST) && (action->nr_guests > 1));
1569 
1570     spin_unlock_irqrestore(&desc->lock, flags);
1571 
1572     return shared;
1573 }
1574 
1575 int pirq_guest_bind(struct vcpu *v, struct pirq *pirq, int will_share)
1576 {
1577     unsigned int        irq;
1578     struct irq_desc         *desc;
1579     irq_guest_action_t *action, *newaction = NULL;
1580     int                 rc = 0;
1581 
1582     WARN_ON(!spin_is_locked(&v->domain->event_lock));
1583     BUG_ON(!local_irq_is_enabled());
1584 
1585  retry:
1586     desc = pirq_spin_lock_irq_desc(pirq, NULL);
1587     if ( desc == NULL )
1588     {
1589         rc = -EINVAL;
1590         goto out;
1591     }
1592 
1593     action = (irq_guest_action_t *)desc->action;
1594     irq = desc - irq_desc;
1595 
1596     if ( !(desc->status & IRQ_GUEST) )
1597     {
1598         if ( desc->action != NULL )
1599         {
1600             printk(XENLOG_G_INFO
1601                    "Cannot bind IRQ%d to dom%d. In use by '%s'.\n",
1602                    pirq->pirq, v->domain->domain_id, desc->action->name);
1603             rc = -EBUSY;
1604             goto unlock_out;
1605         }
1606 
1607         if ( newaction == NULL )
1608         {
1609             spin_unlock_irq(&desc->lock);
1610             if ( (newaction = xmalloc(irq_guest_action_t)) != NULL &&
1611                  zalloc_cpumask_var(&newaction->cpu_eoi_map) )
1612                 goto retry;
1613             xfree(newaction);
1614             printk(XENLOG_G_INFO
1615                    "Cannot bind IRQ%d to dom%d. Out of memory.\n",
1616                    pirq->pirq, v->domain->domain_id);
1617             return -ENOMEM;
1618         }
1619 
1620         action = newaction;
1621         desc->action = (struct irqaction *)action;
1622         newaction = NULL;
1623 
1624         action->nr_guests   = 0;
1625         action->in_flight   = 0;
1626         action->shareable   = will_share;
1627         action->ack_type    = pirq_acktype(v->domain, pirq->pirq);
1628         init_timer(&action->eoi_timer, irq_guest_eoi_timer_fn, desc, 0);
1629 
1630         desc->status |= IRQ_GUEST;
1631 
1632         /* Attempt to bind the interrupt target to the correct CPU. */
1633         if ( !opt_noirqbalance && (desc->handler->set_affinity != NULL) )
1634             desc->handler->set_affinity(desc, cpumask_of(v->processor));
1635 
1636         desc->status &= ~IRQ_DISABLED;
1637         desc->handler->startup(desc);
1638     }
1639     else if ( !will_share || !action->shareable )
1640     {
1641         printk(XENLOG_G_INFO "Cannot bind IRQ%d to dom%d. %s.\n",
1642                pirq->pirq, v->domain->domain_id,
1643                will_share ? "Others do not share"
1644                           : "Will not share with others");
1645         rc = -EBUSY;
1646         goto unlock_out;
1647     }
1648     else if ( action->nr_guests == 0 )
1649     {
1650         /*
1651          * Indicates that an ACKTYPE_EOI interrupt is being released.
1652          * Wait for that to happen before continuing.
1653          */
1654         ASSERT(action->ack_type == ACKTYPE_EOI);
1655         ASSERT(desc->status & IRQ_DISABLED);
1656         spin_unlock_irq(&desc->lock);
1657         cpu_relax();
1658         goto retry;
1659     }
1660 
1661     if ( action->nr_guests == IRQ_MAX_GUESTS )
1662     {
1663         printk(XENLOG_G_INFO "Cannot bind IRQ%d to dom%d. "
1664                "Already at max share.\n",
1665                pirq->pirq, v->domain->domain_id);
1666         rc = -EBUSY;
1667         goto unlock_out;
1668     }
1669 
1670     action->guest[action->nr_guests++] = v->domain;
1671 
1672     if ( action->ack_type != ACKTYPE_NONE )
1673         set_pirq_eoi(v->domain, pirq->pirq);
1674     else
1675         clear_pirq_eoi(v->domain, pirq->pirq);
1676 
1677  unlock_out:
1678     spin_unlock_irq(&desc->lock);
1679  out:
1680     if ( newaction != NULL )
1681     {
1682         free_cpumask_var(newaction->cpu_eoi_map);
1683         xfree(newaction);
1684     }
1685     return rc;
1686 }
1687 
1688 static irq_guest_action_t *__pirq_guest_unbind(
1689     struct domain *d, struct pirq *pirq, struct irq_desc *desc)
1690 {
1691     unsigned int        irq;
1692     irq_guest_action_t *action;
1693     cpumask_t           cpu_eoi_map;
1694     int                 i;
1695 
1696     action = (irq_guest_action_t *)desc->action;
1697     irq = desc - irq_desc;
1698 
1699     if ( unlikely(action == NULL) )
1700     {
1701         dprintk(XENLOG_G_WARNING, "dom%d: pirq %d: desc->action is NULL!\n",
1702                 d->domain_id, pirq->pirq);
1703         return NULL;
1704     }
1705 
1706     BUG_ON(!(desc->status & IRQ_GUEST));
1707 
1708     for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
1709         continue;
1710     BUG_ON(i == action->nr_guests);
1711     memmove(&action->guest[i], &action->guest[i+1],
1712             (action->nr_guests-i-1) * sizeof(action->guest[0]));
1713     action->nr_guests--;
1714 
1715     switch ( action->ack_type )
1716     {
1717     case ACKTYPE_UNMASK:
1718         if ( test_and_clear_bool(pirq->masked) &&
1719              (--action->in_flight == 0) &&
1720              desc->handler->end )
1721                 desc->handler->end(desc, 0);
1722         break;
1723     case ACKTYPE_EOI:
1724         /* NB. If #guests == 0 then we clear the eoi_map later on. */
1725         if ( test_and_clear_bool(pirq->masked) &&
1726              (--action->in_flight == 0) &&
1727              (action->nr_guests != 0) )
1728         {
1729             cpumask_copy(&cpu_eoi_map, action->cpu_eoi_map);
1730             spin_unlock_irq(&desc->lock);
1731             on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 0);
1732             spin_lock_irq(&desc->lock);
1733         }
1734         break;
1735     }
1736 
1737     /*
1738      * The guest cannot re-bind to this IRQ until this function returns. So,
1739      * when we have flushed this IRQ from ->masked, it should remain flushed.
1740      */
1741     BUG_ON(pirq->masked);
1742 
1743     if ( action->nr_guests != 0 )
1744         return NULL;
1745 
1746     BUG_ON(action->in_flight != 0);
1747 
1748     /* Disabling IRQ before releasing the desc_lock avoids an IRQ storm. */
1749     desc->handler->disable(desc);
1750     desc->status |= IRQ_DISABLED;
1751 
1752     /*
1753      * Mark any remaining pending EOIs as ready to flush.
1754      * NOTE: We will need to make this a stronger barrier if in future we allow
1755      * an interrupt vector to be re-bound to a different PIC. In that case we
1756      * would need to flush all ready EOIs before returning as otherwise the
1757      * desc->handler could change and we would call the wrong 'end' hook.
1758      */
1759     cpumask_copy(&cpu_eoi_map, action->cpu_eoi_map);
1760     if ( !cpumask_empty(&cpu_eoi_map) )
1761     {
1762         BUG_ON(action->ack_type != ACKTYPE_EOI);
1763         spin_unlock_irq(&desc->lock);
1764         on_selected_cpus(&cpu_eoi_map, set_eoi_ready, desc, 1);
1765         spin_lock_irq(&desc->lock);
1766     }
1767 
1768     BUG_ON(!cpumask_empty(action->cpu_eoi_map));
1769 
1770     desc->action = NULL;
1771     desc->status &= ~(IRQ_GUEST|IRQ_INPROGRESS);
1772     desc->handler->shutdown(desc);
1773 
1774     /* Caller frees the old guest descriptor block. */
1775     return action;
1776 }
1777 
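/*
 * Remove domain 'd' from the binding of 'pirq'.  A previously force-unbound
 * pirq (negative arch.irq, no descriptor reachable through the pirq) only has
 * its stale mapping cleaned up.  Any action block handed back by
 * __pirq_guest_unbind() is torn down here, outside of the descriptor lock.
 */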
1778 void pirq_guest_unbind(struct domain *d, struct pirq *pirq)
1779 {
1780     irq_guest_action_t *oldaction = NULL;
1781     struct irq_desc *desc;
1782     int irq = 0;
1783 
1784     WARN_ON(!spin_is_locked(&d->event_lock));
1785 
1786     BUG_ON(!local_irq_is_enabled());
1787     desc = pirq_spin_lock_irq_desc(pirq, NULL);
1788 
1789     if ( desc == NULL )
1790     {
1791         irq = -pirq->arch.irq;
1792         BUG_ON(irq <= 0);
1793         desc = irq_to_desc(irq);
1794         spin_lock_irq(&desc->lock);
1795         clear_domain_irq_pirq(d, irq, pirq);
1796     }
1797     else
1798     {
1799         oldaction = __pirq_guest_unbind(d, pirq, desc);
1800     }
1801 
1802     spin_unlock_irq(&desc->lock);
1803 
1804     if ( oldaction != NULL )
1805     {
1806         kill_timer(&oldaction->eoi_timer);
1807         free_cpumask_var(oldaction->cpu_eoi_map);
1808         xfree(oldaction);
1809     }
1810     else if ( irq > 0 )
1811         cleanup_domain_irq_pirq(d, irq, pirq);
1812 }
1813 
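/*
 * Forcibly remove domain 'd' from the binding of 'pirq', e.g. when the pirq
 * is being unmapped while still bound.  Returns true if 'd' was in fact bound
 * to the interrupt.
 */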
1814 static bool pirq_guest_force_unbind(struct domain *d, struct pirq *pirq)
1815 {
1816     struct irq_desc *desc;
1817     irq_guest_action_t *action, *oldaction = NULL;
1818     unsigned int i;
1819     bool bound = false;
1820 
1821     WARN_ON(!spin_is_locked(&d->event_lock));
1822 
1823     BUG_ON(!local_irq_is_enabled());
1824     desc = pirq_spin_lock_irq_desc(pirq, NULL);
1825     BUG_ON(desc == NULL);
1826 
1827     if ( !(desc->status & IRQ_GUEST) )
1828         goto out;
1829 
1830     action = (irq_guest_action_t *)desc->action;
1831     if ( unlikely(action == NULL) )
1832     {
1833         dprintk(XENLOG_G_WARNING, "dom%d: pirq %d: desc->action is NULL!\n",
1834             d->domain_id, pirq->pirq);
1835         goto out;
1836     }
1837 
1838     for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ )
1839         continue;
1840     if ( i == action->nr_guests )
1841         goto out;
1842 
1843     bound = true;
1844     oldaction = __pirq_guest_unbind(d, pirq, desc);
1845 
1846  out:
1847     spin_unlock_irq(&desc->lock);
1848 
1849     if ( oldaction != NULL )
1850     {
1851         kill_timer(&oldaction->eoi_timer);
1852         free_cpumask_var(oldaction->cpu_eoi_map);
1853         xfree(oldaction);
1854     }
1855 
1856     return bound;
1857 }
1858 
1859 static inline bool is_free_pirq(const struct domain *d,
1860                                 const struct pirq *pirq)
1861 {
1862     return !pirq || (!pirq->arch.irq && (!is_hvm_domain(d) ||
1863         pirq->arch.hvm.emuirq == IRQ_UNBOUND));
1864 }
1865 
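/*
 * Find a free pirq for domain 'd'.  GSI requests prefer the GSI range
 * (16 ... nr_irqs_gsi - 1, scanned upwards); everything else is taken from
 * the top of the pirq space, scanning downwards.  Returns -ENOSPC if the
 * space is exhausted.
 */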
1866 int get_free_pirq(struct domain *d, int type)
1867 {
1868     int i;
1869 
1870     ASSERT(spin_is_locked(&d->event_lock));
1871 
1872     if ( type == MAP_PIRQ_TYPE_GSI )
1873     {
1874         for ( i = 16; i < nr_irqs_gsi; i++ )
1875             if ( is_free_pirq(d, pirq_info(d, i)) )
1876             {
1877                 pirq_get_info(d, i);
1878                 return i;
1879             }
1880     }
1881     for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; i-- )
1882         if ( is_free_pirq(d, pirq_info(d, i)) )
1883         {
1884             pirq_get_info(d, i);
1885             return i;
1886         }
1887 
1888     return -ENOSPC;
1889 }
1890 
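/*
 * Find 'nr' consecutive free pirqs above the GSI range, scanning downwards.
 * Returns the lowest pirq of the block, or -ENOSPC.
 */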
1891 int get_free_pirqs(struct domain *d, unsigned int nr)
1892 {
1893     unsigned int i, found = 0;
1894 
1895     ASSERT(spin_is_locked(&d->event_lock));
1896 
1897     for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
1898         if ( is_free_pirq(d, pirq_info(d, i)) )
1899         {
1900             pirq_get_info(d, i);
1901             if ( ++found == nr )
1902                 return i;
1903         }
1904         else
1905             found = 0;
1906 
1907     return -ENOSPC;
1908 }
1909 
1910 #define MAX_MSI_IRQS 32 /* limited by MSI capability struct properties */
1911 
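/*
 * Establish the pirq -> irq mapping for domain 'd'.  The caller must hold
 * d->event_lock (and the PCI devices lock for MSI types).  For MSI the
 * capability is enabled here; for multi-vector MSI further IRQs are created
 * and mapped to the consecutive pirqs, and on any failure the whole block is
 * rolled back.
 */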
1912 int map_domain_pirq(
1913     struct domain *d, int pirq, int irq, int type, void *data)
1914 {
1915     int ret = 0;
1916     int old_irq, old_pirq;
1917     struct pirq *info;
1918     struct irq_desc *desc;
1919     unsigned long flags;
1920     DECLARE_BITMAP(prepared, MAX_MSI_IRQS) = {};
1921 
1922     ASSERT(spin_is_locked(&d->event_lock));
1923 
1924     if ( !irq_access_permitted(current->domain, irq))
1925         return -EPERM;
1926 
1927     if ( pirq < 0 || pirq >= d->nr_pirqs || irq <= 0 || irq >= nr_irqs )
1928     {
1929         dprintk(XENLOG_G_ERR, "dom%d: invalid pirq %d or irq %d\n",
1930                 d->domain_id, pirq, irq);
1931         return -EINVAL;
1932     }
1933 
1934     old_irq = domain_pirq_to_irq(d, pirq);
1935     old_pirq = domain_irq_to_pirq(d, irq);
1936 
1937     if ( (old_irq > 0 && (old_irq != irq) ) ||
1938          (old_pirq && (old_pirq != pirq)) )
1939     {
1940         dprintk(XENLOG_G_WARNING,
1941                 "dom%d: pirq %d or irq %d already mapped (%d,%d)\n",
1942                 d->domain_id, pirq, irq, old_pirq, old_irq);
1943         return 0;
1944     }
1945 
1946     ret = xsm_map_domain_irq(XSM_HOOK, d, irq, data);
1947     if ( ret )
1948     {
1949         dprintk(XENLOG_G_ERR, "dom%d: could not permit access to irq %d mapping to pirq %d\n",
1950                 d->domain_id, irq, pirq);
1951         return ret;
1952     }
1953 
1954     ret = irq_permit_access(d, irq);
1955     if ( ret )
1956     {
1957         printk(XENLOG_G_ERR
1958                "dom%d: could not permit access to IRQ%d (pirq %d)\n",
1959                d->domain_id, irq, pirq);
1960         return ret;
1961     }
1962 
1963     ret = prepare_domain_irq_pirq(d, irq, pirq, &info);
1964     if ( ret < 0 )
1965         goto revoke;
1966     if ( !ret )
1967         __set_bit(0, prepared);
1968 
1969     desc = irq_to_desc(irq);
1970 
1971     if ( type == MAP_PIRQ_TYPE_MSI || type == MAP_PIRQ_TYPE_MULTI_MSI )
1972     {
1973         struct msi_info *msi = (struct msi_info *)data;
1974         struct msi_desc *msi_desc;
1975         struct pci_dev *pdev;
1976         unsigned int nr = 0;
1977 
1978         ASSERT(pcidevs_locked());
1979 
1980         ret = -ENODEV;
1981         if ( !cpu_has_apic )
1982             goto done;
1983 
1984         pdev = pci_get_pdev_by_domain(d, msi->seg, msi->bus, msi->devfn);
1985         if ( !pdev )
1986             goto done;
1987 
1988         ret = pci_enable_msi(msi, &msi_desc);
1989         if ( ret )
1990         {
1991             if ( ret > 0 )
1992             {
1993                 msi->entry_nr = ret;
1994                 ret = -ENFILE;
1995             }
1996             goto done;
1997         }
1998 
1999         spin_lock_irqsave(&desc->lock, flags);
2000 
2001         if ( desc->handler != &no_irq_type )
2002         {
2003             spin_unlock_irqrestore(&desc->lock, flags);
2004             dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
2005                     d->domain_id, irq);
2006             pci_disable_msi(msi_desc);
2007             msi_desc->irq = -1;
2008             msi_free_irq(msi_desc);
2009             ret = -EBUSY;
2010             goto done;
2011         }
2012 
2013         while ( !(ret = setup_msi_irq(desc, msi_desc + nr)) )
2014         {
2015             if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV &&
2016                  !desc->arch.used_vectors )
2017             {
2018                 desc->arch.used_vectors = &pdev->arch.used_vectors;
2019                 if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
2020                 {
2021                     int vector = desc->arch.vector;
2022 
2023                     ASSERT(!test_bit(vector, desc->arch.used_vectors));
2024                     set_bit(vector, desc->arch.used_vectors);
2025                 }
2026             }
2027             if ( type == MAP_PIRQ_TYPE_MSI ||
2028                  msi_desc->msi_attrib.type != PCI_CAP_ID_MSI ||
2029                  ++nr == msi->entry_nr )
2030                 break;
2031 
2032             set_domain_irq_pirq(d, irq, info);
2033             spin_unlock_irqrestore(&desc->lock, flags);
2034 
2035             info = NULL;
2036             irq = create_irq(NUMA_NO_NODE);
2037             ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
2038                            : irq;
2039             if ( ret < 0 )
2040                 break;
2041             if ( !ret )
2042                 __set_bit(nr, prepared);
2043             msi_desc[nr].irq = irq;
2044 
2045             if ( irq_permit_access(d, irq) != 0 )
2046                 printk(XENLOG_G_WARNING
2047                        "dom%d: could not permit access to IRQ%d (pirq %d)\n",
2048                        d->domain_id, irq, pirq);
2049 
2050             desc = irq_to_desc(irq);
2051             spin_lock_irqsave(&desc->lock, flags);
2052 
2053             if ( desc->handler != &no_irq_type )
2054             {
2055                 dprintk(XENLOG_G_ERR, "dom%d: irq %d (pirq %u) in use (%s)\n",
2056                         d->domain_id, irq, pirq + nr, desc->handler->typename);
2057                 ret = -EBUSY;
2058                 break;
2059             }
2060         }
2061 
2062         if ( ret )
2063         {
2064             spin_unlock_irqrestore(&desc->lock, flags);
2065             pci_disable_msi(msi_desc);
2066             if ( nr )
2067             {
2068                 ASSERT(msi_desc->irq >= 0);
2069                 desc = irq_to_desc(msi_desc->irq);
2070                 spin_lock_irqsave(&desc->lock, flags);
2071                 desc->handler = &no_irq_type;
2072                 desc->msi_desc = NULL;
2073                 spin_unlock_irqrestore(&desc->lock, flags);
2074             }
2075             while ( nr )
2076             {
2077                 if ( irq >= 0 && irq_deny_access(d, irq) )
2078                     printk(XENLOG_G_ERR
2079                            "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
2080                            d->domain_id, irq, pirq);
2081                 if ( info && test_bit(nr, prepared) )
2082                     cleanup_domain_irq_pirq(d, irq, info);
2083                 info = pirq_info(d, pirq + --nr);
2084                 irq = info->arch.irq;
2085             }
2086             msi_desc->irq = -1;
2087             msi_free_irq(msi_desc);
2088             goto done;
2089         }
2090 
2091         set_domain_irq_pirq(d, irq, info);
2092         spin_unlock_irqrestore(&desc->lock, flags);
2093     }
2094     else
2095     {
2096         spin_lock_irqsave(&desc->lock, flags);
2097         set_domain_irq_pirq(d, irq, info);
2098         spin_unlock_irqrestore(&desc->lock, flags);
2099         ret = 0;
2100     }
2101 
2102  done:
2103     if ( ret )
2104     {
2105         if ( test_bit(0, prepared) )
2106             cleanup_domain_irq_pirq(d, irq, info);
2107  revoke:
2108         if ( irq_deny_access(d, irq) )
2109             printk(XENLOG_G_ERR
2110                    "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
2111                    d->domain_id, irq, pirq);
2112     }
2113     return ret;
2114 }
2115 
2116 /* The pirq should have been unbound before this call. */
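/*
 * For multi-vector MSI the whole block of vectors is torn down together; the
 * pirq passed in must be the first (non-secondary) one.  If a guest is still
 * bound, the binding is forcibly broken first.
 */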
2117 int unmap_domain_pirq(struct domain *d, int pirq)
2118 {
2119     unsigned long flags;
2120     struct irq_desc *desc;
2121     int irq, ret = 0, rc;
2122     unsigned int i, nr = 1;
2123     bool forced_unbind;
2124     struct pirq *info;
2125     struct msi_desc *msi_desc = NULL;
2126 
2127     if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
2128         return -EINVAL;
2129 
2130     ASSERT(pcidevs_locked());
2131     ASSERT(spin_is_locked(&d->event_lock));
2132 
2133     info = pirq_info(d, pirq);
2134     if ( !info || (irq = info->arch.irq) <= 0 )
2135     {
2136         dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
2137                 d->domain_id, pirq);
2138         ret = -EINVAL;
2139         goto done;
2140     }
2141 
2142     desc = irq_to_desc(irq);
2143     msi_desc = desc->msi_desc;
2144     if ( msi_desc && msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
2145     {
2146         if ( msi_desc->msi_attrib.entry_nr )
2147         {
2148             printk(XENLOG_G_ERR
2149                    "dom%d: trying to unmap secondary MSI pirq %d\n",
2150                    d->domain_id, pirq);
2151             ret = -EBUSY;
2152             goto done;
2153         }
2154         nr = msi_desc->msi.nvec;
2155     }
2156 
2157     ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq,
2158                                msi_desc ? msi_desc->dev : NULL);
2159     if ( ret )
2160         goto done;
2161 
2162     forced_unbind = pirq_guest_force_unbind(d, info);
2163     if ( forced_unbind )
2164         dprintk(XENLOG_G_WARNING, "dom%d: forcing unbind of pirq %d\n",
2165                 d->domain_id, pirq);
2166 
2167     if ( msi_desc != NULL )
2168         pci_disable_msi(msi_desc);
2169 
2170     spin_lock_irqsave(&desc->lock, flags);
2171 
2172     for ( i = 0; ; )
2173     {
2174         BUG_ON(irq != domain_pirq_to_irq(d, pirq + i));
2175 
2176         if ( !forced_unbind )
2177             clear_domain_irq_pirq(d, irq, info);
2178         else
2179         {
2180             info->arch.irq = -irq;
2181             radix_tree_replace_slot(
2182                 radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
2183                 radix_tree_int_to_ptr(-pirq));
2184         }
2185 
2186         if ( msi_desc )
2187         {
2188             desc->handler = &no_irq_type;
2189             desc->msi_desc = NULL;
2190         }
2191 
2192         if ( ++i == nr )
2193             break;
2194 
2195         spin_unlock_irqrestore(&desc->lock, flags);
2196 
2197         if ( !forced_unbind )
2198            cleanup_domain_irq_pirq(d, irq, info);
2199 
2200         rc = irq_deny_access(d, irq);
2201         if ( rc )
2202         {
2203             printk(XENLOG_G_ERR
2204                    "dom%d: could not deny access to IRQ%d (pirq %d)\n",
2205                    d->domain_id, irq, pirq + i);
2206             ret = rc;
2207         }
2208 
2209         do {
2210             info = pirq_info(d, pirq + i);
2211             if ( info && (irq = info->arch.irq) > 0 )
2212                 break;
2213             printk(XENLOG_G_ERR "dom%d: MSI pirq %d not mapped\n",
2214                    d->domain_id, pirq + i);
2215         } while ( ++i < nr );
2216 
2217         if ( i == nr )
2218         {
2219             desc = NULL;
2220             break;
2221         }
2222 
2223         desc = irq_to_desc(irq);
2224         BUG_ON(desc->msi_desc != msi_desc + i);
2225 
2226         spin_lock_irqsave(&desc->lock, flags);
2227     }
2228 
2229     if ( desc )
2230     {
2231         spin_unlock_irqrestore(&desc->lock, flags);
2232 
2233         if ( !forced_unbind )
2234             cleanup_domain_irq_pirq(d, irq, info);
2235 
2236         rc = irq_deny_access(d, irq);
2237         if ( rc )
2238         {
2239             printk(XENLOG_G_ERR
2240                    "dom%d: could not deny access to IRQ%d (pirq %d)\n",
2241                    d->domain_id, irq, pirq + nr - 1);
2242             ret = rc;
2243         }
2244     }
2245 
2246     if ( msi_desc )
2247         msi_free_irq(msi_desc);
2248 
2249  done:
2250     return ret;
2251 }
2252 
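/* Unmap every pirq that is still mapped for domain 'd'. */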
2253 void free_domain_pirqs(struct domain *d)
2254 {
2255     int i;
2256 
2257     pcidevs_lock();
2258     spin_lock(&d->event_lock);
2259 
2260     for ( i = 0; i < d->nr_pirqs; i++ )
2261         if ( domain_pirq_to_irq(d, i) > 0 )
2262             unmap_domain_pirq(d, i);
2263 
2264     spin_unlock(&d->event_lock);
2265     pcidevs_unlock();
2266 }
2267 
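/*
 * 'i' debug key handler: print, for every initialized IRQ, its affinity,
 * vector, type and status, plus the guest bindings or handler, followed by
 * the directly mapped vectors and the IO-APIC state.
 */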
2268 static void dump_irqs(unsigned char key)
2269 {
2270     int i, irq, pirq;
2271     struct irq_desc *desc;
2272     irq_guest_action_t *action;
2273     struct domain *d;
2274     const struct pirq *info;
2275     unsigned long flags;
2276     char *ssid;
2277 
2278     printk("IRQ information:\n");
2279 
2280     for ( irq = 0; irq < nr_irqs; irq++ )
2281     {
2282         if ( !(irq & 0x1f) )
2283             process_pending_softirqs();
2284 
2285         desc = irq_to_desc(irq);
2286 
2287         if ( !irq_desc_initialized(desc) || desc->handler == &no_irq_type )
2288             continue;
2289 
2290         ssid = in_irq() ? NULL : xsm_show_irq_sid(irq);
2291 
2292         spin_lock_irqsave(&desc->lock, flags);
2293 
2294         cpumask_scnprintf(keyhandler_scratch, sizeof(keyhandler_scratch),
2295                           desc->affinity);
2296         printk("   IRQ:%4d affinity:%s vec:%02x type=%-15s"
2297                " status=%08x ",
2298                irq, keyhandler_scratch, desc->arch.vector,
2299                desc->handler->typename, desc->status);
2300 
2301         if ( ssid )
2302             printk("Z=%-25s ", ssid);
2303 
2304         if ( desc->status & IRQ_GUEST )
2305         {
2306             action = (irq_guest_action_t *)desc->action;
2307 
2308             printk("in-flight=%d domain-list=", action->in_flight);
2309 
2310             for ( i = 0; i < action->nr_guests; i++ )
2311             {
2312                 d = action->guest[i];
2313                 pirq = domain_irq_to_pirq(d, irq);
2314                 info = pirq_info(d, pirq);
2315                 printk("%u:%3d(%c%c%c)",
2316                        d->domain_id, pirq,
2317                        evtchn_port_is_pending(d, info->evtchn) ? 'P' : '-',
2318                        evtchn_port_is_masked(d, info->evtchn) ? 'M' : '-',
2319                        (info->masked ? 'M' : '-'));
2320                 if ( i + 1 != action->nr_guests )
2321                     printk(",");
2322             }
2323 
2324             printk("\n");
2325         }
2326         else if ( desc->action )
2327             printk("%ps()\n", desc->action->handler);
2328         else
2329             printk("mapped, unbound\n");
2330 
2331         spin_unlock_irqrestore(&desc->lock, flags);
2332 
2333         xfree(ssid);
2334     }
2335 
2336     process_pending_softirqs();
2337     printk("Direct vector information:\n");
2338     for ( i = FIRST_DYNAMIC_VECTOR; i < NR_VECTORS; ++i )
2339         if ( direct_apic_vector[i] )
2340             printk("   %#02x -> %ps()\n", i, direct_apic_vector[i]);
2341 
2342     dump_ioapic_irq_info();
2343 }
2344 
2345 static int __init setup_dump_irqs(void)
2346 {
2347     register_keyhandler('i', dump_irqs, "dump interrupt bindings", 1);
2348     return 0;
2349 }
2350 __initcall(setup_dump_irqs);
2351 
2352 /* Reset irq affinities to match the given CPU mask. */
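/*
 * Interrupts whose current affinity falls outside 'mask' are redirected (or,
 * if nothing overlaps, have their affinity broken to 'mask').  Interrupts are
 * briefly enabled at the end so anything still in flight can be delivered.
 */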
2353 void fixup_irqs(const cpumask_t *mask, bool verbose)
2354 {
2355     unsigned int irq;
2356     static int warned;
2357     struct irq_desc *desc;
2358 
2359     for ( irq = 0; irq < nr_irqs; irq++ )
2360     {
2361         bool break_affinity = false, set_affinity = true;
2362         unsigned int vector;
2363         cpumask_t affinity;
2364 
2365         if ( irq == 2 )
2366             continue;
2367 
2368         desc = irq_to_desc(irq);
2369         if ( !irq_desc_initialized(desc) )
2370             continue;
2371 
2372         spin_lock(&desc->lock);
2373 
2374         vector = irq_to_vector(irq);
2375         if ( vector >= FIRST_HIPRIORITY_VECTOR &&
2376              vector <= LAST_HIPRIORITY_VECTOR )
2377             cpumask_and(desc->arch.cpu_mask, desc->arch.cpu_mask, mask);
2378 
2379         cpumask_copy(&affinity, desc->affinity);
2380         if ( !desc->action || cpumask_subset(&affinity, mask) )
2381         {
2382             spin_unlock(&desc->lock);
2383             continue;
2384         }
2385 
2386         cpumask_and(&affinity, &affinity, mask);
2387         if ( cpumask_empty(&affinity) )
2388         {
2389             break_affinity = true;
2390             cpumask_copy(&affinity, mask);
2391         }
2392 
2393         if ( desc->handler->disable )
2394             desc->handler->disable(desc);
2395 
2396         if ( desc->handler->set_affinity )
2397             desc->handler->set_affinity(desc, &affinity);
2398         else if ( !(warned++) )
2399             set_affinity = false;
2400 
2401         if ( desc->handler->enable )
2402             desc->handler->enable(desc);
2403 
2404         spin_unlock(&desc->lock);
2405 
2406         if ( !verbose )
2407             continue;
2408 
2409         if ( break_affinity && set_affinity )
2410             printk("Broke affinity for irq %i\n", irq);
2411         else if ( !set_affinity )
2412             printk("Cannot set affinity for irq %i\n", irq);
2413     }
2414 
2415     /* Give in-flight interrupts a brief window (IRQs on, 1ms) to be delivered. */
2416     local_irq_enable();
2417     mdelay(1);
2418     local_irq_disable();
2419 }
2420 
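/*
 * Remove this CPU from every guest IRQ's cpu_eoi_map and force-flush the
 * CPU's pending-EOI stack, so no EOI is left waiting on it.
 */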
2421 void fixup_eoi(void)
2422 {
2423     unsigned int irq, sp;
2424     struct irq_desc *desc;
2425     irq_guest_action_t *action;
2426     struct pending_eoi *peoi;
2427 
2428     /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
2429     for ( irq = 0; irq < nr_irqs; irq++ )
2430     {
2431         desc = irq_to_desc(irq);
2432         if ( !(desc->status & IRQ_GUEST) )
2433             continue;
2434         action = (irq_guest_action_t *)desc->action;
2435         cpumask_clear_cpu(smp_processor_id(), action->cpu_eoi_map);
2436     }
2437 
2438     /* Flush the interrupt EOI stack. */
2439     peoi = this_cpu(pending_eoi);
2440     for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
2441         peoi[sp].ready = 1;
2442     flush_ready_eoi();
2443 }
2444 
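/*
 * Record the emulated IRQ backing 'pirq' for an HVM domain.  IRQ_PT (a
 * passed-through device interrupt) is remembered in the pirq only and is not
 * entered into the emuirq -> pirq radix tree.
 */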
2445 int map_domain_emuirq_pirq(struct domain *d, int pirq, int emuirq)
2446 {
2447     int old_emuirq = IRQ_UNBOUND, old_pirq = IRQ_UNBOUND;
2448     struct pirq *info;
2449 
2450     ASSERT(spin_is_locked(&d->event_lock));
2451 
2452     if ( !is_hvm_domain(d) )
2453         return -EINVAL;
2454 
2455     if ( pirq < 0 || pirq >= d->nr_pirqs ||
2456             emuirq == IRQ_UNBOUND || emuirq >= (int) nr_irqs )
2457     {
2458         dprintk(XENLOG_G_ERR, "dom%d: invalid pirq %d or emuirq %d\n",
2459                 d->domain_id, pirq, emuirq);
2460         return -EINVAL;
2461     }
2462 
2463     old_emuirq = domain_pirq_to_emuirq(d, pirq);
2464     if ( emuirq != IRQ_PT )
2465         old_pirq = domain_emuirq_to_pirq(d, emuirq);
2466 
2467     if ( (old_emuirq != IRQ_UNBOUND && (old_emuirq != emuirq) ) ||
2468          (old_pirq != IRQ_UNBOUND && (old_pirq != pirq)) )
2469     {
2470         dprintk(XENLOG_G_WARNING, "dom%d: pirq %d or emuirq %d already mapped\n",
2471                 d->domain_id, pirq, emuirq);
2472         return 0;
2473     }
2474 
2475     info = pirq_get_info(d, pirq);
2476     if ( !info )
2477         return -ENOMEM;
2478 
2479     /* do not store emuirq mappings for pt devices */
2480     if ( emuirq != IRQ_PT )
2481     {
2482         int err = radix_tree_insert(&d->arch.hvm_domain.emuirq_pirq, emuirq,
2483                                     radix_tree_int_to_ptr(pirq));
2484 
2485         switch ( err )
2486         {
2487         case 0:
2488             break;
2489         case -EEXIST:
2490             radix_tree_replace_slot(
2491                 radix_tree_lookup_slot(
2492                     &d->arch.hvm_domain.emuirq_pirq, emuirq),
2493                 radix_tree_int_to_ptr(pirq));
2494             break;
2495         default:
2496             pirq_cleanup_check(info, d);
2497             return err;
2498         }
2499     }
2500     info->arch.hvm.emuirq = emuirq;
2501 
2502     return 0;
2503 }
2504 
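/* Undo map_domain_emuirq_pirq(): clear the emuirq and drop the tree entry. */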
2505 int unmap_domain_pirq_emuirq(struct domain *d, int pirq)
2506 {
2507     int emuirq, ret = 0;
2508     struct pirq *info;
2509 
2510     if ( !is_hvm_domain(d) )
2511         return -EINVAL;
2512 
2513     if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
2514         return -EINVAL;
2515 
2516     ASSERT(spin_is_locked(&d->event_lock));
2517 
2518     emuirq = domain_pirq_to_emuirq(d, pirq);
2519     if ( emuirq == IRQ_UNBOUND )
2520     {
2521         dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
2522                 d->domain_id, pirq);
2523         ret = -EINVAL;
2524         goto done;
2525     }
2526 
2527     info = pirq_info(d, pirq);
2528     if ( info )
2529     {
2530         info->arch.hvm.emuirq = IRQ_UNBOUND;
2531         pirq_cleanup_check(info, d);
2532     }
2533     if ( emuirq != IRQ_PT )
2534         radix_tree_delete(&d->arch.hvm_domain.emuirq_pirq, emuirq);
2535 
2536  done:
2537     return ret;
2538 }
2539 
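/*
 * Called when an event channel is bound to 'pirq': mark HVM pirqs as
 * passed-through (IRQ_PT) and, for MSI interrupts, clear the guest mask via
 * guest_mask_msi_irq(desc, 0).
 */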
2540 void arch_evtchn_bind_pirq(struct domain *d, int pirq)
2541 {
2542     int irq = domain_pirq_to_irq(d, pirq);
2543     struct irq_desc *desc;
2544     unsigned long flags;
2545 
2546     if ( irq <= 0 )
2547         return;
2548 
2549     if ( is_hvm_domain(d) )
2550         map_domain_emuirq_pirq(d, pirq, IRQ_PT);
2551 
2552     desc = irq_to_desc(irq);
2553     spin_lock_irqsave(&desc->lock, flags);
2554     if ( desc->msi_desc )
2555         guest_mask_msi_irq(desc, 0);
2556     spin_unlock_irqrestore(&desc->lock, flags);
2557 }
2558 
2559 bool hvm_domain_use_pirq(const struct domain *d, const struct pirq *pirq)
2560 {
2561     return is_hvm_domain(d) && pirq && pirq->arch.hvm.emuirq != IRQ_UNBOUND;
2562 }
2563 
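/*
 * Verify a caller-supplied pirq or pick a free one (pirq < 0).  For
 * multi-vector MSI, *nr is first rounded up to a power of two (e.g. a request
 * for 5 vectors becomes 8) and a contiguous block of that size is allocated;
 * if no such block exists, *nr is reduced to a size that would fit so the
 * caller can learn what is available.
 */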
2564 static int allocate_pirq(struct domain *d, int index, int pirq, int irq,
2565                          int type, int *nr)
2566 {
2567     int current_pirq;
2568 
2569     ASSERT(spin_is_locked(&d->event_lock));
2570     current_pirq = domain_irq_to_pirq(d, irq);
2571     if ( pirq < 0 )
2572     {
2573         if ( current_pirq )
2574         {
2575             dprintk(XENLOG_G_ERR, "dom%d: %d:%d already mapped to %d\n",
2576                     d->domain_id, index, pirq, current_pirq);
2577             if ( current_pirq < 0 )
2578                 return -EBUSY;
2579         }
2580         else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
2581         {
2582             if ( *nr <= 0 || *nr > MAX_MSI_IRQS )
2583                 return -EDOM;
2584             if ( *nr != 1 && !iommu_intremap )
2585                 return -EOPNOTSUPP;
2586 
2587             while ( *nr & (*nr - 1) )
2588                 *nr += *nr & -*nr;
2589             pirq = get_free_pirqs(d, *nr);
2590             if ( pirq < 0 )
2591             {
2592                 while ( (*nr >>= 1) > 1 )
2593                     if ( get_free_pirqs(d, *nr) > 0 )
2594                         break;
2595                 dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
2596                         d->domain_id, *nr << 1);
2597             }
2598         }
2599         else
2600         {
2601             pirq = get_free_pirq(d, type);
2602             if ( pirq < 0 )
2603                 dprintk(XENLOG_G_ERR, "dom%d: no free pirq\n", d->domain_id);
2604         }
2605     }
2606     else if ( current_pirq && pirq != current_pirq )
2607     {
2608         dprintk(XENLOG_G_ERR, "dom%d: irq %d already mapped to pirq %d\n",
2609                 d->domain_id, irq, current_pirq);
2610         return -EEXIST;
2611     }
2612 
2613     return pirq;
2614 }
2615 
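/*
 * Allocate (or verify) a pirq for GSI 'index' and map it into domain 'd'.
 * The GSI must already have an IRQ in the calling domain, except for the
 * hardware domain, where the identity mapping is assumed.
 */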
2616 int allocate_and_map_gsi_pirq(struct domain *d, int index, int *pirq_p)
2617 {
2618     int irq, pirq, ret;
2619 
2620     if ( index < 0 || index >= nr_irqs_gsi )
2621     {
2622         dprintk(XENLOG_G_ERR, "dom%d: map invalid irq %d\n", d->domain_id,
2623                 index);
2624         return -EINVAL;
2625     }
2626 
2627     irq = domain_pirq_to_irq(current->domain, index);
2628     if ( irq <= 0 )
2629     {
2630         if ( is_hardware_domain(current->domain) )
2631             irq = index;
2632         else
2633         {
2634             dprintk(XENLOG_G_ERR, "dom%d: map pirq with incorrect irq!\n",
2635                     d->domain_id);
2636             return -EINVAL;
2637         }
2638     }
2639 
2640     /* Verify or get pirq. */
2641     spin_lock(&d->event_lock);
2642     pirq = allocate_pirq(d, index, *pirq_p, irq, MAP_PIRQ_TYPE_GSI, NULL);
2643     if ( pirq < 0 )
2644     {
2645         ret = pirq;
2646         goto done;
2647     }
2648 
2649     ret = map_domain_pirq(d, pirq, irq, MAP_PIRQ_TYPE_GSI, NULL);
2650     if ( !ret )
2651         *pirq_p = pirq;
2652 
2653  done:
2654     spin_unlock(&d->event_lock);
2655 
2656     return ret;
2657 }
2658 
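/*
 * MSI counterpart of allocate_and_map_gsi_pirq(): pick or create a suitable
 * IRQ (multi-MSI always gets a freshly created one), then allocate/verify the
 * pirq and map it.  A created IRQ is destroyed again if the mapping fails.
 */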
2659 int allocate_and_map_msi_pirq(struct domain *d, int index, int *pirq_p,
2660                               int type, struct msi_info *msi)
2661 {
2662     int irq, pirq, ret;
2663 
2664     switch ( type )
2665     {
2666     case MAP_PIRQ_TYPE_MSI:
2667         if ( !msi->table_base )
2668             msi->entry_nr = 1;
2669         irq = index;
2670         if ( irq == -1 )
2671         {
2672     case MAP_PIRQ_TYPE_MULTI_MSI:
2673             irq = create_irq(NUMA_NO_NODE);
2674         }
2675 
2676         if ( irq < nr_irqs_gsi || irq >= nr_irqs )
2677         {
2678             dprintk(XENLOG_G_ERR, "dom%d: can't create irq for msi!\n",
2679                     d->domain_id);
2680             return -EINVAL;
2681         }
2682 
2683         msi->irq = irq;
2684         break;
2685 
2686     default:
2687         dprintk(XENLOG_G_ERR, "dom%d: wrong pirq type %x\n",
2688                 d->domain_id, type);
2689         ASSERT_UNREACHABLE();
2690         return -EINVAL;
2691     }
2692 
2693     msi->irq = irq;
2694 
2695     pcidevs_lock();
2696     /* Verify or get pirq. */
2697     spin_lock(&d->event_lock);
2698     pirq = allocate_pirq(d, index, *pirq_p, irq, type, &msi->entry_nr);
2699     if ( pirq < 0 )
2700     {
2701         ret = pirq;
2702         goto done;
2703     }
2704 
2705     ret = map_domain_pirq(d, pirq, irq, type, msi);
2706     if ( !ret )
2707         *pirq_p = pirq;
2708 
2709  done:
2710     spin_unlock(&d->event_lock);
2711     pcidevs_unlock();
2712     if ( ret )
2713     {
2714         switch ( type )
2715         {
2716         case MAP_PIRQ_TYPE_MSI:
2717             if ( index == -1 )
2718         case MAP_PIRQ_TYPE_MULTI_MSI:
2719                 destroy_irq(irq);
2720             break;
2721         }
2722     }
2723 
2724     return ret;
2725 }
2726