1 /*
2  * File:    msi.c
3  * Purpose: PCI Message Signaled Interrupt (MSI)
4  *
5  * Copyright (C) 2003-2004 Intel
6  * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
7  */
8 
9 #include <xen/lib.h>
10 #include <xen/init.h>
11 #include <xen/irq.h>
12 #include <xen/delay.h>
13 #include <xen/sched.h>
14 #include <xen/acpi.h>
15 #include <xen/cpu.h>
16 #include <xen/errno.h>
17 #include <xen/pci.h>
18 #include <xen/pci_regs.h>
19 #include <xen/iocap.h>
20 #include <xen/keyhandler.h>
21 #include <xen/pfn.h>
22 #include <asm/io.h>
23 #include <asm/smp.h>
24 #include <asm/desc.h>
25 #include <asm/msi.h>
26 #include <asm/fixmap.h>
27 #include <asm/p2m.h>
28 #include <mach_apic.h>
29 #include <io_ports.h>
30 #include <public/physdev.h>
31 #include <xen/iommu.h>
32 #include <xsm/xsm.h>
33 
34 static s8 __read_mostly use_msi = -1;
35 boolean_param("msi", use_msi);
36 
37 static void __pci_disable_msix(struct msi_desc *);
38 
/* Bitmap tracking which MSI-X fixmap pages are in use (set bit = taken). */
40 static DEFINE_SPINLOCK(msix_fixmap_lock);
41 static DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);
42 
msix_fixmap_alloc(void)43 static int msix_fixmap_alloc(void)
44 {
45     int i, rc = -ENOMEM;
46 
47     spin_lock(&msix_fixmap_lock);
48     for ( i = 0; i < FIX_MSIX_MAX_PAGES; i++ )
49         if ( !test_bit(i, &msix_fixmap_pages) )
50             break;
51     if ( i == FIX_MSIX_MAX_PAGES )
52         goto out;
53     rc = FIX_MSIX_IO_RESERV_BASE + i;
54     set_bit(i, &msix_fixmap_pages);
55 
56  out:
57     spin_unlock(&msix_fixmap_lock);
58     return rc;
59 }
60 
msix_fixmap_free(int idx)61 static void msix_fixmap_free(int idx)
62 {
63     spin_lock(&msix_fixmap_lock);
64     if ( idx >= FIX_MSIX_IO_RESERV_BASE )
65         clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages);
66     spin_unlock(&msix_fixmap_lock);
67 }
68 
/*
 * Obtain a fixmap mapping of the MSI-X table page containing @entry_paddr.
 * Pages are refcounted per device, so repeated requests for the same page
 * share one mapping.  Returns the fixmap index, -EINVAL for an entry
 * outside the table's page range, or -ENOMEM when no fixmap slot is free.
 */
static int msix_get_fixmap(struct arch_msix *msix, u64 table_paddr,
                           u64 entry_paddr)
{
    long page_off = (entry_paddr >> PAGE_SHIFT) - (table_paddr >> PAGE_SHIFT);
    int idx;

    if ( page_off < 0 || page_off >= MAX_MSIX_TABLE_PAGES )
        return -EINVAL;

    spin_lock(&msix->table_lock);
    if ( msix->table_refcnt[page_off]++ )
        idx = msix->table_idx[page_off];
    else
    {
        /* First reference: establish the mapping. */
        idx = msix_fixmap_alloc();
        if ( idx < 0 )
            msix->table_refcnt[page_off]--;
        else
        {
            set_fixmap_nocache(idx, entry_paddr);
            msix->table_idx[page_off] = idx;
        }
    }
    spin_unlock(&msix->table_lock);

    return idx;
}
99 
msix_put_fixmap(struct arch_msix * msix,int idx)100 static void msix_put_fixmap(struct arch_msix *msix, int idx)
101 {
102     int i;
103 
104     spin_lock(&msix->table_lock);
105     for ( i = 0; i < MAX_MSIX_TABLE_PAGES; i++ )
106     {
107         if ( msix->table_idx[i] == idx )
108             break;
109     }
110     if ( i == MAX_MSIX_TABLE_PAGES )
111         goto out;
112 
113     if ( --msix->table_refcnt[i] == 0 )
114     {
115         clear_fixmap(idx);
116         msix_fixmap_free(idx);
117         msix->table_idx[i] = 0;
118     }
119 
120  out:
121     spin_unlock(&msix->table_lock);
122 }
123 
memory_decoded(const struct pci_dev * dev)124 static bool memory_decoded(const struct pci_dev *dev)
125 {
126     u8 bus, slot, func;
127 
128     if ( !dev->info.is_virtfn )
129     {
130         bus = dev->bus;
131         slot = PCI_SLOT(dev->devfn);
132         func = PCI_FUNC(dev->devfn);
133     }
134     else
135     {
136         bus = dev->info.physfn.bus;
137         slot = PCI_SLOT(dev->info.physfn.devfn);
138         func = PCI_FUNC(dev->info.physfn.devfn);
139     }
140 
141     return !!(pci_conf_read16(dev->seg, bus, slot, func, PCI_COMMAND) &
142               PCI_COMMAND_MEMORY);
143 }
144 
msix_memory_decoded(const struct pci_dev * dev,unsigned int pos)145 static bool msix_memory_decoded(const struct pci_dev *dev, unsigned int pos)
146 {
147     u16 control = pci_conf_read16(dev->seg, dev->bus, PCI_SLOT(dev->devfn),
148                                   PCI_FUNC(dev->devfn), msix_control_reg(pos));
149 
150     if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
151         return false;
152 
153     return memory_decoded(dev);
154 }
155 
156 /*
157  * MSI message composition
158  */
/*
 * Compose the MSI address/data pair delivering @vector to the CPUs in
 * @cpu_mask.  @msg is zeroed first; it stays all-zero when @vector is not
 * a dynamically allocated one or no CPU in @cpu_mask is online.
 */
void msi_compose_msg(unsigned vector, const cpumask_t *cpu_mask, struct msi_msg *msg)
{
    memset(msg, 0, sizeof(*msg));

    if ( vector < FIRST_DYNAMIC_VECTOR )
        return;

    if ( cpu_mask )
    {
        cpumask_t *mask = this_cpu(scratch_cpumask);

        /* Leave the zeroed message in place if no target CPU is online. */
        if ( !cpumask_intersects(cpu_mask, &cpu_online_map) )
            return;

        cpumask_and(mask, cpu_mask, &cpu_online_map);
        msg->dest32 = cpu_mask_to_apicid(mask);
    }

    /* Destination/delivery mode follow the platform's interrupt model. */
    msg->address_hi = MSI_ADDR_BASE_HI;
    msg->address_lo = MSI_ADDR_BASE_LO |
                      (INT_DEST_MODE ? MSI_ADDR_DESTMODE_LOGIC
                                     : MSI_ADDR_DESTMODE_PHYS) |
                      ((INT_DELIVERY_MODE != dest_LowestPrio)
                       ? MSI_ADDR_REDIRECTION_CPU
                       : MSI_ADDR_REDIRECTION_LOWPRI) |
                      MSI_ADDR_DEST_ID(msg->dest32);

    msg->data = MSI_DATA_TRIGGER_EDGE |
                MSI_DATA_LEVEL_ASSERT |
                ((INT_DELIVERY_MODE != dest_LowestPrio)
                 ? MSI_DATA_DELIVERY_FIXED
                 : MSI_DATA_DELIVERY_LOWPRI) |
                MSI_DATA_VECTOR(vector);
}
193 
/*
 * Read the message currently programmed for @entry back from the device:
 * config space for plain MSI, the mapped table entry for MSI-X.  Returns
 * false when the MSI-X table is inaccessible (capability disabled or
 * memory decoding off); true otherwise.
 */
static bool read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
    switch ( entry->msi_attrib.type )
    {
    case PCI_CAP_ID_MSI:
    {
        struct pci_dev *dev = entry->dev;
        int pos = entry->msi_attrib.pos;
        u16 data, seg = dev->seg;
        u8 bus = dev->bus;
        u8 slot = PCI_SLOT(dev->devfn);
        u8 func = PCI_FUNC(dev->devfn);

        msg->address_lo = pci_conf_read32(seg, bus, slot, func,
                                          msi_lower_address_reg(pos));
        /* The data register's offset depends on 64-bit address support. */
        if ( entry->msi_attrib.is_64 )
        {
            msg->address_hi = pci_conf_read32(seg, bus, slot, func,
                                              msi_upper_address_reg(pos));
            data = pci_conf_read16(seg, bus, slot, func,
                                   msi_data_reg(pos, 1));
        }
        else
        {
            msg->address_hi = 0;
            data = pci_conf_read16(seg, bus, slot, func,
                                   msi_data_reg(pos, 0));
        }
        msg->data = data;
        break;
    }
    case PCI_CAP_ID_MSIX:
    {
        void __iomem *base = entry->mask_base;

        if ( unlikely(!msix_memory_decoded(entry->dev,
                                           entry->msi_attrib.pos)) )
            return false;
        msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
        msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
        msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
        break;
    }
    default:
        BUG();
    }

    /* Translate the raw message back through the IOMMU IRTE, if remapping. */
    if ( iommu_intremap )
        iommu_read_msi_from_ire(entry, msg);

    return true;
}
246 
/*
 * Program @msg into the device for @entry, passing it through IOMMU
 * interrupt remapping first when enabled.  The message is also cached in
 * entry->msg.  Returns 0 on success, a remapping error code, or -ENXIO
 * when the MSI-X table is inaccessible.
 */
static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
    entry->msg = *msg;

    if ( iommu_intremap )
    {
        int rc;

        /* Remapping may rewrite *msg, so it must not alias the cache. */
        ASSERT(msg != &entry->msg);
        rc = iommu_update_ire_from_msi(entry, msg);
        if ( rc )
            return rc;
    }

    switch ( entry->msi_attrib.type )
    {
    case PCI_CAP_ID_MSI:
    {
        struct pci_dev *dev = entry->dev;
        int pos = entry->msi_attrib.pos;
        u16 seg = dev->seg;
        u8 bus = dev->bus;
        u8 slot = PCI_SLOT(dev->devfn);
        u8 func = PCI_FUNC(dev->devfn);
        int nr = entry->msi_attrib.entry_nr;

        /*
         * entry[-nr] is the first descriptor of this multi-vector group;
         * only it records the group's vector count (msi.nvec).  The low
         * bits of the data must encode the vector's index in the group,
         * and only entry 0 is actually written to config space.
         */
        ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
        if ( nr )
            return 0;

        pci_conf_write32(seg, bus, slot, func, msi_lower_address_reg(pos),
                         msg->address_lo);
        if ( entry->msi_attrib.is_64 )
        {
            pci_conf_write32(seg, bus, slot, func, msi_upper_address_reg(pos),
                             msg->address_hi);
            pci_conf_write16(seg, bus, slot, func, msi_data_reg(pos, 1),
                             msg->data);
        }
        else
            pci_conf_write16(seg, bus, slot, func, msi_data_reg(pos, 0),
                             msg->data);
        break;
    }
    case PCI_CAP_ID_MSIX:
    {
        void __iomem *base = entry->mask_base;

        if ( unlikely(!msix_memory_decoded(entry->dev,
                                           entry->msi_attrib.pos)) )
            return -ENXIO;
        writel(msg->address_lo,
               base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
        writel(msg->address_hi,
               base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
        writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
        break;
    }
    default:
        BUG();
    }

    return 0;
}
311 
/*
 * Retarget an MSI/MSI-X IRQ at the CPUs in @mask: update the descriptor's
 * affinity, then rewrite the device's message with the new destination ID
 * and (possibly changed) vector.
 */
void set_msi_affinity(struct irq_desc *desc, const cpumask_t *mask)
{
    struct msi_msg msg;
    unsigned int dest;
    struct msi_desc *msi_desc = desc->msi_desc;

    dest = set_desc_affinity(desc, mask);
    if ( dest == BAD_APICID || !msi_desc )
        return;

    ASSERT(spin_is_locked(&desc->lock));

    /* Start from what the device currently has programmed. */
    memset(&msg, 0, sizeof(msg));
    if ( !read_msi_msg(msi_desc, &msg) )
        return;

    msg.data &= ~MSI_DATA_VECTOR_MASK;
    msg.data |= MSI_DATA_VECTOR(desc->arch.vector);
    msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
    msg.address_lo |= MSI_ADDR_DEST_ID(dest);
    msg.dest32 = dest;

    write_msi_msg(msi_desc, &msg);
}
336 
/*
 * Flip the MSI enable flag in the capability at @pos of the given
 * seg/bus/slot/func, leaving all other control bits untouched.
 */
void __msi_set_enable(u16 seg, u8 bus, u8 slot, u8 func, int pos, int enable)
{
    u16 control = pci_conf_read16(seg, bus, slot, func, pos + PCI_MSI_FLAGS);

    if ( enable )
        control |= PCI_MSI_FLAGS_ENABLE;
    else
        control &= ~PCI_MSI_FLAGS_ENABLE;
    pci_conf_write16(seg, bus, slot, func, pos + PCI_MSI_FLAGS, control);
}
346 
msi_set_enable(struct pci_dev * dev,int enable)347 static void msi_set_enable(struct pci_dev *dev, int enable)
348 {
349     int pos;
350     u16 seg = dev->seg;
351     u8 bus = dev->bus;
352     u8 slot = PCI_SLOT(dev->devfn);
353     u8 func = PCI_FUNC(dev->devfn);
354 
355     pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
356     if ( pos )
357         __msi_set_enable(seg, bus, slot, func, pos, enable);
358 }
359 
msix_set_enable(struct pci_dev * dev,int enable)360 static void msix_set_enable(struct pci_dev *dev, int enable)
361 {
362     int pos;
363     u16 control, seg = dev->seg;
364     u8 bus = dev->bus;
365     u8 slot = PCI_SLOT(dev->devfn);
366     u8 func = PCI_FUNC(dev->devfn);
367 
368     pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSIX);
369     if ( pos )
370     {
371         control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
372         control &= ~PCI_MSIX_FLAGS_ENABLE;
373         if ( enable )
374             control |= PCI_MSIX_FLAGS_ENABLE;
375         pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
376     }
377 }
378 
msi_maskable_irq(const struct msi_desc * entry)379 int msi_maskable_irq(const struct msi_desc *entry)
380 {
381     BUG_ON(!entry);
382     return entry->msi_attrib.type != PCI_CAP_ID_MSI
383            || entry->msi_attrib.maskbit;
384 }
385 
/*
 * Set the host/guest mask state of the IRQ's MSI/MSI-X vector.  The vector
 * is masked in hardware whenever either the host or the guest wants it
 * masked (flag = host || guest).  Returns whether the requested state took
 * effect; false means an MSI-X table entry could not be written and the
 * fallback (function-wide mask-all) did not cover the request either.
 */
static bool msi_set_mask_bit(struct irq_desc *desc, bool host, bool guest)
{
    struct msi_desc *entry = desc->msi_desc;
    struct pci_dev *pdev;
    u16 seg, control;
    u8 bus, slot, func;
    bool flag = host || guest, maskall;

    ASSERT(spin_is_locked(&desc->lock));
    BUG_ON(!entry || !entry->dev);
    pdev = entry->dev;
    seg = pdev->seg;
    bus = pdev->bus;
    slot = PCI_SLOT(pdev->devfn);
    func = PCI_FUNC(pdev->devfn);
    switch ( entry->msi_attrib.type )
    {
    case PCI_CAP_ID_MSI:
        /* Plain MSI: toggle this vector's bit in the mask register, if any. */
        if ( entry->msi_attrib.maskbit )
        {
            u32 mask_bits;

            mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
            mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
            mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
            pci_conf_write32(seg, bus, slot, func, entry->msi.mpos, mask_bits);
        }
        break;
    case PCI_CAP_ID_MSIX:
        maskall = pdev->msix->host_maskall;
        control = pci_conf_read16(seg, bus, slot, func,
                                  msix_control_reg(entry->msi_attrib.pos));
        /*
         * If MSI-X is currently disabled, temporarily force it on with
         * mask-all set so the table entry below can be written safely.
         */
        if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
        {
            pdev->msix->host_maskall = 1;
            pci_conf_write16(seg, bus, slot, func,
                             msix_control_reg(entry->msi_attrib.pos),
                             control | (PCI_MSIX_FLAGS_ENABLE |
                                        PCI_MSIX_FLAGS_MASKALL));
        }
        if ( likely(memory_decoded(pdev)) )
        {
            writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
            /* Read back to post the write before re-writing control below. */
            readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);

            if ( likely(control & PCI_MSIX_FLAGS_ENABLE) )
                break;

            /*
             * MSI-X was disabled on entry: record the new state here, then
             * fall through to restore the original control value below.
             */
            entry->msi_attrib.host_masked = host;
            entry->msi_attrib.guest_masked = guest;

            flag = true;
        }
        else if ( flag && !(control & PCI_MSIX_FLAGS_MASKALL) )
        {
            /*
             * Memory decoding is off, so the table entry can't be written;
             * fall back to the function-wide mask-all bit and warn (once
             * per owning domain).
             */
            domid_t domid = pdev->domain->domain_id;

            maskall = true;
            if ( pdev->msix->warned != domid )
            {
                pdev->msix->warned = domid;
                printk(XENLOG_G_WARNING
                       "cannot mask IRQ %d: masking MSI-X on Dom%d's %04x:%02x:%02x.%u\n",
                       desc->irq, domid, pdev->seg, pdev->bus,
                       PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
            }
        }
        pdev->msix->host_maskall = maskall;
        if ( maskall || pdev->msix->guest_maskall )
            control |= PCI_MSIX_FLAGS_MASKALL;
        pci_conf_write16(seg, bus, slot, func,
                         msix_control_reg(entry->msi_attrib.pos), control);
        return flag;
    default:
        return 0;
    }
    entry->msi_attrib.host_masked = host;
    entry->msi_attrib.guest_masked = guest;

    return 1;
}
467 
/*
 * Read back the current hardware mask state of @entry's vector.
 * Returns 1 (masked), 0 (unmasked), or -1 when the state cannot be
 * determined: no device, plain MSI without mask-bit support, or an
 * inaccessible MSI-X table.
 */
static int msi_get_mask_bit(const struct msi_desc *entry)
{
    if ( !entry->dev )
        return -1;

    switch ( entry->msi_attrib.type )
    {
    case PCI_CAP_ID_MSI:
        if ( !entry->msi_attrib.maskbit )
            break;
        /* This vector's bit in the MSI mask register. */
        return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
                                PCI_SLOT(entry->dev->devfn),
                                PCI_FUNC(entry->dev->devfn),
                                entry->msi.mpos) >>
                entry->msi_attrib.entry_nr) & 1;
    case PCI_CAP_ID_MSIX:
        if ( unlikely(!msix_memory_decoded(entry->dev,
                                           entry->msi_attrib.pos)) )
            break;
        /* Bit 0 of the vector control word is the per-vector mask. */
        return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
    }
    return -1;
}
491 
mask_msi_irq(struct irq_desc * desc)492 void mask_msi_irq(struct irq_desc *desc)
493 {
494     if ( unlikely(!msi_set_mask_bit(desc, 1,
495                                     desc->msi_desc->msi_attrib.guest_masked)) )
496         BUG_ON(!(desc->status & IRQ_DISABLED));
497 }
498 
unmask_msi_irq(struct irq_desc * desc)499 void unmask_msi_irq(struct irq_desc *desc)
500 {
501     if ( unlikely(!msi_set_mask_bit(desc, 0,
502                                     desc->msi_desc->msi_attrib.guest_masked)) )
503         WARN();
504 }
505 
/* Set the guest's mask preference, keeping the current host mask state. */
void guest_mask_msi_irq(struct irq_desc *desc, bool mask)
{
    bool host_masked = desc->msi_desc->msi_attrib.host_masked;

    msi_set_mask_bit(desc, host_masked, mask);
}
510 
startup_msi_irq(struct irq_desc * desc)511 static unsigned int startup_msi_irq(struct irq_desc *desc)
512 {
513     if ( unlikely(!msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST))) )
514         WARN();
515     return 0;
516 }
517 
shutdown_msi_irq(struct irq_desc * desc)518 static void shutdown_msi_irq(struct irq_desc *desc)
519 {
520     if ( unlikely(!msi_set_mask_bit(desc, 1, 1)) )
521         BUG_ON(!(desc->status & IRQ_DISABLED));
522 }
523 
/*
 * Common ack work shared by the maskable and nonmaskable IRQ chips; the
 * local APIC EOI is issued separately (by ack_maskable_msi_irq() or the
 * nonmaskable chip's .end handler).
 */
void ack_nonmaskable_msi_irq(struct irq_desc *desc)
{
    irq_complete_move(desc);
    move_native_irq(desc);
}
529 
/* Ack for maskable MSI: common ack work plus an immediate APIC EOI. */
static void ack_maskable_msi_irq(struct irq_desc *desc)
{
    ack_nonmaskable_msi_irq(desc);
    ack_APIC_irq(); /* ACKTYPE_NONE */
}
535 
/* .end handler for non-maskable MSI: EOI the local APIC at end-of-handling. */
void end_nonmaskable_msi_irq(struct irq_desc *desc, u8 vector)
{
    ack_APIC_irq(); /* ACKTYPE_EOI */
}
540 
541 /*
542  * IRQ chip for MSI PCI/PCI-X/PCI-Express devices,
543  * which implement the MSI or MSI-X capability structure.
544  */
static hw_irq_controller pci_msi_maskable = {
    .typename     = "PCI-MSI/-X",
    .startup      = startup_msi_irq,      /* host-unmask on startup */
    .shutdown     = shutdown_msi_irq,     /* mask for both host and guest */
    .enable       = unmask_msi_irq,
    .disable      = mask_msi_irq,
    .ack          = ack_maskable_msi_irq, /* EOIs the local APIC itself */
    .set_affinity = set_msi_affinity
};
554 
555 /* As above, but without having masking capability. */
static hw_irq_controller pci_msi_nonmaskable = {
    .typename     = "PCI-MSI",
    .startup      = irq_startup_none,
    .shutdown     = irq_shutdown_none,
    .enable       = irq_enable_none,
    .disable      = irq_disable_none,
    .ack          = ack_nonmaskable_msi_irq, /* no APIC EOI here ... */
    .end          = end_nonmaskable_msi_irq, /* ... the EOI happens in .end */
    .set_affinity = set_msi_affinity
};
566 
alloc_msi_entry(unsigned int nr)567 static struct msi_desc *alloc_msi_entry(unsigned int nr)
568 {
569     struct msi_desc *entry;
570 
571     entry = xmalloc_array(struct msi_desc, nr);
572     if ( !entry )
573         return NULL;
574 
575     INIT_LIST_HEAD(&entry->list);
576     while ( nr-- )
577     {
578         entry[nr].dev = NULL;
579         entry[nr].irq = -1;
580         entry[nr].remap_index = -1;
581         entry[nr].pi_desc = NULL;
582         entry[nr].irte_initialized = false;
583     }
584 
585     return entry;
586 }
587 
/*
 * Bind @msidesc to @desc and program the device.  For MSI-X the control
 * register may need to be temporarily forced to enabled + mask-all so the
 * table can be written; the original value is restored afterwards.  For
 * plain MSI, control stays at ~0 (which has PCI_MSIX_FLAGS_ENABLE set),
 * so both conditional writes below are skipped.
 */
int setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc)
{
    const struct pci_dev *pdev = msidesc->dev;
    unsigned int cpos = msix_control_reg(msidesc->msi_attrib.pos);
    u16 control = ~0;
    int rc;

    if ( msidesc->msi_attrib.type == PCI_CAP_ID_MSIX )
    {
        control = pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                                  PCI_FUNC(pdev->devfn), cpos);
        if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
            pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                             PCI_FUNC(pdev->devfn), cpos,
                             control | (PCI_MSIX_FLAGS_ENABLE |
                                        PCI_MSIX_FLAGS_MASKALL));
    }

    rc = __setup_msi_irq(desc, msidesc,
                         msi_maskable_irq(msidesc) ? &pci_msi_maskable
                                                   : &pci_msi_nonmaskable);

    /* Restore the original control value if it was overridden above. */
    if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
        pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                         PCI_FUNC(pdev->devfn), cpos, control);

    return rc;
}
616 
/*
 * Attach @msidesc to @desc, install @handler, and program the device with
 * a message for the descriptor's vector/CPU mask.  On failure the
 * descriptor is restored to its unbound state and the error is returned.
 */
int __setup_msi_irq(struct irq_desc *desc, struct msi_desc *msidesc,
                    hw_irq_controller *handler)
{
    struct msi_msg msg;
    int rc;

    desc->msi_desc = msidesc;
    desc->handler = handler;

    msi_compose_msg(desc->arch.vector, desc->arch.cpu_mask, &msg);
    rc = write_msi_msg(msidesc, &msg);
    if ( rc )
    {
        /* Undo the binding so the descriptor is left consistent. */
        desc->handler = &no_irq_type;
        desc->msi_desc = NULL;
    }

    return rc;
}
635 
/*
 * Tear down an MSI (multi-vector) or MSI-X descriptor group: destroy the
 * associated Xen IRQs, release interrupt remapping entries, drop the
 * MSI-X table fixmap reference, then unlink and free the descriptor array.
 */
int msi_free_irq(struct msi_desc *entry)
{
    /* Plain MSI descriptors are allocated as one block of msi.nvec entries. */
    unsigned int nr = entry->msi_attrib.type != PCI_CAP_ID_MSIX
                      ? entry->msi.nvec : 1;

    while ( nr-- )
    {
        if ( entry[nr].irq >= 0 )
            destroy_irq(entry[nr].irq);

        /* Free the unused IRTE if intr remap enabled */
        if ( iommu_intremap )
            iommu_update_ire_from_msi(entry + nr, NULL);
    }

    if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
        msix_put_fixmap(entry->dev->msix,
                        virt_to_fix((unsigned long)entry->mask_base));

    list_del(&entry->list);
    xfree(entry);
    return 0;
}
659 
find_msi_entry(struct pci_dev * dev,int irq,int cap_id)660 static struct msi_desc *find_msi_entry(struct pci_dev *dev,
661                                        int irq, int cap_id)
662 {
663     struct msi_desc *entry;
664 
665     list_for_each_entry( entry, &dev->msi_list, list )
666     {
667         if ( entry->msi_attrib.type == cap_id &&
668              (irq == -1 || entry->irq == irq) )
669             return entry;
670     }
671 
672     return NULL;
673 }
674 
/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * Set up the MSI capability structure of the device function with the
 * requested number of vectors. A return of zero indicates successful
 * setup; a negative value indicates an error, and a positive value is
 * the device's vector limit when the request exceeded it.
 **/
static int msi_capability_init(struct pci_dev *dev,
                               int irq,
                               struct msi_desc **desc,
                               unsigned int nvec)
{
    struct msi_desc *entry;
    int pos;
    unsigned int i, maxvec, mpos;
    u16 control, seg = dev->seg;
    u8 bus = dev->bus;
    u8 slot = PCI_SLOT(dev->devfn);
    u8 func = PCI_FUNC(dev->devfn);

    ASSERT(pcidevs_locked());
    pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
    if ( !pos )
        return -ENODEV;
    control = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
    maxvec = multi_msi_capable(control);
    /* NB: a request beyond the device's limit returns the (positive) limit. */
    if ( nvec > maxvec )
        return maxvec;
    control &= ~PCI_MSI_FLAGS_QSIZE;
    multi_msi_enable(control, nvec);

    /* MSI Entry Initialization */
    msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */

    entry = alloc_msi_entry(nvec);
    if ( !entry )
        return -ENOMEM;

    /* The mask register's position depends on 64-bit address support. */
    mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
    for ( i = 0; i < nvec; ++i )
    {
        entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
        entry[i].msi_attrib.is_64 = is_64bit_address(control);
        entry[i].msi_attrib.entry_nr = i;
        entry[i].msi_attrib.host_masked =
        entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
        entry[i].msi_attrib.guest_masked = 0;
        entry[i].msi_attrib.pos = pos;
        if ( entry[i].msi_attrib.maskbit )
            entry[i].msi.mpos = mpos;
        entry[i].msi.nvec = 0;
        entry[i].dev = dev;
    }
    /* Only the first descriptor of the group records the vector count. */
    entry->msi.nvec = nvec;
    entry->irq = irq;
    if ( entry->msi_attrib.maskbit )
    {
        u32 maskbits;

        /* All MSIs are unmasked by default, Mask them all */
        maskbits = pci_conf_read32(seg, bus, slot, func, mpos);
        maskbits |= ~(u32)0 >> (32 - maxvec);
        pci_conf_write32(seg, bus, slot, func, mpos, maskbits);
    }
    list_add_tail(&entry->list, &dev->msi_list);

    *desc = entry;
    /* Restore the original MSI enabled bits  */
    pci_conf_write16(seg, bus, slot, func, msi_control_reg(pos), control);

    return 0;
}
749 
read_pci_mem_bar(u16 seg,u8 bus,u8 slot,u8 func,u8 bir,int vf)750 static u64 read_pci_mem_bar(u16 seg, u8 bus, u8 slot, u8 func, u8 bir, int vf)
751 {
752     u8 limit;
753     u32 addr, base = PCI_BASE_ADDRESS_0;
754     u64 disp = 0;
755 
756     if ( vf >= 0 )
757     {
758         struct pci_dev *pdev = pci_get_pdev(seg, bus, PCI_DEVFN(slot, func));
759         unsigned int pos = pci_find_ext_capability(seg, bus,
760                                                    PCI_DEVFN(slot, func),
761                                                    PCI_EXT_CAP_ID_SRIOV);
762         u16 ctrl = pci_conf_read16(seg, bus, slot, func, pos + PCI_SRIOV_CTRL);
763         u16 num_vf = pci_conf_read16(seg, bus, slot, func,
764                                      pos + PCI_SRIOV_NUM_VF);
765         u16 offset = pci_conf_read16(seg, bus, slot, func,
766                                      pos + PCI_SRIOV_VF_OFFSET);
767         u16 stride = pci_conf_read16(seg, bus, slot, func,
768                                      pos + PCI_SRIOV_VF_STRIDE);
769 
770         if ( !pdev || !pos ||
771              !(ctrl & PCI_SRIOV_CTRL_VFE) ||
772              !(ctrl & PCI_SRIOV_CTRL_MSE) ||
773              !num_vf || !offset || (num_vf > 1 && !stride) ||
774              bir >= PCI_SRIOV_NUM_BARS ||
775              !pdev->vf_rlen[bir] )
776             return 0;
777         base = pos + PCI_SRIOV_BAR;
778         vf -= PCI_BDF(bus, slot, func) + offset;
779         if ( vf < 0 )
780             return 0;
781         if ( stride )
782         {
783             if ( vf % stride )
784                 return 0;
785             vf /= stride;
786         }
787         if ( vf >= num_vf )
788             return 0;
789         BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
790         disp = vf * pdev->vf_rlen[bir];
791         limit = PCI_SRIOV_NUM_BARS;
792     }
793     else switch ( pci_conf_read8(seg, bus, slot, func,
794                                  PCI_HEADER_TYPE) & 0x7f )
795     {
796     case PCI_HEADER_TYPE_NORMAL:
797         limit = 6;
798         break;
799     case PCI_HEADER_TYPE_BRIDGE:
800         limit = 2;
801         break;
802     case PCI_HEADER_TYPE_CARDBUS:
803         limit = 1;
804         break;
805     default:
806         return 0;
807     }
808 
809     if ( bir >= limit )
810         return 0;
811     addr = pci_conf_read32(seg, bus, slot, func, base + bir * 4);
812     if ( (addr & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO )
813         return 0;
814     if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64 )
815     {
816         addr &= PCI_BASE_ADDRESS_MEM_MASK;
817         if ( ++bir >= limit )
818             return 0;
819         return addr + disp +
820                ((u64)pci_conf_read32(seg, bus, slot, func,
821                                      base + bir * 4) << 32);
822     }
823     return (addr & PCI_BASE_ADDRESS_MEM_MASK) + disp;
824 }
825 
826 /**
827  * msix_capability_init - configure device's MSI-X capability
828  * @dev: pointer to the pci_dev data structure of MSI-X device function
829  * @entries: pointer to an array of struct msix_entry entries
830  * @nvec: number of @entries
831  *
 * Set up the MSI-X capability structure of the device function with the
 * requested number of MSI-X irqs. A return of zero indicates successful
 * setup of the requested MSI-X entries with allocated irqs; non-zero otherwise.
835  **/
msix_capability_init(struct pci_dev * dev,unsigned int pos,struct msi_info * msi,struct msi_desc ** desc,unsigned int nr_entries)836 static int msix_capability_init(struct pci_dev *dev,
837                                 unsigned int pos,
838                                 struct msi_info *msi,
839                                 struct msi_desc **desc,
840                                 unsigned int nr_entries)
841 {
842     struct arch_msix *msix = dev->msix;
843     struct msi_desc *entry = NULL;
844     int vf;
845     u16 control;
846     u64 table_paddr;
847     u32 table_offset;
848     u8 bir, pbus, pslot, pfunc;
849     u16 seg = dev->seg;
850     u8 bus = dev->bus;
851     u8 slot = PCI_SLOT(dev->devfn);
852     u8 func = PCI_FUNC(dev->devfn);
853     bool maskall = msix->host_maskall;
854 
855     ASSERT(pcidevs_locked());
856 
857     control = pci_conf_read16(seg, bus, slot, func, msix_control_reg(pos));
858     /*
859      * Ensure MSI-X interrupts are masked during setup. Some devices require
860      * MSI-X to be enabled before we can touch the MSI-X registers. We need
861      * to mask all the vectors to prevent interrupts coming in before they're
862      * fully set up.
863      */
864     msix->host_maskall = 1;
865     pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
866                      control | (PCI_MSIX_FLAGS_ENABLE |
867                                 PCI_MSIX_FLAGS_MASKALL));
868 
869     if ( unlikely(!memory_decoded(dev)) )
870     {
871         pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
872                          control & ~PCI_MSIX_FLAGS_ENABLE);
873         return -ENXIO;
874     }
875 
876     if ( desc )
877     {
878         entry = alloc_msi_entry(1);
879         if ( !entry )
880         {
881             pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
882                              control & ~PCI_MSIX_FLAGS_ENABLE);
883             return -ENOMEM;
884         }
885         ASSERT(msi);
886     }
887 
888     /* Locate MSI-X table region */
889     table_offset = pci_conf_read32(seg, bus, slot, func,
890                                    msix_table_offset_reg(pos));
891     bir = (u8)(table_offset & PCI_MSIX_BIRMASK);
892     table_offset &= ~PCI_MSIX_BIRMASK;
893 
894     if ( !dev->info.is_virtfn )
895     {
896         pbus = bus;
897         pslot = slot;
898         pfunc = func;
899         vf = -1;
900     }
901     else
902     {
903         pbus = dev->info.physfn.bus;
904         pslot = PCI_SLOT(dev->info.physfn.devfn);
905         pfunc = PCI_FUNC(dev->info.physfn.devfn);
906         vf = PCI_BDF2(dev->bus, dev->devfn);
907     }
908 
909     table_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf);
910     WARN_ON(msi && msi->table_base != table_paddr);
911     if ( !table_paddr )
912     {
913         if ( !msi || !msi->table_base )
914         {
915             pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
916                              control & ~PCI_MSIX_FLAGS_ENABLE);
917             xfree(entry);
918             return -ENXIO;
919         }
920         table_paddr = msi->table_base;
921     }
922     table_paddr += table_offset;
923 
924     if ( !msix->used_entries )
925     {
926         u64 pba_paddr;
927         u32 pba_offset;
928 
929         msix->nr_entries = nr_entries;
930         msix->table.first = PFN_DOWN(table_paddr);
931         msix->table.last = PFN_DOWN(table_paddr +
932                                     nr_entries * PCI_MSIX_ENTRY_SIZE - 1);
933         WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, msix->table.first,
934                                         msix->table.last));
935 
936         pba_offset = pci_conf_read32(seg, bus, slot, func,
937                                      msix_pba_offset_reg(pos));
938         bir = (u8)(pba_offset & PCI_MSIX_BIRMASK);
939         pba_paddr = read_pci_mem_bar(seg, pbus, pslot, pfunc, bir, vf);
940         WARN_ON(!pba_paddr);
941         pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK;
942 
943         msix->pba.first = PFN_DOWN(pba_paddr);
944         msix->pba.last = PFN_DOWN(pba_paddr +
945                                   BITS_TO_LONGS(nr_entries) - 1);
946         WARN_ON(rangeset_overlaps_range(mmio_ro_ranges, msix->pba.first,
947                                         msix->pba.last));
948     }
949 
950     if ( entry )
951     {
952         /* Map MSI-X table region */
953         u64 entry_paddr = table_paddr + msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
954         int idx = msix_get_fixmap(msix, table_paddr, entry_paddr);
955         void __iomem *base;
956 
957         if ( idx < 0 )
958         {
959             pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
960                              control & ~PCI_MSIX_FLAGS_ENABLE);
961             xfree(entry);
962             return idx;
963         }
964         base = fix_to_virt(idx) + (entry_paddr & (PAGE_SIZE - 1));
965 
966         /* Mask interrupt here */
967         writel(1, base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
968 
969         entry->msi_attrib.type = PCI_CAP_ID_MSIX;
970         entry->msi_attrib.is_64 = 1;
971         entry->msi_attrib.entry_nr = msi->entry_nr;
972         entry->msi_attrib.maskbit = 1;
973         entry->msi_attrib.host_masked = 1;
974         entry->msi_attrib.guest_masked = 1;
975         entry->msi_attrib.pos = pos;
976         entry->irq = msi->irq;
977         entry->dev = dev;
978         entry->mask_base = base;
979 
980         list_add_tail(&entry->list, &dev->msi_list);
981         *desc = entry;
982     }
983 
984     if ( !msix->used_entries )
985     {
986         maskall = false;
987         if ( !msix->guest_maskall )
988             control &= ~PCI_MSIX_FLAGS_MASKALL;
989         else
990             control |= PCI_MSIX_FLAGS_MASKALL;
991 
992         if ( rangeset_add_range(mmio_ro_ranges, msix->table.first,
993                                 msix->table.last) )
994             WARN();
995         if ( rangeset_add_range(mmio_ro_ranges, msix->pba.first,
996                                 msix->pba.last) )
997             WARN();
998 
999         if ( desc )
1000         {
1001             struct domain *currd = current->domain;
1002             struct domain *d = dev->domain ?: currd;
1003 
1004             if ( !is_hardware_domain(currd) || d != currd )
1005                 printk("%s use of MSI-X on %04x:%02x:%02x.%u by Dom%d\n",
1006                        is_hardware_domain(currd)
1007                        ? XENLOG_WARNING "Potentially insecure"
1008                        : XENLOG_ERR "Insecure",
1009                        seg, bus, slot, func, d->domain_id);
1010             if ( !is_hardware_domain(d) &&
1011                  /* Assume a domain without memory has no mappings yet. */
1012                  (!is_hardware_domain(currd) || d->tot_pages) )
1013                 domain_crash(d);
1014             /* XXX How to deal with existing mappings? */
1015         }
1016     }
1017     WARN_ON(msix->nr_entries != nr_entries);
1018     WARN_ON(msix->table.first != (table_paddr >> PAGE_SHIFT));
1019     ++msix->used_entries;
1020 
1021     /* Restore MSI-X enabled bits */
1022     msix->host_maskall = maskall;
1023     pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);
1024 
1025     return 0;
1026 }
1027 
/**
 * __pci_enable_msi - configure a device's MSI capability structure
 * @msi: MSI request (segment/bus/devfn, irq, number of vectors)
 * @desc: output slot for the newly constructed MSI descriptor
 *
 * Set up the MSI capability structure of the device function with a
 * single MSI irq upon its software driver call to request MSI mode
 * enabled on its hardware device function. A return of zero indicates
 * successful setup of the new descriptor; non-zero indicates failure.
 **/
1038 
__pci_enable_msi(struct msi_info * msi,struct msi_desc ** desc)1039 static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
1040 {
1041     struct pci_dev *pdev;
1042     struct msi_desc *old_desc;
1043 
1044     ASSERT(pcidevs_locked());
1045     pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
1046     if ( !pdev )
1047         return -ENODEV;
1048 
1049     old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSI);
1050     if ( old_desc )
1051     {
1052         printk(XENLOG_ERR "irq %d already mapped to MSI on %04x:%02x:%02x.%u\n",
1053                msi->irq, msi->seg, msi->bus,
1054                PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
1055         return -EEXIST;
1056     }
1057 
1058     old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
1059     if ( old_desc )
1060     {
1061         printk(XENLOG_WARNING "MSI-X already in use on %04x:%02x:%02x.%u\n",
1062                msi->seg, msi->bus,
1063                PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
1064         __pci_disable_msix(old_desc);
1065     }
1066 
1067     return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
1068 }
1069 
__pci_disable_msi(struct msi_desc * entry)1070 static void __pci_disable_msi(struct msi_desc *entry)
1071 {
1072     struct pci_dev *dev;
1073 
1074     dev = entry->dev;
1075     msi_set_enable(dev, 0);
1076 
1077     BUG_ON(list_empty(&dev->msi_list));
1078 }
1079 
/**
 * __pci_enable_msix - configure a device's MSI-X capability structure
 * @msi: MSI-X request (segment/bus/devfn, irq, table entry number)
 * @desc: output slot for the newly constructed MSI-X descriptor
 *
 * Set up the MSI-X capability structure of the device function and
 * construct a descriptor for the requested table entry upon its
 * software driver call to request MSI-X mode enabled on its hardware
 * device function. A return of zero indicates successful configuration;
 * a return of < 0 indicates a failure (including a request for an entry
 * beyond the device's table size).
 **/
__pci_enable_msix(struct msi_info * msi,struct msi_desc ** desc)1095 static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc)
1096 {
1097     int pos, nr_entries;
1098     struct pci_dev *pdev;
1099     u16 control;
1100     u8 slot = PCI_SLOT(msi->devfn);
1101     u8 func = PCI_FUNC(msi->devfn);
1102     struct msi_desc *old_desc;
1103 
1104     ASSERT(pcidevs_locked());
1105     pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
1106     pos = pci_find_cap_offset(msi->seg, msi->bus, slot, func, PCI_CAP_ID_MSIX);
1107     if ( !pdev || !pos )
1108         return -ENODEV;
1109 
1110     control = pci_conf_read16(msi->seg, msi->bus, slot, func,
1111                               msix_control_reg(pos));
1112     nr_entries = multi_msix_capable(control);
1113     if ( msi->entry_nr >= nr_entries )
1114         return -EINVAL;
1115 
1116     old_desc = find_msi_entry(pdev, msi->irq, PCI_CAP_ID_MSIX);
1117     if ( old_desc )
1118     {
1119         printk(XENLOG_ERR "irq %d already mapped to MSI-X on %04x:%02x:%02x.%u\n",
1120                msi->irq, msi->seg, msi->bus,
1121                PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
1122         return -EEXIST;
1123     }
1124 
1125     old_desc = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
1126     if ( old_desc )
1127     {
1128         printk(XENLOG_WARNING "MSI already in use on %04x:%02x:%02x.%u\n",
1129                msi->seg, msi->bus,
1130                PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
1131         __pci_disable_msi(old_desc);
1132     }
1133 
1134     return msix_capability_init(pdev, pos, msi, desc, nr_entries);
1135 }
1136 
_pci_cleanup_msix(struct arch_msix * msix)1137 static void _pci_cleanup_msix(struct arch_msix *msix)
1138 {
1139     if ( !--msix->used_entries )
1140     {
1141         if ( rangeset_remove_range(mmio_ro_ranges, msix->table.first,
1142                                    msix->table.last) )
1143             WARN();
1144         if ( rangeset_remove_range(mmio_ro_ranges, msix->pba.first,
1145                                    msix->pba.last) )
1146             WARN();
1147     }
1148 }
1149 
/*
 * Tear down a single MSI-X vector: mask its table entry, update the
 * device-wide mask-all bookkeeping, and drop one reference via
 * _pci_cleanup_msix().
 */
static void __pci_disable_msix(struct msi_desc *entry)
{
    struct pci_dev *dev = entry->dev;
    u16 seg = dev->seg;
    u8 bus = dev->bus;
    u8 slot = PCI_SLOT(dev->devfn);
    u8 func = PCI_FUNC(dev->devfn);
    unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
                                           PCI_CAP_ID_MSIX);
    u16 control = pci_conf_read16(seg, bus, slot, func,
                                  msix_control_reg(entry->msi_attrib.pos));
    bool maskall = dev->msix->host_maskall;

    /*
     * If MSI-X got turned off behind our back, re-enable it with all
     * vectors masked so the table entry below can be written safely.
     */
    if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
    {
        dev->msix->host_maskall = 1;
        pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos),
                         control | (PCI_MSIX_FLAGS_ENABLE |
                                    PCI_MSIX_FLAGS_MASKALL));
    }

    BUG_ON(list_empty(&dev->msi_list));

    /* Mask the individual vector, if its MMIO table is accessible. */
    if ( likely(memory_decoded(dev)) )
        writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
    else if ( !(control & PCI_MSIX_FLAGS_MASKALL) )
    {
        /*
         * Memory decoding is off, so the per-entry mask bit can't be
         * written; fall back to masking the whole function.
         */
        printk(XENLOG_WARNING
               "cannot disable IRQ %d: masking MSI-X on %04x:%02x:%02x.%u\n",
               entry->irq, dev->seg, dev->bus,
               PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
        maskall = true;
    }
    dev->msix->host_maskall = maskall;
    if ( maskall || dev->msix->guest_maskall )
        control |= PCI_MSIX_FLAGS_MASKALL;
    /* Write back control, leaving ENABLE in its originally observed state. */
    pci_conf_write16(seg, bus, slot, func, msix_control_reg(pos), control);

    _pci_cleanup_msix(dev->msix);
}
1190 
/*
 * Set up (off = false) or tear down (off = true) the device-wide MSI-X
 * state of a device, without binding any individual vector.
 *
 * Returns 0 on success (or trivially when MSI is globally disabled),
 * -ENODEV if the device or its MSI-X capability is missing, -EBUSY if
 * the current reference count doesn't permit the requested transition.
 */
int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool off)
{
    int rc;
    struct pci_dev *pdev;
    u8 slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
    unsigned int pos = pci_find_cap_offset(seg, bus, slot, func,
                                           PCI_CAP_ID_MSIX);

    if ( !use_msi )
        return 0;

    if ( !pos )
        return -ENODEV;

    pcidevs_lock();
    pdev = pci_get_pdev(seg, bus, devfn);
    if ( !pdev )
        rc = -ENODEV;
    else if ( pdev->msix->used_entries != !!off )
        /* Tearing down requires exactly one reference, setting up none. */
        rc = -EBUSY;
    else if ( off )
    {
        _pci_cleanup_msix(pdev->msix);
        rc = 0;
    }
    else
    {
        u16 control = pci_conf_read16(seg, bus, slot, func,
                                      msix_control_reg(pos));

        /* No msi_info/desc: initialize table-wide state only. */
        rc = msix_capability_init(pdev, pos, NULL, NULL,
                                  multi_msix_capable(control));
    }
    pcidevs_unlock();

    return rc;
}
1228 
/*
 * Note: this only constructs the msi_desc; the irq_desc is left
 * unchanged here, and the interrupt starts out masked.
 */
pci_enable_msi(struct msi_info * msi,struct msi_desc ** desc)1233 int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
1234 {
1235     ASSERT(pcidevs_locked());
1236 
1237     if ( !use_msi )
1238         return -EPERM;
1239 
1240     return msi->table_base ? __pci_enable_msix(msi, desc) :
1241                              __pci_enable_msi(msi, desc);
1242 }
1243 
1244 /*
1245  * Device only, no irq_desc
1246  */
pci_disable_msi(struct msi_desc * msi_desc)1247 void pci_disable_msi(struct msi_desc *msi_desc)
1248 {
1249     if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
1250         __pci_disable_msi(msi_desc);
1251     else if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
1252         __pci_disable_msix(msi_desc);
1253 }
1254 
msi_free_irqs(struct pci_dev * dev)1255 static void msi_free_irqs(struct pci_dev* dev)
1256 {
1257     struct msi_desc *entry, *tmp;
1258 
1259     list_for_each_entry_safe( entry, tmp, &dev->msi_list, list )
1260     {
1261         pci_disable_msi(entry);
1262         msi_free_irq(entry);
1263     }
1264 }
1265 
/*
 * Fully quiesce and release all MSI/MSI-X state of @pdev: clear both
 * enable bits, then tear down and free every descriptor.
 */
void pci_cleanup_msi(struct pci_dev *pdev)
{
    /* Disable MSI and/or MSI-X */
    msi_set_enable(pdev, 0);
    msix_set_enable(pdev, 0);
    msi_free_irqs(pdev);
}
1273 
/*
 * Intercept a guest write to a device's MSI / MSI-X capability in
 * config space.
 *
 * Returns:
 *   0  - register not intercepted; caller performs the write as-is
 *   1  - *data was (possibly) adjusted; caller performs the write
 *  <0  - access not permitted (-EACCES)
 */
int pci_msi_conf_write_intercept(struct pci_dev *pdev, unsigned int reg,
                                 unsigned int size, uint32_t *data)
{
    u16 seg = pdev->seg;
    u8 bus = pdev->bus;
    u8 slot = PCI_SLOT(pdev->devfn);
    u8 func = PCI_FUNC(pdev->devfn);
    struct msi_desc *entry;
    unsigned int pos;

    if ( pdev->msix )
    {
        entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSIX);
        pos = entry ? entry->msi_attrib.pos
                    : pci_find_cap_offset(seg, bus, slot, func,
                                          PCI_CAP_ID_MSIX);
        ASSERT(pos);

        /* Does the write fall within the MSI-X capability structure? */
        if ( reg >= pos && reg < msix_pba_offset_reg(pos) + 4 )
        {
            /* Only 16-bit writes of the control register are allowed. */
            if ( reg != msix_control_reg(pos) || size != 2 )
                return -EACCES;

            /* Track the guest's mask-all view, but enforce the host's. */
            pdev->msix->guest_maskall = !!(*data & PCI_MSIX_FLAGS_MASKALL);
            if ( pdev->msix->host_maskall )
                *data |= PCI_MSIX_FLAGS_MASKALL;

            return 1;
        }
    }

    entry = find_msi_entry(pdev, -1, PCI_CAP_ID_MSI);
    if ( entry && entry->msi_attrib.maskbit )
    {
        uint16_t cntl;
        uint32_t unused;

        pos = entry->msi_attrib.pos;
        /* Outside the range we care about - let the write through. */
        if ( reg < pos || reg >= entry->msi.mpos + 8 )
            return 0;

        /* Control register: 16-bit writes pass through unmodified. */
        if ( reg == msi_control_reg(pos) )
            return size == 2 ? 1 : -EACCES;
        /* Otherwise only full 32-bit writes of the mask register. */
        if ( reg < entry->msi.mpos || reg >= entry->msi.mpos + 4 || size != 4 )
            return -EACCES;

        cntl = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
        /* Bits for vectors the device advertises beyond those in use. */
        unused = ~(uint32_t)0 >> (32 - multi_msi_capable(cntl));
        for ( pos = 0; pos < entry->msi.nvec; ++pos, ++entry )
        {
            /* Record the guest's view, but write the host's mask bit. */
            entry->msi_attrib.guest_masked =
                *data >> entry->msi_attrib.entry_nr;
            if ( entry->msi_attrib.host_masked )
                *data |= 1 << pos;
            unused &= ~(1 << pos);
        }

        /* Force the bits of all vectors not in use to be set (masked). */
        *data |= unused;

        return 1;
    }

    return 0;
}
1338 
/*
 * Re-program a device's MSI / MSI-X state (message, mask bits, control
 * registers) from the cached msi_desc entries.  Caller must hold the
 * pcidevs lock.
 *
 * Returns 0 on success, -EOPNOTSUPP when MSI is globally disabled,
 * -EINVAL on inconsistent internal state, -ENXIO when the device's
 * memory decoding is off, or an XSM denial.
 */
int pci_restore_msi_state(struct pci_dev *pdev)
{
    unsigned long flags;
    int irq;
    int ret;
    struct msi_desc *entry, *tmp;
    struct irq_desc *desc;
    struct msi_msg msg;
    u8 slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
    unsigned int type = 0, pos = 0;
    u16 control = 0;

    ASSERT(pcidevs_locked());

    if ( !use_msi )
        return -EOPNOTSUPP;

    ret = xsm_resource_setup_pci(XSM_PRIV,
                                (pdev->seg << 16) | (pdev->bus << 8) |
                                pdev->devfn);
    if ( ret )
        return ret;

    list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
    {
        unsigned int i = 0, nr = 1;

        irq = entry->irq;
        desc = &irq_desc[irq];

        spin_lock_irqsave(&desc->lock, flags);

        ASSERT(desc->msi_desc == entry);

        if (desc->msi_desc != entry)
        {
    bogus:
            /* Drop the lock and undo any MSI-X enable before bailing. */
            dprintk(XENLOG_ERR,
                    "Restore MSI for %04x:%02x:%02x:%u entry %u not set?\n",
                    pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                    PCI_FUNC(pdev->devfn), i);
            spin_unlock_irqrestore(&desc->lock, flags);
            if ( type == PCI_CAP_ID_MSIX )
                pci_conf_write16(pdev->seg, pdev->bus, slot, func,
                                 msix_control_reg(pos),
                                 control & ~PCI_MSIX_FLAGS_ENABLE);
            return -EINVAL;
        }

        /* All entries on a device are expected to share one type. */
        ASSERT(!type || type == entry->msi_attrib.type);
        pos = entry->msi_attrib.pos;
        if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
        {
            /* Keep MSI disabled while the message is being rewritten. */
            msi_set_enable(pdev, 0);
            nr = entry->msi.nvec;
        }
        else if ( !type && entry->msi_attrib.type == PCI_CAP_ID_MSIX )
        {
            /*
             * First MSI-X entry: enable the function with all vectors
             * masked so individual entries can be programmed safely.
             */
            control = pci_conf_read16(pdev->seg, pdev->bus, slot, func,
                                      msix_control_reg(pos));
            pci_conf_write16(pdev->seg, pdev->bus, slot, func,
                             msix_control_reg(pos),
                             control | (PCI_MSIX_FLAGS_ENABLE |
                                        PCI_MSIX_FLAGS_MASKALL));
            if ( unlikely(!memory_decoded(pdev)) )
            {
                spin_unlock_irqrestore(&desc->lock, flags);
                pci_conf_write16(pdev->seg, pdev->bus, slot, func,
                                 msix_control_reg(pos),
                                 control & ~PCI_MSIX_FLAGS_ENABLE);
                return -ENXIO;
            }
        }
        type = entry->msi_attrib.type;

        msg = entry->msg;
        write_msi_msg(entry, &msg);

        /* Restore the mask bit of each vector of a multi-vector block. */
        for ( i = 0; ; )
        {
            if ( unlikely(!msi_set_mask_bit(desc,
                                            entry[i].msi_attrib.host_masked,
                                            entry[i].msi_attrib.guest_masked)) )
                BUG();

            if ( !--nr )
                break;

            /* Each vector has its own irq_desc; swap locks as we go. */
            spin_unlock_irqrestore(&desc->lock, flags);
            desc = &irq_desc[entry[++i].irq];
            spin_lock_irqsave(&desc->lock, flags);
            if ( desc->msi_desc != entry + i )
                goto bogus;
        }

        spin_unlock_irqrestore(&desc->lock, flags);

        if ( type == PCI_CAP_ID_MSI )
        {
            unsigned int cpos = msi_control_reg(pos);

            /* Re-establish the multi-message count, then re-enable. */
            control = pci_conf_read16(pdev->seg, pdev->bus, slot, func, cpos) &
                      ~PCI_MSI_FLAGS_QSIZE;
            multi_msi_enable(control, entry->msi.nvec);
            pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                             PCI_FUNC(pdev->devfn), cpos, control);

            msi_set_enable(pdev, 1);
        }
    }

    /* MSI-X: restore the originally observed control value, plus ENABLE. */
    if ( type == PCI_CAP_ID_MSIX )
        pci_conf_write16(pdev->seg, pdev->bus, slot, func,
                         msix_control_reg(pos),
                         control | PCI_MSIX_FLAGS_ENABLE);

    return 0;
}
1457 
early_msi_init(void)1458 void __init early_msi_init(void)
1459 {
1460     if ( use_msi < 0 )
1461         use_msi = !(acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI);
1462     if ( !use_msi )
1463         return;
1464 }
1465 
/*
 * 'M' debug-key handler: print one line per MSI/MSI-X (or HPET/IOMMU)
 * interrupt, decoding the cached message address and data.
 */
static void dump_msi(unsigned char key)
{
    unsigned int irq;

    printk("MSI information:\n");

    for ( irq = 0; irq < nr_irqs; irq++ )
    {
        struct irq_desc *desc = irq_to_desc(irq);
        const struct msi_desc *entry;
        u32 addr, data, dest32;
        signed char mask;
        struct msi_attrib attr;
        unsigned long flags;
        const char *type = "???";

        if ( !irq_desc_initialized(desc) )
            continue;

        spin_lock_irqsave(&desc->lock, flags);

        entry = desc->msi_desc;
        if ( !entry )
        {
            spin_unlock_irqrestore(&desc->lock, flags);
            continue;
        }

        /* Type 0 entries reuse msi_attrib.pos to distinguish the source. */
        switch ( entry->msi_attrib.type )
        {
        case PCI_CAP_ID_MSI: type = "MSI"; break;
        case PCI_CAP_ID_MSIX: type = "MSI-X"; break;
        case 0:
            switch ( entry->msi_attrib.pos )
            {
            case MSI_TYPE_HPET: type = "HPET"; break;
            case MSI_TYPE_IOMMU: type = "IOMMU"; break;
            }
            break;
        }

        /* Snapshot everything under the lock; print after dropping it. */
        data = entry->msg.data;
        addr = entry->msg.address_lo;
        dest32 = entry->msg.dest32;
        attr = entry->msi_attrib;
        if ( entry->msi_attrib.type )
            mask = msi_get_mask_bit(entry);
        else
            mask = -1; /* non-PCI sources: no mask bit queried here */

        spin_unlock_irqrestore(&desc->lock, flags);

        /* Turn the mask state into a printable character ('0'/'1'/'?'). */
        if ( mask >= 0 )
            mask += '0';
        else
            mask = '?';
        printk(" %-6s%4u vec=%02x%7s%6s%3sassert%5s%7s"
               " dest=%08x mask=%d/%c%c/%c\n",
               type, irq,
               (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT,
               data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
               data & MSI_DATA_TRIGGER_LEVEL ? "level" : "edge",
               data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
               addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
               addr & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "cpu",
               dest32, attr.maskbit,
               attr.host_masked ? 'H' : ' ',
               attr.guest_masked ? 'G' : ' ',
               mask);
    }
}
1537 
/* Register the 'M' debug key that dumps MSI state via dump_msi(). */
static int __init msi_setup_keyhandler(void)
{
    register_keyhandler('M', dump_msi, "dump MSI state", 1);
    return 0;
}
__initcall(msi_setup_keyhandler);
1544