/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) Allen Kay <allen.m.kay@intel.com>
 * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com>
 */

#include <xen/irq.h>
#include <xen/sched.h>
#include <xen/iommu.h>
#include <xen/time.h>
#include <xen/list.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include "iommu.h"
#include "dmar.h"
#include "vtd.h"
#include "extern.h"

#include <asm/apic.h>
#include <asm/io_apic.h>
#define nr_ioapic_entries(i)  nr_ioapic_entries[i]

/*
 * source validation type (SVT)
 */
#define SVT_NO_VERIFY       0x0  /* no verification is required */
#define SVT_VERIFY_SID_SQ   0x1  /* verify using SID and SQ fields */
#define SVT_VERIFY_BUS      0x2  /* verify bus of request-id */

/*
 * source-id qualifier (SQ)
 */
#define SQ_ALL_16           0x0  /* verify all 16 bits of request-id */
#define SQ_13_IGNORE_1      0x1  /* verify most significant 13 bits, ignore
                                  * the third least significant bit
                                  */
#define SQ_13_IGNORE_2      0x2  /* verify most significant 13 bits, ignore
                                  * the second and third least significant bits
                                  */
#define SQ_13_IGNORE_3      0x3  /* verify most significant 13 bits, ignore
                                  * the three least significant bits
                                  */

/* apic_pin_2_ir_idx[apicid][pin] = interrupt remapping table index */
static int **apic_pin_2_ir_idx;

static int init_apic_pin_2_ir_idx(void)
{
    int *_apic_pin_2_ir_idx;
    unsigned int nr_pins, i;

    /* Here we shouldn't need to re-init when resuming from S3. */
    if ( apic_pin_2_ir_idx != NULL )
        return 0;

    nr_pins = 0;
    for ( i = 0; i < nr_ioapics; i++ )
        nr_pins += nr_ioapic_entries(i);

    _apic_pin_2_ir_idx = xmalloc_array(int, nr_pins);
    apic_pin_2_ir_idx = xmalloc_array(int *, nr_ioapics);
    if ( (_apic_pin_2_ir_idx == NULL) || (apic_pin_2_ir_idx == NULL) )
    {
        xfree(_apic_pin_2_ir_idx);
        xfree(apic_pin_2_ir_idx);
        return -ENOMEM;
    }

    for ( i = 0; i < nr_pins; i++ )
        _apic_pin_2_ir_idx[i] = -1;

    nr_pins = 0;
    for ( i = 0; i < nr_ioapics; i++ )
    {
        apic_pin_2_ir_idx[i] = &_apic_pin_2_ir_idx[nr_pins];
        nr_pins += nr_ioapic_entries(i);
    }

    return 0;
}

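/*
 * Look up the request-id (BDF) that the given IO-APIC uses as the source-id
 * of its remappable interrupt requests, using the per-DRHD IO-APIC list
 * built from the ACPI DMAR device-scope information.
 */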
static u16 apicid_to_bdf(int apic_id)
{
    struct acpi_drhd_unit *drhd = ioapic_to_drhd(apic_id);
    struct acpi_ioapic_unit *acpi_ioapic_unit;

    list_for_each_entry ( acpi_ioapic_unit, &drhd->ioapic_list, list )
        if ( acpi_ioapic_unit->apic_id == apic_id )
            return acpi_ioapic_unit->ioapic.info;

    dprintk(XENLOG_ERR VTDPREFIX, "Didn't find the bdf for the apic_id!\n");
    return 0;
}

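/* As above, but for an HPET block: map its ACPI id to the request-id (BDF). */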
static u16 hpetid_to_bdf(unsigned int hpet_id)
{
    struct acpi_drhd_unit *drhd = hpet_to_drhd(hpet_id);
    struct acpi_hpet_unit *acpi_hpet_unit;

    list_for_each_entry ( acpi_hpet_unit, &drhd->hpet_list, list )
        if ( acpi_hpet_unit->id == hpet_id )
            return acpi_hpet_unit->bdf;

    dprintk(XENLOG_ERR VTDPREFIX, "Didn't find the bdf for HPET %u!\n", hpet_id);
    return 0;
}

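/*
 * Fill the source-id validation fields of an IRTE: SVT selects how the
 * requester-id is checked, SQ selects which of its bits are compared, and
 * SID is the expected requester-id itself.
 */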
static void set_ire_sid(struct iremap_entry *ire,
                        unsigned int svt, unsigned int sq, unsigned int sid)
{
    ire->remap.svt = svt;
    ire->remap.sq = sq;
    ire->remap.sid = sid;
}

static void set_ioapic_source_id(int apic_id, struct iremap_entry *ire)
{
    set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
                apicid_to_bdf(apic_id));
}

static void set_hpet_source_id(unsigned int id, struct iremap_entry *ire)
{
    /*
     * Should really use SQ_ALL_16. Some platforms are broken.
     * While we figure out the right quirks for these broken platforms, use
     * SQ_13_IGNORE_3 for now.
     */
    set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, hpetid_to_bdf(id));
}

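/*
 * Report whether x2APIC (Extended Interrupt Mode) can be used: this needs
 * queued invalidation and interrupt remapping to be in use, a DRHD unit
 * covering every IO-APIC, and EIM support in every IOMMU's extended caps.
 */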
bool_t __init iommu_supports_eim(void)
{
    struct acpi_drhd_unit *drhd;
    unsigned int apic;

    if ( !iommu_qinval || !iommu_intremap || list_empty(&acpi_drhd_units) )
        return 0;

    /* We MUST have a DRHD unit for each IOAPIC. */
    for ( apic = 0; apic < nr_ioapics; apic++ )
        if ( !ioapic_to_drhd(IO_APIC_ID(apic)) )
        {
            dprintk(XENLOG_WARNING VTDPREFIX,
                    "There is not a DRHD for IOAPIC %#x (id: %#x)!\n",
                    apic, IO_APIC_ID(apic));
            return 0;
        }

    for_each_drhd_unit ( drhd )
        if ( !ecap_queued_inval(drhd->iommu->ecap) ||
             !ecap_intr_remap(drhd->iommu->ecap) ||
             !ecap_eim(drhd->iommu->ecap) )
            return 0;

    return 1;
}

/*
 * Assumes iremap_lock has been acquired, which makes sure that software
 * will not change the same IRTE behind us. With this assumption, if only
 * the high qword or the low qword of the IRTE is to be updated, this
 * function's atomic variant can present an atomic update to VT-d hardware
 * even when the cmpxchg16b instruction is not supported.
 */
static void update_irte(struct iommu *iommu, struct iremap_entry *entry,
                        const struct iremap_entry *new_ire, bool atomic)
{
    ASSERT(spin_is_locked(&iommu_ir_ctrl(iommu)->iremap_lock));

    if ( cpu_has_cx16 )
    {
        __uint128_t ret;
        struct iremap_entry old_ire;

        old_ire = *entry;
        ret = cmpxchg16b(entry, &old_ire, new_ire);

        /*
         * In the above, we use cmpxchg16b to atomically update the 128-bit
         * IRTE, and the hardware cannot update the IRTE behind us, so
         * the return value of cmpxchg16b should be the same as old_ire.
         * This ASSERT validates it.
         */
        ASSERT(ret == old_ire.val);
    }
    else
    {
        /*
         * Neither VT-d hardware nor other software updates the IRTE behind
         * us, since we hold iremap_lock. If the caller wants VT-d hardware
         * to always see a consistent entry, but we can't provide one here,
         * raise a bug.
         */
        if ( entry->lo == new_ire->lo )
            write_atomic(&entry->hi, new_ire->hi);
        else if ( entry->hi == new_ire->hi )
            write_atomic(&entry->lo, new_ire->lo);
        else if ( !atomic )
            *entry = *new_ire;
        else
            BUG();
    }
}

/* Mark specified intr remap entry as free */
static void free_remap_entry(struct iommu *iommu, int index)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries, new_ire = { };
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);

    if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
        return;

    ASSERT( spin_is_locked(&ir_ctrl->iremap_lock) );

    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
                     iremap_entries, iremap_entry);

    update_irte(iommu, iremap_entry, &new_ire, false);
    iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
    iommu_flush_iec_index(iommu, 0, index);

    unmap_vtd_domain_page(iremap_entries);
    ir_ctrl->iremap_num--;
}

/*
 * Look for a free intr remap entry (or a contiguous set thereof).
 * The caller must hold iremap_lock, and must set up the returned entry
 * before releasing the lock.
 */
static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
{
    struct iremap_entry *iremap_entries = NULL;
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
    unsigned int i, found;

    ASSERT( spin_is_locked(&ir_ctrl->iremap_lock) );

    for ( found = i = 0; i < IREMAP_ENTRY_NR; i++ )
    {
        struct iremap_entry *p;
        if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
        {
            /* This entry crosses a page boundary */
            if ( iremap_entries )
                unmap_vtd_domain_page(iremap_entries);

            GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, i,
                             iremap_entries, p);
        }
        else
            p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];

        if ( p->val ) /* not a free entry */
            found = 0;
        else if ( ++found == nr )
            break;
    }

    if ( iremap_entries )
        unmap_vtd_domain_page(iremap_entries);

    if ( i < IREMAP_ENTRY_NR )
        ir_ctrl->iremap_num += nr;
    return i;
}

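/*
 * Read back an IO-APIC RTE in its original (non-remapped) layout by
 * translating the fields of the interrupt remapping entry at 'index'.
 */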
static int remap_entry_to_ioapic_rte(
    struct iommu *iommu, int index, struct IO_xAPIC_route_entry *old_rte)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    unsigned long flags;
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);

    if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "IO-APIC index (%d) for remap table is invalid\n",
                index);
        return -EFAULT;
    }

    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);

    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
                     iremap_entries, iremap_entry);

    if ( iremap_entry->val == 0 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "IO-APIC index (%d) has an empty entry\n",
                index);
        unmap_vtd_domain_page(iremap_entries);
        spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
        return -EFAULT;
    }

    old_rte->vector = iremap_entry->remap.vector;
    old_rte->delivery_mode = iremap_entry->remap.dlm;
    old_rte->dest_mode = iremap_entry->remap.dm;
    old_rte->trigger = iremap_entry->remap.tm;
    old_rte->__reserved_2 = 0;
    old_rte->dest.logical.__reserved_1 = 0;
    old_rte->dest.logical.logical_dest = iremap_entry->remap.dst >> 8;

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
    return 0;
}

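/*
 * Turn a (32-bit half of an) IO-APIC RTE write into an update of the
 * corresponding interrupt remapping entry, allocating one on first use,
 * and rewrite the RTE into remappable format (index + format bit) in place.
 */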
static int ioapic_rte_to_remap_entry(struct iommu *iommu,
    int apic, unsigned int ioapic_pin, struct IO_xAPIC_route_entry *old_rte,
    unsigned int rte_upper, unsigned int value)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    struct iremap_entry new_ire;
    struct IO_APIC_route_remap_entry *remap_rte;
    struct IO_xAPIC_route_entry new_rte;
    int index;
    unsigned long flags;
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
    bool init = false;

    remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);

    index = apic_pin_2_ir_idx[apic][ioapic_pin];
    if ( index < 0 )
    {
        index = alloc_remap_entry(iommu, 1);
        if ( index < IREMAP_ENTRY_NR )
            apic_pin_2_ir_idx[apic][ioapic_pin] = index;
        init = true;
    }

    if ( index > IREMAP_ENTRY_NR - 1 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "IO-APIC intremap index (%d) larger than maximum index (%d)\n",
                index, IREMAP_ENTRY_NR - 1);
        spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
        return -EFAULT;
    }

    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
                     iremap_entries, iremap_entry);

    new_ire = *iremap_entry;

    if ( rte_upper )
    {
        if ( x2apic_enabled )
            new_ire.remap.dst = value;
        else
            new_ire.remap.dst = (value >> 24) << 8;
    }
    else
    {
        *(((u32 *)&new_rte) + 0) = value;
        new_ire.remap.fpd = 0;
        new_ire.remap.dm = new_rte.dest_mode;
        new_ire.remap.tm = new_rte.trigger;
        new_ire.remap.dlm = new_rte.delivery_mode;
        /* Hardware requires RH = 1 for lowest-priority delivery mode */
        new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
        new_ire.remap.avail = 0;
        new_ire.remap.res_1 = 0;
        new_ire.remap.vector = new_rte.vector;
        new_ire.remap.res_2 = 0;

        set_ioapic_source_id(IO_APIC_ID(apic), &new_ire);
        new_ire.remap.res_3 = 0;
        new_ire.remap.res_4 = 0;
        new_ire.remap.p = 1;     /* finally, set present bit */

        /* Now construct the new IO-APIC RTE in remappable format */
        remap_rte->vector = new_rte.vector;
        remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */
        remap_rte->index_15 = (index >> 15) & 0x1;
        remap_rte->index_0_14 = index & 0x7fff;

        remap_rte->delivery_status = new_rte.delivery_status;
        remap_rte->polarity = new_rte.polarity;
        remap_rte->irr = new_rte.irr;
        remap_rte->trigger = new_rte.trigger;
        remap_rte->mask = new_rte.mask;
        remap_rte->reserved = 0;
        remap_rte->format = 1;    /* indicate remap format */
    }

    update_irte(iommu, iremap_entry, &new_ire, !init);
    iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
    iommu_flush_iec_index(iommu, 0, index);

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
    return 0;
}

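/*
 * Emulate a 32-bit read of an IO-APIC redirection register: if the pin has
 * an interrupt remapping entry, return the RTE reconstructed from it;
 * otherwise fall back to reading the IO-APIC directly.
 */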
unsigned int io_apic_read_remap_rte(
    unsigned int apic, unsigned int reg)
{
    unsigned int ioapic_pin = (reg - 0x10) / 2;
    int index;
    struct IO_xAPIC_route_entry old_rte = { 0 };
    int rte_upper = (reg & 1) ? 1 : 0;
    struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);

    if ( !ir_ctrl->iremap_num ||
        ( (index = apic_pin_2_ir_idx[apic][ioapic_pin]) < 0 ) )
        return __io_apic_read(apic, reg);

    old_rte = __ioapic_read_entry(apic, ioapic_pin, 1);

    if ( remap_entry_to_ioapic_rte(iommu, index, &old_rte) )
        return __io_apic_read(apic, reg);

    if ( rte_upper )
        return (*(((u32 *)&old_rte) + 1));
    else
        return (*(((u32 *)&old_rte) + 0));
}

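/*
 * Emulate a 32-bit write of an IO-APIC redirection register: the interrupt
 * is masked while the remapping entry is updated, and the RTE is written
 * back in remappable format (or raw, if the remap update failed).
 */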
void io_apic_write_remap_rte(
    unsigned int apic, unsigned int reg, unsigned int value)
{
    unsigned int ioapic_pin = (reg - 0x10) / 2;
    struct IO_xAPIC_route_entry old_rte = { 0 };
    struct IO_APIC_route_remap_entry *remap_rte;
    unsigned int rte_upper = (reg & 1) ? 1 : 0;
    struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
    int saved_mask;

    old_rte = __ioapic_read_entry(apic, ioapic_pin, 1);

    remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;

    /* mask the interrupt while we change the intremap table */
    saved_mask = remap_rte->mask;
    remap_rte->mask = 1;
    __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
    remap_rte->mask = saved_mask;

    if ( ioapic_rte_to_remap_entry(iommu, apic, ioapic_pin,
                                   &old_rte, rte_upper, value) )
    {
        __io_apic_write(apic, reg, value);

        /* Recover the original value of 'mask' bit */
        if ( rte_upper )
            __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
    }
    else
        __ioapic_write_entry(apic, ioapic_pin, 1, old_rte);
}

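/*
 * Choose source-id validation for an MSI-capable device. PCIe devices are
 * verified by their own requester-id (relaxed via SQ for phantom functions);
 * conventional PCI devices are verified by the requester-id, or the bus, of
 * the closest upstream bridge.
 */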
static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire)
{
    u16 seg;
    u8 bus, devfn, secbus;
    int ret;

    if ( !pdev || !ire )
        return;

    seg = pdev->seg;
    bus = pdev->bus;
    devfn = pdev->devfn;
    switch ( pdev->type )
    {
        unsigned int sq;

    case DEV_TYPE_PCIe_ENDPOINT:
    case DEV_TYPE_PCIe_BRIDGE:
    case DEV_TYPE_PCIe2PCI_BRIDGE:
    case DEV_TYPE_PCI_HOST_BRIDGE:
        switch ( pdev->phantom_stride )
        {
        case 1: sq = SQ_13_IGNORE_3; break;
        case 2: sq = SQ_13_IGNORE_2; break;
        case 4: sq = SQ_13_IGNORE_1; break;
        default: sq = SQ_ALL_16; break;
        }
        set_ire_sid(ire, SVT_VERIFY_SID_SQ, sq, PCI_BDF2(bus, devfn));
        break;

    case DEV_TYPE_PCI:
    case DEV_TYPE_LEGACY_PCI_BRIDGE:
    case DEV_TYPE_PCI2PCIe_BRIDGE:
        ret = find_upstream_bridge(seg, &bus, &devfn, &secbus);
        if ( ret == 0 ) /* integrated PCI device */
        {
            set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
                        PCI_BDF2(bus, devfn));
        }
        else if ( ret == 1 ) /* found an upstream bridge */
        {
            if ( pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE )
                set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
                            (bus << 8) | pdev->bus);
            else
                set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
                            PCI_BDF2(bus, devfn));
        }
        else
            dprintk(XENLOG_WARNING VTDPREFIX,
                    "d%d: no upstream bridge for %04x:%02x:%02x.%u\n",
                    pdev->domain->domain_id,
                    seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
        break;

    default:
        dprintk(XENLOG_WARNING VTDPREFIX,
                "d%d: unknown(%u): %04x:%02x:%02x.%u\n",
                pdev->domain->domain_id, pdev->type,
                seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
        break;
    }
}

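/*
 * Reconstruct the architectural MSI address/data pair from the remapping
 * entry referenced by the (remappable-format) message, so callers see the
 * effective destination, delivery mode and vector.
 */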
static int remap_entry_to_msi_msg(
    struct iommu *iommu, struct msi_msg *msg, unsigned int index)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    struct msi_msg_remap_entry *remap_rte;
    unsigned long flags;
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);

    remap_rte = (struct msi_msg_remap_entry *) msg;
    index += (remap_rte->address_lo.index_15 << 15) |
             remap_rte->address_lo.index_0_14;

    if ( index >= IREMAP_ENTRY_NR )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "MSI index (%d) for remap table is invalid\n",
                index);
        return -EFAULT;
    }

    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);

    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
                     iremap_entries, iremap_entry);

    if ( iremap_entry->val == 0 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "MSI index (%d) has an empty entry\n",
                index);
        unmap_vtd_domain_page(iremap_entries);
        spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
        return -EFAULT;
    }

    msg->address_hi = MSI_ADDR_BASE_HI;
    msg->address_lo =
        MSI_ADDR_BASE_LO |
        ((iremap_entry->remap.dm == 0) ?
            MSI_ADDR_DESTMODE_PHYS:
            MSI_ADDR_DESTMODE_LOGIC) |
        ((iremap_entry->remap.dlm != dest_LowestPrio) ?
            MSI_ADDR_REDIRECTION_CPU:
            MSI_ADDR_REDIRECTION_LOWPRI);
    if ( x2apic_enabled )
        msg->dest32 = iremap_entry->remap.dst;
    else
        msg->dest32 = (iremap_entry->remap.dst >> 8) & 0xff;
    msg->address_lo |= MSI_ADDR_DEST_ID(msg->dest32);

    msg->data =
        MSI_DATA_TRIGGER_EDGE |
        MSI_DATA_LEVEL_ASSERT |
        ((iremap_entry->remap.dlm != dest_LowestPrio) ?
            MSI_DATA_DELIVERY_FIXED:
            MSI_DATA_DELIVERY_LOWPRI) |
        iremap_entry->remap.vector;

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
    return 0;
}

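/*
 * Program the interrupt remapping entry (or entries, for multi-vector MSI)
 * for a device's MSI/MSI-X message, in either remapped or posted format,
 * and rewrite the message itself into remappable format. A NULL msg frees
 * the entries instead.
 */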
static int msi_msg_to_remap_entry(
    struct iommu *iommu, struct pci_dev *pdev,
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries, new_ire = { };
    struct msi_msg_remap_entry *remap_rte;
    unsigned int index, i, nr = 1;
    unsigned long flags;
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
    const struct pi_desc *pi_desc = msi_desc->pi_desc;

    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
        nr = msi_desc->msi.nvec;

    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);

    if ( msg == NULL )
    {
        /* Free specified unused IRTEs */
        for ( i = 0; i < nr; ++i )
        {
            free_remap_entry(iommu, msi_desc->remap_index + i);
            msi_desc[i].irte_initialized = false;
        }
        spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
        return 0;
    }

    if ( msi_desc->remap_index < 0 )
    {
        index = alloc_remap_entry(iommu, nr);
        for ( i = 0; i < nr; ++i )
            msi_desc[i].remap_index = index + i;
    }
    else
        index = msi_desc->remap_index;

    if ( index > IREMAP_ENTRY_NR - 1 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "MSI intremap index (%d) larger than maximum index (%d)!\n",
                index, IREMAP_ENTRY_NR - 1);
        for ( i = 0; i < nr; ++i )
            msi_desc[i].remap_index = -1;
        spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
        return -EFAULT;
    }

    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
                     iremap_entries, iremap_entry);

    if ( !pi_desc )
    {
        new_ire.remap.dm = msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT;
        new_ire.remap.tm = msg->data >> MSI_DATA_TRIGGER_SHIFT;
        new_ire.remap.dlm = msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT;
        /* Hardware requires RH = 1 for lowest priority delivery mode */
        new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
        new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
                                MSI_DATA_VECTOR_MASK;
        if ( x2apic_enabled )
            new_ire.remap.dst = msg->dest32;
        else
            new_ire.remap.dst =
                MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK) << 8;
        new_ire.remap.p = 1;
    }
    else
    {
        new_ire.post.im = 1;
        new_ire.post.vector = msi_desc->gvec;
        new_ire.post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
        new_ire.post.pda_h = virt_to_maddr(pi_desc) >> 32;
        new_ire.post.p = 1;
    }

    if ( pdev )
        set_msi_source_id(pdev, &new_ire);
    else
        set_hpet_source_id(msi_desc->hpet_id, &new_ire);

    /* Now construct the new MSI/MSI-X RTE in remappable format */
    remap_rte = (struct msi_msg_remap_entry *)msg;
    remap_rte->address_lo.dontcare = 0;
    i = index;
    if ( !nr )
        i -= msi_desc->msi_attrib.entry_nr;
    remap_rte->address_lo.index_15 = (i >> 15) & 0x1;
    remap_rte->address_lo.index_0_14 = i & 0x7fff;
    remap_rte->address_lo.SHV = 1;
    remap_rte->address_lo.format = 1;

    remap_rte->address_hi = 0;
    remap_rte->data = index - i;

    update_irte(iommu, iremap_entry, &new_ire, msi_desc->irte_initialized);
    msi_desc->irte_initialized = true;

    iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
    iommu_flush_iec_index(iommu, 0, index);

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
    return 0;
}

void msi_msg_read_remap_rte(
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    struct pci_dev *pdev = msi_desc->dev;
    struct acpi_drhd_unit *drhd = NULL;

    drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                : hpet_to_drhd(msi_desc->hpet_id);
    if ( drhd )
        remap_entry_to_msi_msg(drhd->iommu, msg,
                               msi_desc->msi_attrib.type == PCI_CAP_ID_MSI
                               ? msi_desc->msi_attrib.entry_nr : 0);
}

int msi_msg_write_remap_rte(
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    struct pci_dev *pdev = msi_desc->dev;
    struct acpi_drhd_unit *drhd = NULL;

    drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                : hpet_to_drhd(msi_desc->hpet_id);
    return drhd ? msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg)
                : -EINVAL;
}

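/*
 * Reserve an interrupt remapping entry for an HPET MSI at boot; the entry
 * itself is presumably filled in later, when the HPET MSI message is
 * written via msi_msg_write_remap_rte().
 */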
int __init intel_setup_hpet_msi(struct msi_desc *msi_desc)
{
    struct iommu *iommu = hpet_to_iommu(msi_desc->hpet_id);
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
    unsigned long flags;
    int rc = 0;

    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
        return 0;

    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
    msi_desc->remap_index = alloc_remap_entry(iommu, 1);
    if ( msi_desc->remap_index >= IREMAP_ENTRY_NR )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "HPET intremap index (%d) larger than maximum index (%d)!\n",
                msi_desc->remap_index, IREMAP_ENTRY_NR - 1);
        msi_desc->remap_index = -1;
        rc = -ENXIO;
    }
    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);

    return rc;
}

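/*
 * Enable interrupt remapping on one IOMMU: allocate the interrupt remapping
 * table if needed, program DMAR_IRTA_REG (with EIME if 'eim'), set SIRTP,
 * globally invalidate the interrupt entry cache, and finally set GCMD.IRE.
 */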
int enable_intremap(struct iommu *iommu, int eim)
{
    struct acpi_drhd_unit *drhd;
    struct ir_ctrl *ir_ctrl;
    u32 sts, gcmd;
    unsigned long flags;

    ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);

    if ( !platform_supports_intremap() )
    {
        printk(XENLOG_ERR VTDPREFIX
               " Platform firmware does not support interrupt remapping\n");
        return -EINVAL;
    }

    ir_ctrl = iommu_ir_ctrl(iommu);
    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);

    /* Return if already enabled by Xen */
    if ( (sts & DMA_GSTS_IRES) && ir_ctrl->iremap_maddr )
        return 0;

    if ( !(sts & DMA_GSTS_QIES) )
    {
        printk(XENLOG_ERR VTDPREFIX
               " Queued invalidation is not enabled on IOMMU #%u:"
               " Should not enable interrupt remapping\n", iommu->index);
        return -EINVAL;
    }

    if ( !eim && (sts & DMA_GSTS_CFIS) )
        printk(XENLOG_WARNING VTDPREFIX
               " Compatibility Format Interrupts permitted on IOMMU #%u:"
               " Device pass-through will be insecure\n", iommu->index);

    if ( ir_ctrl->iremap_maddr == 0 )
    {
        drhd = iommu_to_drhd(iommu);
        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR);
        if ( ir_ctrl->iremap_maddr == 0 )
        {
            dprintk(XENLOG_WARNING VTDPREFIX,
                    "Cannot allocate memory for ir_ctrl->iremap_maddr\n");
            return -ENOMEM;
        }
        ir_ctrl->iremap_num = 0;
    }

    /* set extended interrupt mode bit */
    ir_ctrl->iremap_maddr |= eim ? IRTA_EIME : 0;

    spin_lock_irqsave(&iommu->register_lock, flags);

    /* set size of the interrupt remapping table */
    ir_ctrl->iremap_maddr |= IRTA_REG_TABLE_SIZE;
    dmar_writeq(iommu->reg, DMAR_IRTA_REG, ir_ctrl->iremap_maddr);

    /* set SIRTP */
    gcmd = dmar_readl(iommu->reg, DMAR_GSTS_REG);
    gcmd |= DMA_GCMD_SIRTP;
    dmar_writel(iommu->reg, DMAR_GCMD_REG, gcmd);

    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  (sts & DMA_GSTS_SIRTPS), sts);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    /* After setting SIRTP, globally invalidate the interrupt entry cache */
    iommu_flush_iec_global(iommu);

    spin_lock_irqsave(&iommu->register_lock, flags);
    /* enable interrupt remapping hardware */
    gcmd |= DMA_GCMD_IRE;
    dmar_writel(iommu->reg, DMAR_GCMD_REG, gcmd);

    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  (sts & DMA_GSTS_IRES), sts);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    return init_apic_pin_2_ir_idx();
}

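/* Disable interrupt remapping on one IOMMU (the reverse of enable_intremap). */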
void disable_intremap(struct iommu *iommu)
{
    u32 sts;
    u64 irta;
    unsigned long flags;

    if ( !ecap_intr_remap(iommu->ecap) )
        return;

    spin_lock_irqsave(&iommu->register_lock, flags);
    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
    if ( !(sts & DMA_GSTS_IRES) )
        goto out;

    dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_IRE));

    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  !(sts & DMA_GSTS_IRES), sts);

    /* If we are disabling Interrupt Remapping, make sure we don't stay in
     * Extended Interrupt Mode, as this is unaffected by the Interrupt
     * Remapping flag in each DMAR Global Control Register.
     * Specifically, local apics in xapic mode do not like interrupts delivered
     * in x2apic mode.  Any code turning interrupt remapping back on will set
     * EIME back correctly.
     */
    if ( !ecap_eim(iommu->ecap) )
        goto out;

    /* Can't read the register unless the ecap says we can */
    irta = dmar_readl(iommu->reg, DMAR_IRTA_REG);
    if ( !(irta & IRTA_EIME) )
        goto out;

    dmar_writel(iommu->reg, DMAR_IRTA_REG, irta & ~IRTA_EIME);
    IOMMU_WAIT_OP(iommu, DMAR_IRTA_REG, dmar_readl,
                  !(irta & IRTA_EIME), irta);

out:
    spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * This function is used to enable interrupt remapping when
 * enabling x2apic.
 */
int iommu_enable_x2apic_IR(void)
{
    struct acpi_drhd_unit *drhd;
    struct iommu *iommu;

    if ( system_state < SYS_STATE_active )
    {
        if ( !iommu_supports_eim() )
            return -EOPNOTSUPP;

        if ( !platform_supports_x2apic() )
            return -ENXIO;
    }
    else if ( !x2apic_enabled )
        return -EOPNOTSUPP;

    for_each_drhd_unit ( drhd )
    {
        iommu = drhd->iommu;

        /* Clear previous faults */
        clear_fault_bits(iommu);

        /*
         * Disable interrupt remapping and queued invalidation if
         * already enabled by BIOS
         */
        disable_intremap(iommu);
        disable_qinval(iommu);
    }

    /* Enable queued invalidation */
    for_each_drhd_unit ( drhd )
    {
        iommu = drhd->iommu;
        if ( enable_qinval(iommu) != 0 )
        {
            dprintk(XENLOG_INFO VTDPREFIX,
                    "Failed to enable Queued Invalidation!\n");
            return -EIO;
        }
    }

    /* Enable interrupt remapping */
    for_each_drhd_unit ( drhd )
    {
        iommu = drhd->iommu;
        if ( enable_intremap(iommu, 1) )
        {
            dprintk(XENLOG_INFO VTDPREFIX,
                    "Failed to enable Interrupt Remapping!\n");
            return -EIO;
        }
    }

    return 0;
}

/*
 * This function is used to disable interrupt remapping when
 * suspending the local apic.
 */
void iommu_disable_x2apic_IR(void)
{
    struct acpi_drhd_unit *drhd;

    /* x2apic_enabled implies iommu_supports_eim(). */
    if ( !x2apic_enabled )
        return;

    for_each_drhd_unit ( drhd )
        disable_intremap(drhd->iommu);

    for_each_drhd_unit ( drhd )
        disable_qinval(drhd->iommu);
}

/*
 * This function is used to update the IRTE for a posted interrupt
 * when the guest changes MSI/MSI-X information.
 */
int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
    const uint8_t gvec)
{
    struct irq_desc *desc;
    struct msi_desc *msi_desc;
    int rc;

    desc = pirq_spin_lock_irq_desc(pirq, NULL);
    if ( !desc )
        return -EINVAL;

    msi_desc = desc->msi_desc;
    if ( !msi_desc )
    {
        rc = -ENODEV;
        goto unlock_out;
    }
    msi_desc->pi_desc = pi_desc;
    msi_desc->gvec = gvec;

    spin_unlock_irq(&desc->lock);

    ASSERT(pcidevs_locked());
    return iommu_update_ire_from_msi(msi_desc, &msi_desc->msg);

 unlock_out:
    spin_unlock_irq(&desc->lock);

    return rc;
}