/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) Allen Kay <allen.m.kay@intel.com>
 * Copyright (C) Xiaohui Xin <xiaohui.xin@intel.com>
 */

#include <xen/irq.h>
#include <xen/sched.h>
#include <xen/iommu.h>
#include <xen/time.h>
#include <xen/list.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include "iommu.h"
#include "dmar.h"
#include "vtd.h"
#include "extern.h"

#include <asm/apic.h>
#include <asm/io_apic.h>
#define nr_ioapic_entries(i)  nr_ioapic_entries[i]

/*
 * source validation type (SVT)
 */
#define SVT_NO_VERIFY       0x0  /* no verification is required */
#define SVT_VERIFY_SID_SQ   0x1  /* verify using SID and SQ fields */
#define SVT_VERIFY_BUS      0x2  /* verify bus of request-id */

/*
 * source-id qualifier (SQ)
 */
#define SQ_ALL_16       0x0  /* verify all 16 bits of request-id */
#define SQ_13_IGNORE_1  0x1  /* verify most significant 13 bits, ignore
                              * the third least significant bit
                              */
#define SQ_13_IGNORE_2  0x2  /* verify most significant 13 bits, ignore
                              * the second and third least significant bits
                              */
#define SQ_13_IGNORE_3  0x3  /* verify most significant 13 bits, ignore
                              * the three least significant bits
                              */

/* apic_pin_2_ir_idx[apicid][pin] = interrupt remapping table index */
static int **apic_pin_2_ir_idx;

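/*
 * Allocate and initialise the pin -> IRTE index lookup table for all
 * IO-APICs, with every slot starting out unallocated (-1).
 */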
static int init_apic_pin_2_ir_idx(void)
{
    int *_apic_pin_2_ir_idx;
    unsigned int nr_pins, i;

    /* Here we shouldn't need to re-init when resuming from S3. */
    if ( apic_pin_2_ir_idx != NULL )
        return 0;

    nr_pins = 0;
    for ( i = 0; i < nr_ioapics; i++ )
        nr_pins += nr_ioapic_entries(i);

    _apic_pin_2_ir_idx = xmalloc_array(int, nr_pins);
    apic_pin_2_ir_idx = xmalloc_array(int *, nr_ioapics);
    if ( (_apic_pin_2_ir_idx == NULL) || (apic_pin_2_ir_idx == NULL) )
    {
        xfree(_apic_pin_2_ir_idx);
        xfree(apic_pin_2_ir_idx);
        return -ENOMEM;
    }

    for ( i = 0; i < nr_pins; i++ )
        _apic_pin_2_ir_idx[i] = -1;

    nr_pins = 0;
    for ( i = 0; i < nr_ioapics; i++ )
    {
        apic_pin_2_ir_idx[i] = &_apic_pin_2_ir_idx[nr_pins];
        nr_pins += nr_ioapic_entries(i);
    }

    return 0;
}

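/* Look up the source-id (BDF) recorded in the DRHD scope for this IO-APIC. */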
static u16 apicid_to_bdf(int apic_id)
{
    struct acpi_drhd_unit *drhd = ioapic_to_drhd(apic_id);
    struct acpi_ioapic_unit *acpi_ioapic_unit;

    list_for_each_entry ( acpi_ioapic_unit, &drhd->ioapic_list, list )
        if ( acpi_ioapic_unit->apic_id == apic_id )
            return acpi_ioapic_unit->ioapic.info;

    dprintk(XENLOG_ERR VTDPREFIX, "Didn't find the bdf for the apic_id!\n");
    return 0;
}

static u16 hpetid_to_bdf(unsigned int hpet_id)
{
    struct acpi_drhd_unit *drhd = hpet_to_drhd(hpet_id);
    struct acpi_hpet_unit *acpi_hpet_unit;

    list_for_each_entry ( acpi_hpet_unit, &drhd->hpet_list, list )
        if ( acpi_hpet_unit->id == hpet_id )
            return acpi_hpet_unit->bdf;

    dprintk(XENLOG_ERR VTDPREFIX, "Didn't find the bdf for HPET %u!\n", hpet_id);
    return 0;
}

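/* Fill in the source validation fields (SVT/SQ/SID) of an IRTE. */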
static void set_ire_sid(struct iremap_entry *ire,
                        unsigned int svt, unsigned int sq, unsigned int sid)
{
    ire->remap.svt = svt;
    ire->remap.sq = sq;
    ire->remap.sid = sid;
}

static void set_ioapic_source_id(int apic_id, struct iremap_entry *ire)
{
    set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
                apicid_to_bdf(apic_id));
}

static void set_hpet_source_id(unsigned int id, struct iremap_entry *ire)
{
    /*
     * Should really use SQ_ALL_16. Some platforms are broken.
     * While we figure out the right quirks for these broken platforms, use
     * SQ_13_IGNORE_3 for now.
     */
    set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, hpetid_to_bdf(id));
}

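/*
 * Extended Interrupt Mode is usable only if every IO-APIC is covered by a
 * DRHD unit and every IOMMU supports queued invalidation, interrupt
 * remapping and EIM.
 */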
bool_t __init iommu_supports_eim(void)
{
    struct acpi_drhd_unit *drhd;
    unsigned int apic;

    if ( !iommu_qinval || !iommu_intremap || list_empty(&acpi_drhd_units) )
        return 0;

    /* We MUST have a DRHD unit for each IOAPIC. */
    for ( apic = 0; apic < nr_ioapics; apic++ )
        if ( !ioapic_to_drhd(IO_APIC_ID(apic)) )
        {
            dprintk(XENLOG_WARNING VTDPREFIX,
                    "There is no DRHD for IOAPIC %#x (id: %#x)!\n",
                    apic, IO_APIC_ID(apic));
            return 0;
        }

    for_each_drhd_unit ( drhd )
        if ( !ecap_queued_inval(drhd->iommu->ecap) ||
             !ecap_intr_remap(drhd->iommu->ecap) ||
             !ecap_eim(drhd->iommu->ecap) )
            return 0;

    return 1;
}

/*
 * The caller must hold iremap_lock; this ensures no other software changes
 * the same IRTE behind us. With this assumption, if only the high qword or
 * the low qword of the IRTE needs updating, this function's atomic variant
 * can present an atomic update to VT-d hardware even when the cmpxchg16b
 * instruction is not supported.
 */
static void update_irte(struct iommu *iommu, struct iremap_entry *entry,
                        const struct iremap_entry *new_ire, bool atomic)
{
    ASSERT(spin_is_locked(&iommu_ir_ctrl(iommu)->iremap_lock));

    if ( cpu_has_cx16 )
    {
        __uint128_t ret;
        struct iremap_entry old_ire;

        old_ire = *entry;
        ret = cmpxchg16b(entry, &old_ire, new_ire);

        /*
         * In the above, we use cmpxchg16b to atomically update the 128-bit
         * IRTE, and the hardware cannot update the IRTE behind us, so
         * the return value of cmpxchg16b should be the same as old_ire.
         * This ASSERT validates it.
         */
        ASSERT(ret == old_ire.val);
    }
    else
    {
        /*
         * VT-d hardware doesn't update IRTEs behind us, nor does other
         * software, since we hold iremap_lock. If the caller wants VT-d
         * hardware to always see a consistent entry but we can't provide
         * one here, raise a bug.
         */
        if ( entry->lo == new_ire->lo )
            write_atomic(&entry->hi, new_ire->hi);
        else if ( entry->hi == new_ire->hi )
            write_atomic(&entry->lo, new_ire->lo);
        else if ( !atomic )
            *entry = *new_ire;
        else
            BUG();
    }
}

/* Mark specified intr remap entry as free */
static void free_remap_entry(struct iommu *iommu, int index)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries, new_ire = { };
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);

    if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
        return;

    ASSERT( spin_is_locked(&ir_ctrl->iremap_lock) );

    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
                     iremap_entries, iremap_entry);

    update_irte(iommu, iremap_entry, &new_ire, false);
    iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
    iommu_flush_iec_index(iommu, 0, index);

    unmap_vtd_domain_page(iremap_entries);
    ir_ctrl->iremap_num--;
}

/*
 * Look for a free intr remap entry (or a contiguous set thereof).
 * The caller must hold iremap_lock and set up the returned entry before
 * releasing the lock.
 */
static unsigned int alloc_remap_entry(struct iommu *iommu, unsigned int nr)
{
    struct iremap_entry *iremap_entries = NULL;
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
    unsigned int i, found;

    ASSERT( spin_is_locked(&ir_ctrl->iremap_lock) );

    for ( found = i = 0; i < IREMAP_ENTRY_NR; i++ )
    {
        struct iremap_entry *p;
        if ( i % (1 << IREMAP_ENTRY_ORDER) == 0 )
        {
            /* This entry crosses a page boundary */
            if ( iremap_entries )
                unmap_vtd_domain_page(iremap_entries);

            GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, i,
                             iremap_entries, p);
        }
        else
            p = &iremap_entries[i % (1 << IREMAP_ENTRY_ORDER)];

        if ( p->val ) /* not a free entry */
            found = 0;
        else if ( ++found == nr )
            break;
    }

    if ( iremap_entries )
        unmap_vtd_domain_page(iremap_entries);

    if ( i < IREMAP_ENTRY_NR )
        ir_ctrl->iremap_num += nr;
    return i;
}

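/* Translate the IRTE at the given index back into an IO-APIC RTE. */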
static int remap_entry_to_ioapic_rte(
    struct iommu *iommu, int index, struct IO_xAPIC_route_entry *old_rte)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    unsigned long flags;
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);

    if ( index < 0 || index > IREMAP_ENTRY_NR - 1 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "IO-APIC index (%d) for remap table is invalid\n",
                index);
        return -EFAULT;
    }

    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);

    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
                     iremap_entries, iremap_entry);

    if ( iremap_entry->val == 0 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "IO-APIC index (%d) has an empty entry\n",
                index);
        unmap_vtd_domain_page(iremap_entries);
        spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
        return -EFAULT;
    }

    old_rte->vector = iremap_entry->remap.vector;
    old_rte->delivery_mode = iremap_entry->remap.dlm;
    old_rte->dest_mode = iremap_entry->remap.dm;
    old_rte->trigger = iremap_entry->remap.tm;
    old_rte->__reserved_2 = 0;
    old_rte->dest.logical.__reserved_1 = 0;
    old_rte->dest.logical.logical_dest = iremap_entry->remap.dst >> 8;

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
    return 0;
}

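/*
 * Convert an IO-APIC RTE write into an IRTE update, allocating an IRTE for
 * the pin on first use, and rewrite the RTE in remappable format so it
 * references that IRTE.
 */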
static int ioapic_rte_to_remap_entry(struct iommu *iommu,
    int apic, unsigned int ioapic_pin, struct IO_xAPIC_route_entry *old_rte,
    unsigned int rte_upper, unsigned int value)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    struct iremap_entry new_ire;
    struct IO_APIC_route_remap_entry *remap_rte;
    struct IO_xAPIC_route_entry new_rte;
    int index;
    unsigned long flags;
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
    bool init = false;

    remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);

    index = apic_pin_2_ir_idx[apic][ioapic_pin];
    if ( index < 0 )
    {
        index = alloc_remap_entry(iommu, 1);
        if ( index < IREMAP_ENTRY_NR )
            apic_pin_2_ir_idx[apic][ioapic_pin] = index;
        init = true;
    }

    if ( index > IREMAP_ENTRY_NR - 1 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "IO-APIC intremap index (%d) larger than maximum index (%d)\n",
                index, IREMAP_ENTRY_NR - 1);
        spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
        return -EFAULT;
    }

    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
                     iremap_entries, iremap_entry);

    new_ire = *iremap_entry;

    if ( rte_upper )
    {
        if ( x2apic_enabled )
            new_ire.remap.dst = value;
        else
            new_ire.remap.dst = (value >> 24) << 8;
    }
    else
    {
        *(((u32 *)&new_rte) + 0) = value;
        new_ire.remap.fpd = 0;
        new_ire.remap.dm = new_rte.dest_mode;
        new_ire.remap.tm = new_rte.trigger;
        new_ire.remap.dlm = new_rte.delivery_mode;
        /* Hardware requires RH = 1 for lowest priority delivery mode */
        new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
        new_ire.remap.avail = 0;
        new_ire.remap.res_1 = 0;
        new_ire.remap.vector = new_rte.vector;
        new_ire.remap.res_2 = 0;

        set_ioapic_source_id(IO_APIC_ID(apic), &new_ire);
        new_ire.remap.res_3 = 0;
        new_ire.remap.res_4 = 0;
        new_ire.remap.p = 1;     /* finally, set present bit */

        /* now construct the new IO-APIC RTE */
        remap_rte->vector = new_rte.vector;
        remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */
        remap_rte->index_15 = (index >> 15) & 0x1;
        remap_rte->index_0_14 = index & 0x7fff;

        remap_rte->delivery_status = new_rte.delivery_status;
        remap_rte->polarity = new_rte.polarity;
        remap_rte->irr = new_rte.irr;
        remap_rte->trigger = new_rte.trigger;
        remap_rte->mask = new_rte.mask;
        remap_rte->reserved = 0;
        remap_rte->format = 1;    /* indicate remap format */
    }

    update_irte(iommu, iremap_entry, &new_ire, !init);
    iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
    iommu_flush_iec_index(iommu, 0, index);

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
    return 0;
}

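/*
 * Read an IO-APIC RTE register, translating remapped entries back into
 * ordinary RTE format; unremapped pins fall back to a plain register read.
 */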
unsigned int io_apic_read_remap_rte(
    unsigned int apic, unsigned int reg)
{
    unsigned int ioapic_pin = (reg - 0x10) / 2;
    int index;
    struct IO_xAPIC_route_entry old_rte = { 0 };
    int rte_upper = (reg & 1) ? 1 : 0;
    struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);

    if ( !ir_ctrl->iremap_num ||
        ( (index = apic_pin_2_ir_idx[apic][ioapic_pin]) < 0 ) )
        return __io_apic_read(apic, reg);

    old_rte = __ioapic_read_entry(apic, ioapic_pin, 1);

    if ( remap_entry_to_ioapic_rte(iommu, index, &old_rte) )
        return __io_apic_read(apic, reg);

    if ( rte_upper )
        return (*(((u32 *)&old_rte) + 1));
    else
        return (*(((u32 *)&old_rte) + 0));
}

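/*
 * Write an IO-APIC RTE register through the interrupt remapping table. The
 * RTE is masked while the IRTE is updated; on failure the raw value is
 * written instead.
 */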
void io_apic_write_remap_rte(
    unsigned int apic, unsigned int reg, unsigned int value)
{
    unsigned int ioapic_pin = (reg - 0x10) / 2;
    struct IO_xAPIC_route_entry old_rte = { 0 };
    struct IO_APIC_route_remap_entry *remap_rte;
    unsigned int rte_upper = (reg & 1) ? 1 : 0;
    struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
    int saved_mask;

    old_rte = __ioapic_read_entry(apic, ioapic_pin, 1);

    remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;

    /* mask the interrupt while we change the intremap table */
    saved_mask = remap_rte->mask;
    remap_rte->mask = 1;
    __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
    remap_rte->mask = saved_mask;

    if ( ioapic_rte_to_remap_entry(iommu, apic, ioapic_pin,
                                   &old_rte, rte_upper, value) )
    {
        __io_apic_write(apic, reg, value);

        /* Recover the original value of 'mask' bit */
        if ( rte_upper )
            __io_apic_write(apic, reg & ~1, *(u32 *)&old_rte);
    }
    else
        __ioapic_write_entry(apic, ioapic_pin, 1, old_rte);
}

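/*
 * Derive the source-id fields for a device's MSI from its PCI topology:
 * PCIe devices are verified against their own requester-id (relaxed for
 * phantom functions), while conventional PCI devices are attributed to
 * their upstream bridge.
 */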
static void set_msi_source_id(struct pci_dev *pdev, struct iremap_entry *ire)
{
    u16 seg;
    u8 bus, devfn, secbus;
    int ret;

    if ( !pdev || !ire )
        return;

    seg = pdev->seg;
    bus = pdev->bus;
    devfn = pdev->devfn;
    switch ( pdev->type )
    {
        unsigned int sq;

    case DEV_TYPE_PCIe_ENDPOINT:
    case DEV_TYPE_PCIe_BRIDGE:
    case DEV_TYPE_PCIe2PCI_BRIDGE:
    case DEV_TYPE_PCI_HOST_BRIDGE:
        switch ( pdev->phantom_stride )
        {
        case 1: sq = SQ_13_IGNORE_3; break;
        case 2: sq = SQ_13_IGNORE_2; break;
        case 4: sq = SQ_13_IGNORE_1; break;
        default: sq = SQ_ALL_16; break;
        }
        set_ire_sid(ire, SVT_VERIFY_SID_SQ, sq, PCI_BDF2(bus, devfn));
        break;

    case DEV_TYPE_PCI:
    case DEV_TYPE_LEGACY_PCI_BRIDGE:
    case DEV_TYPE_PCI2PCIe_BRIDGE:
        ret = find_upstream_bridge(seg, &bus, &devfn, &secbus);
        if ( ret == 0 ) /* integrated PCI device */
        {
            set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
                        PCI_BDF2(bus, devfn));
        }
        else if ( ret == 1 ) /* found upstream bridge */
        {
            if ( pdev_type(seg, bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE )
                set_ire_sid(ire, SVT_VERIFY_BUS, SQ_ALL_16,
                            (bus << 8) | pdev->bus);
            else
                set_ire_sid(ire, SVT_VERIFY_SID_SQ, SQ_ALL_16,
                            PCI_BDF2(bus, devfn));
        }
        else
            dprintk(XENLOG_WARNING VTDPREFIX,
                    "d%d: no upstream bridge for %04x:%02x:%02x.%u\n",
                    pdev->domain->domain_id,
                    seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
        break;

    default:
        dprintk(XENLOG_WARNING VTDPREFIX,
                "d%d: unknown(%u): %04x:%02x:%02x.%u\n",
                pdev->domain->domain_id, pdev->type,
                seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
        break;
    }
}

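/* Reconstruct the MSI address/data pair from the IRTE it references. */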
static int remap_entry_to_msi_msg(
    struct iommu *iommu, struct msi_msg *msg, unsigned int index)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    struct msi_msg_remap_entry *remap_rte;
    unsigned long flags;
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);

    remap_rte = (struct msi_msg_remap_entry *) msg;
    index += (remap_rte->address_lo.index_15 << 15) |
             remap_rte->address_lo.index_0_14;

    if ( index >= IREMAP_ENTRY_NR )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "MSI index (%d) for remap table is invalid\n",
                index);
        return -EFAULT;
    }

    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);

    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
                     iremap_entries, iremap_entry);

    if ( iremap_entry->val == 0 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "MSI index (%d) has an empty entry\n",
                index);
        unmap_vtd_domain_page(iremap_entries);
        spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
        return -EFAULT;
    }

    msg->address_hi = MSI_ADDR_BASE_HI;
    msg->address_lo =
        MSI_ADDR_BASE_LO |
        ((iremap_entry->remap.dm == 0) ?
            MSI_ADDR_DESTMODE_PHYS:
            MSI_ADDR_DESTMODE_LOGIC) |
        ((iremap_entry->remap.dlm != dest_LowestPrio) ?
            MSI_ADDR_REDIRECTION_CPU:
            MSI_ADDR_REDIRECTION_LOWPRI);
    if ( x2apic_enabled )
        msg->dest32 = iremap_entry->remap.dst;
    else
        msg->dest32 = (iremap_entry->remap.dst >> 8) & 0xff;
    msg->address_lo |= MSI_ADDR_DEST_ID(msg->dest32);

    msg->data =
        MSI_DATA_TRIGGER_EDGE |
        MSI_DATA_LEVEL_ASSERT |
        ((iremap_entry->remap.dlm != dest_LowestPrio) ?
            MSI_DATA_DELIVERY_FIXED:
            MSI_DATA_DELIVERY_LOWPRI) |
        iremap_entry->remap.vector;

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
    return 0;
}

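/*
 * Program the IRTE(s) for a device's MSI/MSI-X message (remapped format, or
 * posted format when a posted-interrupt descriptor is supplied) and rewrite
 * the message in remappable format referencing the allocated index. Passing
 * msg == NULL frees the entries instead.
 */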
static int msi_msg_to_remap_entry(
    struct iommu *iommu, struct pci_dev *pdev,
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    struct iremap_entry *iremap_entry = NULL, *iremap_entries, new_ire = { };
    struct msi_msg_remap_entry *remap_rte;
    unsigned int index, i, nr = 1;
    unsigned long flags;
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
    const struct pi_desc *pi_desc = msi_desc->pi_desc;

    if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
        nr = msi_desc->msi.nvec;

    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);

    if ( msg == NULL )
    {
        /* Free specified unused IRTEs */
        for ( i = 0; i < nr; ++i )
        {
            free_remap_entry(iommu, msi_desc->remap_index + i);
            msi_desc[i].irte_initialized = false;
        }
        spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
        return 0;
    }

    if ( msi_desc->remap_index < 0 )
    {
        index = alloc_remap_entry(iommu, nr);
        for ( i = 0; i < nr; ++i )
            msi_desc[i].remap_index = index + i;
    }
    else
        index = msi_desc->remap_index;

    if ( index > IREMAP_ENTRY_NR - 1 )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "MSI intremap index (%d) larger than maximum index (%d)!\n",
                index, IREMAP_ENTRY_NR - 1);
        for ( i = 0; i < nr; ++i )
            msi_desc[i].remap_index = -1;
        spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
        return -EFAULT;
    }

    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
                     iremap_entries, iremap_entry);

    if ( !pi_desc )
    {
        new_ire.remap.dm = msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT;
        new_ire.remap.tm = msg->data >> MSI_DATA_TRIGGER_SHIFT;
        new_ire.remap.dlm = msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT;
        /* Hardware requires RH = 1 for lowest priority delivery mode */
        new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
        new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
                                MSI_DATA_VECTOR_MASK;
        if ( x2apic_enabled )
            new_ire.remap.dst = msg->dest32;
        else
            new_ire.remap.dst =
                MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK) << 8;
        new_ire.remap.p = 1;
    }
    else
    {
        new_ire.post.im = 1;
        new_ire.post.vector = msi_desc->gvec;
        new_ire.post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
        new_ire.post.pda_h = virt_to_maddr(pi_desc) >> 32;
        new_ire.post.p = 1;
    }

    if ( pdev )
        set_msi_source_id(pdev, &new_ire);
    else
        set_hpet_source_id(msi_desc->hpet_id, &new_ire);

    /* now construct the new MSI/MSI-X rte entry */
    remap_rte = (struct msi_msg_remap_entry *)msg;
    remap_rte->address_lo.dontcare = 0;
    i = index;
    if ( !nr )
        i -= msi_desc->msi_attrib.entry_nr;
    remap_rte->address_lo.index_15 = (i >> 15) & 0x1;
    remap_rte->address_lo.index_0_14 = i & 0x7fff;
    remap_rte->address_lo.SHV = 1;
    remap_rte->address_lo.format = 1;

    remap_rte->address_hi = 0;
    remap_rte->data = index - i;

    update_irte(iommu, iremap_entry, &new_ire, msi_desc->irte_initialized);
    msi_desc->irte_initialized = true;

    iommu_flush_cache_entry(iremap_entry, sizeof(*iremap_entry));
    iommu_flush_iec_index(iommu, 0, index);

    unmap_vtd_domain_page(iremap_entries);
    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
    return 0;
}

void msi_msg_read_remap_rte(
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    struct pci_dev *pdev = msi_desc->dev;
    struct acpi_drhd_unit *drhd = NULL;

    drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                : hpet_to_drhd(msi_desc->hpet_id);
    if ( drhd )
        remap_entry_to_msi_msg(drhd->iommu, msg,
                               msi_desc->msi_attrib.type == PCI_CAP_ID_MSI
                               ? msi_desc->msi_attrib.entry_nr : 0);
}

int msi_msg_write_remap_rte(
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    struct pci_dev *pdev = msi_desc->dev;
    struct acpi_drhd_unit *drhd = NULL;

    drhd = pdev ? acpi_find_matched_drhd_unit(pdev)
                : hpet_to_drhd(msi_desc->hpet_id);
    return drhd ? msi_msg_to_remap_entry(drhd->iommu, pdev, msi_desc, msg)
                : -EINVAL;
}

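/* Reserve an IRTE for the HPET MSI; the entry is programmed on first write. */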
int __init intel_setup_hpet_msi(struct msi_desc *msi_desc)
{
    struct iommu *iommu = hpet_to_iommu(msi_desc->hpet_id);
    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
    unsigned long flags;
    int rc = 0;

    if ( !ir_ctrl || !ir_ctrl->iremap_maddr )
        return 0;

    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
    msi_desc->remap_index = alloc_remap_entry(iommu, 1);
    if ( msi_desc->remap_index >= IREMAP_ENTRY_NR )
    {
        dprintk(XENLOG_ERR VTDPREFIX,
                "HPET intremap index (%d) larger than maximum index (%d)!\n",
                msi_desc->remap_index, IREMAP_ENTRY_NR - 1);
        msi_desc->remap_index = -1;
        rc = -ENXIO;
    }
    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);

    return rc;
}

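/*
 * Set up and enable interrupt remapping on one IOMMU: allocate the remap
 * table if necessary, program IRTA (including EIM), set SIRTP, flush the
 * interrupt entry cache, and finally set IRE.
 */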
int enable_intremap(struct iommu *iommu, int eim)
{
    struct acpi_drhd_unit *drhd;
    struct ir_ctrl *ir_ctrl;
    u32 sts, gcmd;
    unsigned long flags;

    ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);

    if ( !platform_supports_intremap() )
    {
        printk(XENLOG_ERR VTDPREFIX
               " Platform firmware does not support interrupt remapping\n");
        return -EINVAL;
    }

    ir_ctrl = iommu_ir_ctrl(iommu);
    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);

    /* Return if already enabled by Xen */
    if ( (sts & DMA_GSTS_IRES) && ir_ctrl->iremap_maddr )
        return 0;

    if ( !(sts & DMA_GSTS_QIES) )
    {
        printk(XENLOG_ERR VTDPREFIX
               " Queued invalidation is not enabled on IOMMU #%u:"
               " Should not enable interrupt remapping\n", iommu->index);
        return -EINVAL;
    }

    if ( !eim && (sts & DMA_GSTS_CFIS) )
        printk(XENLOG_WARNING VTDPREFIX
               " Compatibility Format Interrupts permitted on IOMMU #%u:"
               " Device pass-through will be insecure\n", iommu->index);

    if ( ir_ctrl->iremap_maddr == 0 )
    {
        drhd = iommu_to_drhd(iommu);
        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, IREMAP_ARCH_PAGE_NR);
        if ( ir_ctrl->iremap_maddr == 0 )
        {
            dprintk(XENLOG_WARNING VTDPREFIX,
                    "Cannot allocate memory for ir_ctrl->iremap_maddr\n");
            return -ENOMEM;
        }
        ir_ctrl->iremap_num = 0;
    }

    /* set extended interrupt mode bit */
    ir_ctrl->iremap_maddr |= eim ? IRTA_EIME : 0;

    spin_lock_irqsave(&iommu->register_lock, flags);

    /* set size of the interrupt remapping table */
    ir_ctrl->iremap_maddr |= IRTA_REG_TABLE_SIZE;
    dmar_writeq(iommu->reg, DMAR_IRTA_REG, ir_ctrl->iremap_maddr);

    /* set SIRTP */
    gcmd = dmar_readl(iommu->reg, DMAR_GSTS_REG);
    gcmd |= DMA_GCMD_SIRTP;
    dmar_writel(iommu->reg, DMAR_GCMD_REG, gcmd);

    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  (sts & DMA_GSTS_SIRTPS), sts);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    /* After setting SIRTP, globally invalidate the interrupt entry cache */
    iommu_flush_iec_global(iommu);

    spin_lock_irqsave(&iommu->register_lock, flags);
    /* enable interrupt remapping hardware */
    gcmd |= DMA_GCMD_IRE;
    dmar_writel(iommu->reg, DMAR_GCMD_REG, gcmd);

    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  (sts & DMA_GSTS_IRES), sts);
    spin_unlock_irqrestore(&iommu->register_lock, flags);

    return init_apic_pin_2_ir_idx();
}

void disable_intremap(struct iommu *iommu)
{
    u32 sts;
    u64 irta;
    unsigned long flags;

    if ( !ecap_intr_remap(iommu->ecap) )
        return;

    spin_lock_irqsave(&iommu->register_lock, flags);
    sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
    if ( !(sts & DMA_GSTS_IRES) )
        goto out;

    dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_IRE));

    IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
                  !(sts & DMA_GSTS_IRES), sts);

    /* If we are disabling Interrupt Remapping, make sure we don't stay in
     * Extended Interrupt Mode, as this is unaffected by the Interrupt
     * Remapping flag in each DMAR Global Control Register.
     * Specifically, local apics in xapic mode do not like interrupts delivered
     * in x2apic mode.  Any code turning interrupt remapping back on will set
     * EIME back correctly.
     */
    if ( !ecap_eim(iommu->ecap) )
        goto out;

    /* Can't read the register unless the ecap says we can */
    irta = dmar_readl(iommu->reg, DMAR_IRTA_REG);
    if ( !(irta & IRTA_EIME) )
        goto out;

    dmar_writel(iommu->reg, DMAR_IRTA_REG, irta & ~IRTA_EIME);
    IOMMU_WAIT_OP(iommu, DMAR_IRTA_REG, dmar_readl,
                  !(irta & IRTA_EIME), irta);

 out:
    spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * This function is used to enable interrupt remapping when
 * enabling x2apic
 */
int iommu_enable_x2apic_IR(void)
{
    struct acpi_drhd_unit *drhd;
    struct iommu *iommu;

    if ( system_state < SYS_STATE_active )
    {
        if ( !iommu_supports_eim() )
            return -EOPNOTSUPP;

        if ( !platform_supports_x2apic() )
            return -ENXIO;
    }
    else if ( !x2apic_enabled )
        return -EOPNOTSUPP;

    for_each_drhd_unit ( drhd )
    {
        iommu = drhd->iommu;

        /* Clear previous faults */
        clear_fault_bits(iommu);

        /*
         * Disable interrupt remapping and queued invalidation if
         * already enabled by BIOS
         */
        disable_intremap(iommu);
        disable_qinval(iommu);
    }

    /* Enable queued invalidation */
    for_each_drhd_unit ( drhd )
    {
        iommu = drhd->iommu;
        if ( enable_qinval(iommu) != 0 )
        {
            dprintk(XENLOG_INFO VTDPREFIX,
                    "Failed to enable Queued Invalidation!\n");
            return -EIO;
        }
    }

    /* Enable interrupt remapping */
    for_each_drhd_unit ( drhd )
    {
        iommu = drhd->iommu;
        if ( enable_intremap(iommu, 1) )
        {
            dprintk(XENLOG_INFO VTDPREFIX,
                    "Failed to enable Interrupt Remapping!\n");
            return -EIO;
        }
    }

    return 0;
}

/*
 * This function is used to disable interrupt remapping when
 * suspending the local APIC
 */
void iommu_disable_x2apic_IR(void)
{
    struct acpi_drhd_unit *drhd;

    /* x2apic_enabled implies iommu_supports_eim(). */
    if ( !x2apic_enabled )
        return;

    for_each_drhd_unit ( drhd )
        disable_intremap(drhd->iommu);

    for_each_drhd_unit ( drhd )
        disable_qinval(drhd->iommu);
}

/*
 * This function is used to update the IRTE for posted-interrupt
 * when guest changes MSI/MSI-X information.
 */
int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
    const uint8_t gvec)
{
    struct irq_desc *desc;
    struct msi_desc *msi_desc;
    int rc;

    desc = pirq_spin_lock_irq_desc(pirq, NULL);
    if ( !desc )
        return -EINVAL;

    msi_desc = desc->msi_desc;
    if ( !msi_desc )
    {
        rc = -ENODEV;
        goto unlock_out;
    }
    msi_desc->pi_desc = pi_desc;
    msi_desc->gvec = gvec;

    spin_unlock_irq(&desc->lock);

    ASSERT(pcidevs_locked());
    return iommu_update_ire_from_msi(msi_desc, &msi_desc->msg);

 unlock_out:
    spin_unlock_irq(&desc->lock);

    return rc;
}