/*
 * Copyright (C) 2007 Advanced Micro Devices, Inc.
 * Author: Leo Duran <leo.duran@amd.com>
 * Author: Wei Wang <wei.wang2@amd.com> - adapted to xen
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/sched.h>
#include <xen/iocap.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include <xen/paging.h>
#include <xen/softirq.h>
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
#include "../ats.h"

static bool_t __read_mostly init_done;

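/*
 * Look up the IOMMU serving a device.  A function which has no IVRS entry
 * of its own inherits, once initialisation is complete, the mapping of
 * function 0 of the same device (with a warning logged).
 */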
struct amd_iommu *find_iommu_for_device(int seg, int bdf)
{
    struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);

    if ( !ivrs_mappings || bdf >= ivrs_bdf_entries )
        return NULL;

    if ( unlikely(!ivrs_mappings[bdf].iommu) && likely(init_done) )
    {
        unsigned int bd0 = bdf & ~PCI_FUNC(~0);

        if ( ivrs_mappings[bd0].iommu )
        {
            struct ivrs_mappings tmp = ivrs_mappings[bd0];

            tmp.iommu = NULL;
            if ( tmp.dte_requestor_id == bd0 )
                tmp.dte_requestor_id = bdf;
            ivrs_mappings[bdf] = tmp;

            printk(XENLOG_WARNING "%04x:%02x:%02x.%u not found in ACPI tables;"
                   " using same IOMMU as function 0\n",
                   seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf));

            /* write iommu field last */
            ivrs_mappings[bdf].iommu = ivrs_mappings[bd0].iommu;
        }
    }

    return ivrs_mappings[bdf].iommu;
}

/*
 * Some devices use an alias id and the original device id to index the
 * interrupt table and the I/O page table respectively.  Such devices have
 * both an alias entry and a select entry in the IVRS structure.
 *
 * Return the original device id if the device has a valid interrupt
 * remapping table set up for both the select entry and the alias entry.
 */
int get_dma_requestor_id(u16 seg, u16 bdf)
{
    struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);
    int req_id;

    BUG_ON ( bdf >= ivrs_bdf_entries );
    req_id = ivrs_mappings[bdf].dte_requestor_id;
    if ( (ivrs_mappings[bdf].intremap_table != NULL) &&
         (ivrs_mappings[req_id].intremap_table != NULL) )
        req_id = bdf;

    return req_id;
}

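/* A translation is installed in a DTE iff both the V and TV bits are set. */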
static int is_translation_valid(u32 *entry)
{
    return (get_field_from_reg_u32(entry[0],
                                   IOMMU_DEV_TABLE_VALID_MASK,
                                   IOMMU_DEV_TABLE_VALID_SHIFT) &&
            get_field_from_reg_u32(entry[0],
                                   IOMMU_DEV_TABLE_TRANSLATION_VALID_MASK,
                                   IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT));
}

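/*
 * Clear the V and TV bits in the first DTE word, so the entry no longer
 * describes a valid translation.
 */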
static void disable_translation(u32 *dte)
{
    u32 entry;

    entry = dte[0];
    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
                         IOMMU_DEV_TABLE_TRANSLATION_VALID_MASK,
                         IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT, &entry);
    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
                         IOMMU_DEV_TABLE_VALID_MASK,
                         IOMMU_DEV_TABLE_VALID_SHIFT, &entry);
    dte[0] = entry;
}

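/*
 * Bind a device (or one of its phantom functions) to a domain: point its
 * DTE at the domain's root page table unless a valid translation is already
 * installed, enable the IOTLB bit for ATS-capable devices, and flush the
 * device table entry.  ATS itself is enabled (and the IOTLB flushed)
 * outside the IOMMU lock.
 */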
static void amd_iommu_setup_domain_device(
    struct domain *domain, struct amd_iommu *iommu,
    u8 devfn, struct pci_dev *pdev)
{
    void *dte;
    unsigned long flags;
    int req_id, valid = 1;
    int dte_i = 0;
    u8 bus = pdev->bus;
    const struct domain_iommu *hd = dom_iommu(domain);

    BUG_ON( !hd->arch.root_table || !hd->arch.paging_mode ||
            !iommu->dev_table.buffer );

    if ( iommu_passthrough && is_hardware_domain(domain) )
        valid = 0;

    if ( ats_enabled )
        dte_i = 1;

    /* get device-table entry */
    req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
    dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);

    spin_lock_irqsave(&iommu->lock, flags);

    if ( !is_translation_valid((u32 *)dte) )
    {
        /* bind DTE to domain page-tables */
        amd_iommu_set_root_page_table(
            (u32 *)dte, page_to_maddr(hd->arch.root_table), domain->domain_id,
            hd->arch.paging_mode, valid);

        if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
             iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
            iommu_dte_set_iotlb((u32 *)dte, dte_i);

        amd_iommu_flush_device(iommu, req_id);

        AMD_IOMMU_DEBUG("Setup I/O page table: device id = %#x, type = %#x, "
                        "root table = %#"PRIx64", "
                        "domain = %d, paging mode = %d\n",
                        req_id, pdev->type,
                        page_to_maddr(hd->arch.root_table),
                        domain->domain_id, hd->arch.paging_mode);
    }

    spin_unlock_irqrestore(&iommu->lock, flags);

    ASSERT(pcidevs_locked());

    if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
         !pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
    {
        if ( devfn == pdev->devfn )
            enable_ats_device(pdev, &iommu->ats_devices);

        amd_iommu_flush_iotlb(devfn, pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
    }
}

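/*
 * Parse the ACPI IVRS tables and initialise every AMD IOMMU found; returns
 * -ENODEV if none is present or initialisation fails, otherwise finishes
 * with a scan of the PCI bus.
 */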
int __init amd_iov_detect(void)
{
    INIT_LIST_HEAD(&amd_iommu_head);

    if ( !iommu_enable && !iommu_intremap )
        return 0;

    if ( (amd_iommu_detect_acpi() != 0) || (iommu_found() == 0) )
    {
        printk("AMD-Vi: IOMMU not found!\n");
        iommu_intremap = 0;
        return -ENODEV;
    }

    if ( amd_iommu_init() != 0 )
    {
        printk("AMD-Vi: initialization failed\n");
        return -ENODEV;
    }

    init_done = 1;

    if ( !amd_iommu_perdev_intremap )
        printk(XENLOG_WARNING "AMD-Vi: Using global interrupt remap table is not recommended (see XSA-36)!\n");
    return scan_pci_devices();
}

int amd_iommu_alloc_root(struct domain_iommu *hd)
{
    if ( unlikely(!hd->arch.root_table) )
    {
        hd->arch.root_table = alloc_amd_iommu_pgtable();
        if ( !hd->arch.root_table )
            return -ENOMEM;
    }

    return 0;
}

static int __must_check allocate_domain_resources(struct domain_iommu *hd)
{
    int rc;

    spin_lock(&hd->arch.mapping_lock);
    rc = amd_iommu_alloc_root(hd);
    spin_unlock(&hd->arch.mapping_lock);

    return rc;
}

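/*
 * Return the number of page table levels needed to cover 'entries' frames,
 * each level resolving another PTE_PER_TABLE_SIZE entries; needing more
 * than six levels is reported as -ENOMEM.
 */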
static int get_paging_mode(unsigned long entries)
{
    int level = 1;

    BUG_ON( !entries );

    while ( entries > PTE_PER_TABLE_SIZE )
    {
        entries = PTE_PER_TABLE_ALIGN(entries) >> PTE_PER_TABLE_SHIFT;
        if ( ++level > 6 )
            return -ENOMEM;
    }

    return level;
}

static int amd_iommu_domain_init(struct domain *d)
{
    struct domain_iommu *hd = dom_iommu(d);

    /*
     * For PV domains (and a PV hardware domain), stick with
     * get_paging_mode(max_page); for HVM domains, use a 2-level page
     * table at first.
     */
    hd->arch.paging_mode = is_hvm_domain(d) ?
                           IOMMU_PAGING_MODE_LEVEL_2 :
                           get_paging_mode(max_page);
    return 0;
}

static int amd_iommu_add_device(u8 devfn, struct pci_dev *pdev);

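/*
 * Hardware domain setup: allocate the root page table, install a 1:1
 * mapping for every valid MFN when neither iommu_passthrough nor
 * need_iommu(d) applies, deny access to each IOMMU's MMIO range, and set
 * up device table entries for all discovered PCI devices.
 */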
static void __hwdom_init amd_iommu_hwdom_init(struct domain *d)
{
    unsigned long i;
    const struct amd_iommu *iommu;

    if ( allocate_domain_resources(dom_iommu(d)) )
        BUG();

    if ( !iommu_passthrough && !need_iommu(d) )
    {
        int rc = 0;

        /* Set up 1:1 page table for dom0 */
        for ( i = 0; i < max_pdx; i++ )
        {
            unsigned long pfn = pdx_to_pfn(i);

            /*
             * XXX Should we really map all non-RAM (above 4G)? Minimally
             * a pfn_valid() check would seem desirable here.
             */
            if ( mfn_valid(_mfn(pfn)) )
            {
                int ret = amd_iommu_map_page(d, pfn, pfn,
                                             IOMMUF_readable|IOMMUF_writable);

                if ( !rc )
                    rc = ret;
            }

            if ( !(i & 0xfffff) )
                process_pending_softirqs();
        }

        if ( rc )
            AMD_IOMMU_DEBUG("d%d: IOMMU mapping failed: %d\n",
                            d->domain_id, rc);
    }

    for_each_amd_iommu ( iommu )
        if ( iomem_deny_access(d, PFN_DOWN(iommu->mmio_base_phys),
                               PFN_DOWN(iommu->mmio_base_phys +
                                        IOMMU_MMIO_REGION_LENGTH - 1)) )
            BUG();

    setup_hwdom_pci_devices(d, amd_iommu_add_device);
}

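/*
 * Undo amd_iommu_setup_domain_device(): clear the translation bits in the
 * device's DTE, drop the IOTLB enable for ATS-capable devices, and flush
 * the device table entry.  ATS is disabled afterwards, outside the IOMMU
 * lock, for the device's own devfn.
 */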
void amd_iommu_disable_domain_device(struct domain *domain,
                                     struct amd_iommu *iommu,
                                     u8 devfn, struct pci_dev *pdev)
{
    void *dte;
    unsigned long flags;
    int req_id;
    u8 bus = pdev->bus;

    BUG_ON ( iommu->dev_table.buffer == NULL );
    req_id = get_dma_requestor_id(iommu->seg, PCI_BDF2(bus, devfn));
    dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);

    spin_lock_irqsave(&iommu->lock, flags);
    if ( is_translation_valid((u32 *)dte) )
    {
        disable_translation((u32 *)dte);

        if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
             iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
            iommu_dte_set_iotlb((u32 *)dte, 0);

        amd_iommu_flush_device(iommu, req_id);

        AMD_IOMMU_DEBUG("Disable: device id = %#x, "
                        "domain = %d, paging mode = %d\n",
                        req_id, domain->domain_id,
                        dom_iommu(domain)->arch.paging_mode);
    }
    spin_unlock_irqrestore(&iommu->lock, flags);

    ASSERT(pcidevs_locked());

    if ( devfn == pdev->devfn &&
         pci_ats_device(iommu->seg, bus, devfn) &&
         pci_ats_enabled(iommu->seg, bus, devfn) )
        disable_ats_device(pdev);
}

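/*
 * Move a device (or phantom function) from 'source' to 'target': tear down
 * its DTE in the source context, move the pdev onto the target's device
 * list, make sure the target has a root page table, and install a new DTE.
 */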
static int reassign_device(struct domain *source, struct domain *target,
                           u8 devfn, struct pci_dev *pdev)
{
    struct amd_iommu *iommu;
    int bdf, rc;
    struct domain_iommu *t = dom_iommu(target);

    bdf = PCI_BDF2(pdev->bus, pdev->devfn);
    iommu = find_iommu_for_device(pdev->seg, bdf);
    if ( !iommu )
    {
        AMD_IOMMU_DEBUG("Failed to find IOMMU:"
                        " %04x:%02x:%02x.%u cannot be assigned to dom%d\n",
                        pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                        target->domain_id);
        return -ENODEV;
    }

    amd_iommu_disable_domain_device(source, iommu, devfn, pdev);

    if ( devfn == pdev->devfn )
    {
        list_move(&pdev->domain_list, &target->arch.pdev_list);
        pdev->domain = target;
    }

    rc = allocate_domain_resources(t);
    if ( rc )
        return rc;

    amd_iommu_setup_domain_device(target, iommu, devfn, pdev);
    AMD_IOMMU_DEBUG("Re-assign %04x:%02x:%02x.%u from dom%d to dom%d\n",
                    pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                    source->domain_id, target->domain_id);

    return 0;
}

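/*
 * If the IVRS requests a unity mapping for this requestor id, reserve it in
 * the target domain first, then move the device over from the hardware
 * domain.
 */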
static int amd_iommu_assign_device(struct domain *d, u8 devfn,
                                   struct pci_dev *pdev,
                                   u32 flag)
{
    struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev->seg);
    int bdf = PCI_BDF2(pdev->bus, devfn);
    int req_id = get_dma_requestor_id(pdev->seg, bdf);

    if ( ivrs_mappings[req_id].unity_map_enable )
    {
        amd_iommu_reserve_domain_unity_map(
            d,
            ivrs_mappings[req_id].addr_range_start,
            ivrs_mappings[req_id].addr_range_length,
            ivrs_mappings[req_id].write_permission,
            ivrs_mappings[req_id].read_permission);
    }

    return reassign_device(hardware_domain, d, devfn, pdev);
}

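/*
 * Page table teardown is deferred: deallocate_next_page_table() records the
 * level in PFN_ORDER() and queues the page on iommu_pt_cleanup_list;
 * deallocate_page_table() (the .free_page_table hook) later walks each
 * queued page, queues its lower-level tables in turn, and frees it.
 */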
static void deallocate_next_page_table(struct page_info *pg, int level)
{
    PFN_ORDER(pg) = level;
    spin_lock(&iommu_pt_cleanup_lock);
    page_list_add_tail(pg, &iommu_pt_cleanup_list);
    spin_unlock(&iommu_pt_cleanup_lock);
}

static void deallocate_page_table(struct page_info *pg)
{
    void *table_vaddr, *pde;
    u64 next_table_maddr;
    unsigned int index, level = PFN_ORDER(pg), next_level;

    PFN_ORDER(pg) = 0;

    if ( level <= 1 )
    {
        free_amd_iommu_pgtable(pg);
        return;
    }

    table_vaddr = __map_domain_page(pg);

    for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
    {
        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
        next_level = iommu_next_level((u32*)pde);

        if ( (next_table_maddr != 0) && (next_level != 0) &&
             iommu_is_pte_present((u32*)pde) )
        {
            /* We do not support skip levels yet */
            ASSERT(next_level == level - 1);
            deallocate_next_page_table(maddr_to_page(next_table_maddr),
                                       next_level);
        }
    }

    unmap_domain_page(table_vaddr);
    free_amd_iommu_pgtable(pg);
}

static void deallocate_iommu_page_tables(struct domain *d)
{
    struct domain_iommu *hd = dom_iommu(d);

    if ( iommu_use_hap_pt(d) )
        return;

    spin_lock(&hd->arch.mapping_lock);
    if ( hd->arch.root_table )
    {
        deallocate_next_page_table(hd->arch.root_table, hd->arch.paging_mode);
        hd->arch.root_table = NULL;
    }
    spin_unlock(&hd->arch.mapping_lock);
}


static void amd_iommu_domain_destroy(struct domain *d)
{
    deallocate_iommu_page_tables(d);
    amd_iommu_flush_all_pages(d);
}

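/*
 * Install a DTE for a newly reported device in its owning domain.  Host
 * bridges without an IOMMU are silently skipped for the hardware domain.
 */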
static int amd_iommu_add_device(u8 devfn, struct pci_dev *pdev)
{
    struct amd_iommu *iommu;
    u16 bdf;

    if ( !pdev->domain )
        return -EINVAL;

    bdf = PCI_BDF2(pdev->bus, pdev->devfn);
    iommu = find_iommu_for_device(pdev->seg, bdf);
    if ( unlikely(!iommu) )
    {
        /* Filter bridge devices. */
        if ( pdev->type == DEV_TYPE_PCI_HOST_BRIDGE &&
             is_hardware_domain(pdev->domain) )
        {
            AMD_IOMMU_DEBUG("Skipping host bridge %04x:%02x:%02x.%u\n",
                            pdev->seg, pdev->bus, PCI_SLOT(devfn),
                            PCI_FUNC(devfn));
            return 0;
        }

        AMD_IOMMU_DEBUG("No iommu for %04x:%02x:%02x.%u; cannot be handed to d%d\n",
                        pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                        pdev->domain->domain_id);
        return -ENODEV;
    }

    amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);
    return 0;
}

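/* Tear down the DTE for a device that is being removed from its domain. */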
static int amd_iommu_remove_device(u8 devfn, struct pci_dev *pdev)
{
    struct amd_iommu *iommu;
    u16 bdf;

    if ( !pdev->domain )
        return -EINVAL;

    bdf = PCI_BDF2(pdev->bus, pdev->devfn);
    iommu = find_iommu_for_device(pdev->seg, bdf);
    if ( !iommu )
    {
        AMD_IOMMU_DEBUG("Failed to find IOMMU:"
                        " %04x:%02x:%02x.%u cannot be removed from dom%d\n",
                        pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                        pdev->domain->domain_id);
        return -ENODEV;
    }

    amd_iommu_disable_domain_device(pdev->domain, iommu, devfn, pdev);
    return 0;
}

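/*
 * Devices sharing a DMA requestor id must be assigned as a group; report
 * that requestor id (or the raw BDF when out of range) as the group id.
 */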
static int amd_iommu_group_id(u16 seg, u8 bus, u8 devfn)
{
    int bdf = PCI_BDF2(bus, devfn);

    return (bdf < ivrs_bdf_entries) ? get_dma_requestor_id(seg, bdf) : bdf;
}

#include <asm/io_apic.h>

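/*
 * Recursively print one level of a domain's IOMMU page tables, descending
 * through present non-leaf PDEs and printing gfn/mfn pairs at the leaves.
 * Used by the dump_p2m_table hook below.
 */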
static void amd_dump_p2m_table_level(struct page_info* pg, int level,
                                     paddr_t gpa, int indent)
{
    paddr_t address;
    void *table_vaddr, *pde;
    paddr_t next_table_maddr;
    int index, next_level, present;
    u32 *entry;

    if ( level < 1 )
        return;

    table_vaddr = __map_domain_page(pg);
    if ( table_vaddr == NULL )
    {
        printk("Failed to map IOMMU domain page %"PRIpaddr"\n",
               page_to_maddr(pg));
        return;
    }

    for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
    {
        if ( !(index % 2) )
            process_pending_softirqs();

        pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
        entry = (u32*)pde;

        present = get_field_from_reg_u32(entry[0],
                                         IOMMU_PDE_PRESENT_MASK,
                                         IOMMU_PDE_PRESENT_SHIFT);

        if ( !present )
            continue;

        next_level = get_field_from_reg_u32(entry[0],
                                            IOMMU_PDE_NEXT_LEVEL_MASK,
                                            IOMMU_PDE_NEXT_LEVEL_SHIFT);

        if ( next_level && (next_level != (level - 1)) )
        {
            printk("IOMMU p2m table error. next_level = %d, expected %d\n",
                   next_level, level - 1);

            continue;
        }

        address = gpa + amd_offset_level_address(index, level);
        if ( next_level >= 1 )
            amd_dump_p2m_table_level(
                maddr_to_page(next_table_maddr), next_level,
                address, indent + 1);
        else
            printk("%*sgfn: %08lx mfn: %08lx\n",
                   indent, "",
                   (unsigned long)PFN_DOWN(address),
                   (unsigned long)PFN_DOWN(next_table_maddr));
    }

    unmap_domain_page(table_vaddr);
}

static void amd_dump_p2m_table(struct domain *d)
{
    const struct domain_iommu *hd = dom_iommu(d);

    if ( !hd->arch.root_table )
        return;

    printk("p2m table has %d levels\n", hd->arch.paging_mode);
    amd_dump_p2m_table_level(hd->arch.root_table, hd->arch.paging_mode, 0, 0);
}

const struct iommu_ops amd_iommu_ops = {
    .init = amd_iommu_domain_init,
    .hwdom_init = amd_iommu_hwdom_init,
    .add_device = amd_iommu_add_device,
    .remove_device = amd_iommu_remove_device,
    .assign_device = amd_iommu_assign_device,
    .teardown = amd_iommu_domain_destroy,
    .map_page = amd_iommu_map_page,
    .unmap_page = amd_iommu_unmap_page,
    .free_page_table = deallocate_page_table,
    .reassign_device = reassign_device,
    .get_device_group_id = amd_iommu_group_id,
    .update_ire_from_apic = amd_iommu_ioapic_update_ire,
    .update_ire_from_msi = amd_iommu_msi_msg_update_ire,
    .read_apic_from_ire = amd_iommu_read_ioapic_from_ire,
    .read_msi_from_ire = amd_iommu_read_msi_from_ire,
    .setup_hpet_msi = amd_setup_hpet_msi,
    .suspend = amd_iommu_suspend,
    .resume = amd_iommu_resume,
    .share_p2m = amd_iommu_share_p2m,
    .crash_shutdown = amd_iommu_crash_shutdown,
    .dump_p2m_table = amd_dump_p2m_table,
};
