/*
 * Copyright (C) 2007 Advanced Micro Devices, Inc.
 * Author: Leo Duran <leo.duran@amd.com>
 * Author: Wei Wang <wei.wang2@amd.com> - adapted to xen
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/acpi.h>

#include "iommu.h"

#define CONTIG_MASK IOMMU_PTE_CONTIG_MASK
#include <asm/pt-contig-markers.h>

/* Given pfn and page table level, return pde index */
static unsigned int pfn_to_pde_idx(unsigned long pfn, unsigned int level)
{
    unsigned int idx;

    idx = pfn >> (PTE_PER_TABLE_SHIFT * (--level));
    idx &= ~PTE_PER_TABLE_MASK;
    return idx;
}

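/*
 * Zap the leaf PTE for dfn in the page table page at l1_mfn, returning the
 * prior contents of the entry.  *free gets set when the containing table has
 * become entirely empty (as per the contiguity markers), so that the caller
 * can free it (see amd_iommu_unmap_page()).
 */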
static union amd_iommu_pte clear_iommu_pte_present(unsigned long l1_mfn,
                                                   unsigned long dfn,
                                                   unsigned int level,
                                                   bool *free)
{
    union amd_iommu_pte *table, *pte, old;
    unsigned int idx = pfn_to_pde_idx(dfn, level);

    table = map_domain_page(_mfn(l1_mfn));
    pte = &table[idx];
    old = *pte;

    write_atomic(&pte->raw, 0);

    *free = pt_update_contig_markers(&table->raw, idx, level, PTE_kind_null);

    unmap_domain_page(table);

    return old;
}

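/*
 * Atomically write a present PDE/PTE referencing next_mfn.  A next_level of
 * zero denotes a leaf mapping; non-zero values point at a lower level page
 * table.  FC gets set for leaf entries only.
 */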
static void set_iommu_pde_present(union amd_iommu_pte *pte,
                                  unsigned long next_mfn,
                                  unsigned int next_level,
                                  bool iw, bool ir)
{
    union amd_iommu_pte new = {};

    /*
     * The FC bit should be enabled in leaf PTEs; this helps to avoid
     * potential issues with ATS devices.
     */
    new.fc = !next_level;

    new.mfn = next_mfn;
    new.iw = iw;
    new.ir = ir;
    new.next_level = next_level;
    new.pr = true;

    write_atomic(&pte->raw, new.raw);
}

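/*
 * Install a leaf mapping dfn -> next_mfn in the page table page at pt_mfn.
 * The prior entry is returned, with PR cleared to signal "no change" when the
 * entry already matched.  *contig gets set when the whole table has become
 * contiguous, allowing the caller to coalesce it into a superpage entry.
 */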
static union amd_iommu_pte set_iommu_pte_present(unsigned long pt_mfn,
                                                 unsigned long dfn,
                                                 unsigned long next_mfn,
                                                 unsigned int level,
                                                 bool iw, bool ir,
                                                 bool *contig)
{
    union amd_iommu_pte *table, *pde, old;

    table = map_domain_page(_mfn(pt_mfn));
    pde = &table[pfn_to_pde_idx(dfn, level)];

    old = *pde;
    if ( !old.pr || old.next_level ||
         old.mfn != next_mfn ||
         old.iw != iw || old.ir != ir )
    {
        set_iommu_pde_present(pde, next_mfn, 0, iw, ir);
        *contig = pt_update_contig_markers(&table->raw,
                                           pfn_to_pde_idx(dfn, level),
                                           level, PTE_kind_leaf);
    }
    else
    {
        old.pr = false; /* signal "no change" to the caller */
        *contig = false;
    }

    unmap_domain_page(table);

    return old;
}

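/*
 * Fill nr_ptes consecutive, so far unused, entries of the page table page at
 * pt_mfn with leaf mappings starting at dfn -> next_mfn.  Used when
 * shattering a superpage into a freshly allocated lower level table.
 */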
static void set_iommu_ptes_present(unsigned long pt_mfn,
                                   unsigned long dfn,
                                   unsigned long next_mfn,
                                   unsigned int nr_ptes,
                                   unsigned int pde_level,
                                   bool iw, bool ir)
{
    union amd_iommu_pte *table, *pde;
    unsigned long page_sz = 1UL << (PTE_PER_TABLE_SHIFT * (pde_level - 1));

    table = map_domain_page(_mfn(pt_mfn));
    pde = &table[pfn_to_pde_idx(dfn, pde_level)];

    if ( (void *)(pde + nr_ptes) > (void *)table + PAGE_SIZE )
    {
        ASSERT_UNREACHABLE();
        return;
    }

    ASSERT(!(next_mfn & (page_sz - 1)));

    while ( nr_ptes-- )
    {
        ASSERT(!pde->next_level);
        ASSERT(!pde->u);

        if ( pde > table )
            ASSERT(pde->ign0 == ffs(pde - table) - 1);
        else
            ASSERT(pde->ign0 == CONTIG_LEVEL_SHIFT);

        pde->iw = iw;
        pde->ir = ir;
        pde->fc = true; /* See set_iommu_pde_present(). */
        pde->mfn = next_mfn;
        pde->pr = true;

        ++pde;
        next_mfn += page_sz;
    }

    unmap_domain_page(table);
}

/*
 * This function returns
 * - -errno for errors,
 * - 0 for a successful update, atomic when necessary
 * - 1 for a successful but non-atomic update, which may need to be warned
 *   about by the caller.
 */
int amd_iommu_set_root_page_table(struct amd_iommu_dte *dte,
                                  uint64_t root_ptr, uint16_t domain_id,
                                  uint8_t paging_mode, unsigned int flags)
{
    bool valid = flags & SET_ROOT_VALID;

    if ( dte->v && dte->tv &&
         (cpu_has_cx16 || (flags & SET_ROOT_WITH_UNITY_MAP)) )
    {
        union {
            struct amd_iommu_dte dte;
            uint64_t raw64[4];
            __uint128_t raw128[2];
        } ldte = { .dte = *dte };
        __uint128_t old = ldte.raw128[0];
        int ret = 0;

        ldte.dte.domain_id = domain_id;
        ldte.dte.pt_root = paddr_to_pfn(root_ptr);
        ldte.dte.iw = true;
        ldte.dte.ir = true;
        ldte.dte.paging_mode = paging_mode;
        ldte.dte.v = valid;

        if ( cpu_has_cx16 )
        {
            __uint128_t res = cmpxchg16b(dte, &old, &ldte.raw128[0]);

            /*
             * Hardware does not update the DTE behind our backs, so the
             * return value should match "old".
             */
            if ( res != old )
            {
                printk(XENLOG_ERR
                       "Dom%d: unexpected DTE %016lx_%016lx (expected %016lx_%016lx)\n",
                       domain_id,
                       (uint64_t)(res >> 64), (uint64_t)res,
                       (uint64_t)(old >> 64), (uint64_t)old);
                ret = -EILSEQ;
            }
        }
        else /* Best effort, updating domain_id last. */
        {
            uint64_t *ptr = (void *)dte;

            write_atomic(ptr + 0, ldte.raw64[0]);
            /* No barrier should be needed between these two. */
            write_atomic(ptr + 1, ldte.raw64[1]);

            ret = 1;
        }

        return ret;
    }

    if ( valid || dte->v )
    {
        dte->tv = false;
        dte->v = true;
        smp_wmb();
    }
    dte->domain_id = domain_id;
    dte->pt_root = paddr_to_pfn(root_ptr);
    dte->iw = true;
    dte->ir = true;
    dte->paging_mode = paging_mode;
    smp_wmb();
    dte->tv = true;
    dte->v = valid;

    return 0;
}

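/*
 * Point a DTE at an interrupt remapping table (or, with ptr being NULL,
 * arrange for interrupts from the device to be aborted).  IV is written only
 * after all other interrupt remapping fields are in place.
 */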
void amd_iommu_set_intremap_table(
    struct amd_iommu_dte *dte, const void *ptr,
    const struct amd_iommu *iommu, bool valid)
{
    if ( ptr )
    {
        dte->it_root = virt_to_maddr(ptr) >> 6;
        dte->int_tab_len = amd_iommu_intremap_table_order(ptr, iommu);
        dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED;
    }
    else
    {
        dte->it_root = 0;
        dte->int_tab_len = 0;
        dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_ABORTED;
    }

    dte->ig = false; /* unmapped interrupts result in i/o page faults */
    smp_wmb();
    dte->iv = valid;
}

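/*
 * Seed a DTE from the IVRS-provided device flags, with I/O requests set to
 * be aborted.
 */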
void __init iommu_dte_add_device_entry(struct amd_iommu_dte *dte,
                                       const struct ivrs_mappings *ivrs_dev)
{
    uint8_t flags = ivrs_dev->device_flags;

    *dte = (struct amd_iommu_dte){
        .init_pass = flags & ACPI_IVHD_INIT_PASS,
        .ext_int_pass = flags & ACPI_IVHD_EINT_PASS,
        .nmi_pass = flags & ACPI_IVHD_NMI_PASS,
        .lint0_pass = flags & ACPI_IVHD_LINT0_PASS,
        .lint1_pass = flags & ACPI_IVHD_LINT1_PASS,
        .ioctl = IOMMU_DEV_TABLE_IO_CONTROL_ABORTED,
        .sys_mgt = MASK_EXTR(flags, ACPI_IVHD_SYSTEM_MGMT),
        .ex = ivrs_dev->dte_allow_exclusion,
    };
}

/*
 * Walk the I/O page tables and build lower level page tables if necessary.
 * {Re,un}mapping superpage frames causes re-allocation of I/O page tables.
 */
static int iommu_pde_from_dfn(struct domain *d, unsigned long dfn,
                              unsigned int target, unsigned long *pt_mfn,
                              unsigned int *flush_flags, bool map)
{
    union amd_iommu_pte *pde, *next_table_vaddr;
    unsigned long next_table_mfn;
    unsigned int level;
    struct page_info *table;
    struct domain_iommu *hd = dom_iommu(d);

    table = hd->arch.amd.root_table;
    level = hd->arch.amd.paging_mode;

    if ( !table || target < 1 || level < target || level > 6 )
    {
        ASSERT_UNREACHABLE();
        return 1;
    }

    /*
     * A frame number past what the current page tables can represent can't
     * possibly have a mapping.
     */
    if ( dfn >> (PTE_PER_TABLE_SHIFT * level) )
        return 0;

    next_table_mfn = mfn_x(page_to_mfn(table));

    while ( level > target )
    {
        unsigned int next_level = level - 1;

        next_table_vaddr = map_domain_page(_mfn(next_table_mfn));
        pde = &next_table_vaddr[pfn_to_pde_idx(dfn, level)];

        /* This might be a superpage frame. */
        next_table_mfn = pde->mfn;

        /* Split the superpage frame into smaller pieces. */
        if ( pde->pr && !pde->next_level && next_table_mfn )
        {
            unsigned long mfn, pfn;

            pfn = dfn & ~((1UL << (PTE_PER_TABLE_SHIFT * next_level)) - 1);
            mfn = next_table_mfn;

            /* allocate lower level page table */
            table = iommu_alloc_pgtable(hd, IOMMU_PTE_CONTIG_MASK);
            if ( table == NULL )
            {
                AMD_IOMMU_ERROR("cannot allocate I/O page table\n");
                unmap_domain_page(next_table_vaddr);
                return 1;
            }

            next_table_mfn = mfn_x(page_to_mfn(table));

            set_iommu_ptes_present(next_table_mfn, pfn, mfn, PTE_PER_TABLE_SIZE,
                                   next_level, pde->iw, pde->ir);
            smp_wmb();
            set_iommu_pde_present(pde, next_table_mfn, next_level, true,
                                  true);
            pt_update_contig_markers(&next_table_vaddr->raw,
                                     pfn_to_pde_idx(dfn, level),
                                     level, PTE_kind_table);

            *flush_flags |= IOMMU_FLUSHF_modified;

            perfc_incr(iommu_pt_shatters);
        }

        /* Install lower level page table for non-present entries */
        else if ( !pde->pr )
        {
            if ( !map )
            {
                unmap_domain_page(next_table_vaddr);
                return 0;
            }

            if ( next_table_mfn == 0 )
            {
                table = iommu_alloc_pgtable(hd, IOMMU_PTE_CONTIG_MASK);
                if ( table == NULL )
                {
                    AMD_IOMMU_ERROR("cannot allocate I/O page table\n");
                    unmap_domain_page(next_table_vaddr);
                    return 1;
                }
                next_table_mfn = mfn_x(page_to_mfn(table));
                set_iommu_pde_present(pde, next_table_mfn, next_level, true,
                                      true);
                pt_update_contig_markers(&next_table_vaddr->raw,
                                         pfn_to_pde_idx(dfn, level),
                                         level, PTE_kind_table);
            }
            else /* should never reach here */
            {
                unmap_domain_page(next_table_vaddr);
                return 1;
            }
        }

        unmap_domain_page(next_table_vaddr);
        level--;
    }

    /* mfn of target level page table */
    *pt_mfn = next_table_mfn;
    return 0;
}

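/*
 * Queue a no longer referenced page table hierarchy for freeing, recursing
 * through still-present non-leaf entries first.  Actual freeing is deferred
 * via iommu_queue_free_pgtable().
 */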
static void queue_free_pt(struct domain_iommu *hd, mfn_t mfn, unsigned int level)
{
    if ( level > 1 )
    {
        union amd_iommu_pte *pt = map_domain_page(mfn);
        unsigned int i;

        for ( i = 0; i < PTE_PER_TABLE_SIZE; ++i )
            if ( pt[i].pr && pt[i].next_level )
            {
                ASSERT(pt[i].next_level < level);
                queue_free_pt(hd, _mfn(pt[i].mfn), pt[i].next_level);
            }

        unmap_domain_page(pt);
    }

    iommu_queue_free_pgtable(hd, mfn_to_page(mfn));
}

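/*
 * Establish a dfn -> mfn mapping of the order encoded in flags.  When a newly
 * written leaf renders an entire lower level table contiguous, that table is
 * replaced by a superpage entry one level up and queued for freeing
 * (coalescing).  Pre-existing mappings being replaced get flushed and, where
 * applicable, their page table hierarchies freed.
 */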
int cf_check amd_iommu_map_page(
    struct domain *d, dfn_t dfn, mfn_t mfn, unsigned int flags,
    unsigned int *flush_flags)
{
    struct domain_iommu *hd = dom_iommu(d);
    unsigned int level = (IOMMUF_order(flags) / PTE_PER_TABLE_SHIFT) + 1;
    bool contig;
    int rc;
    unsigned long pt_mfn = 0;
    union amd_iommu_pte old;

    ASSERT((hd->platform_ops->page_sizes >> IOMMUF_order(flags)) &
           PAGE_SIZE_4K);

    spin_lock(&hd->arch.mapping_lock);

    /*
     * IOMMU mapping request can be safely ignored when the domain is dying.
     *
     * hd->arch.mapping_lock guarantees that d->is_dying will be observed
     * before any page tables are freed (see iommu_free_pgtables()).
     */
    if ( d->is_dying )
    {
        spin_unlock(&hd->arch.mapping_lock);
        return 0;
    }

    rc = amd_iommu_alloc_root(d);
    if ( rc )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_ERROR("root table alloc failed, dfn = %"PRI_dfn"\n",
                        dfn_x(dfn));
        domain_crash(d);
        return rc;
    }

    if ( iommu_pde_from_dfn(d, dfn_x(dfn), level, &pt_mfn, flush_flags, true) ||
         !pt_mfn )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_ERROR("invalid IO pagetable entry dfn = %"PRI_dfn"\n",
                        dfn_x(dfn));
        domain_crash(d);
        return -EFAULT;
    }

    /* Install mapping */
    old = set_iommu_pte_present(pt_mfn, dfn_x(dfn), mfn_x(mfn), level,
                                flags & IOMMUF_writable,
                                flags & IOMMUF_readable, &contig);

    while ( unlikely(contig) && ++level < hd->arch.amd.paging_mode )
    {
        struct page_info *pg = mfn_to_page(_mfn(pt_mfn));
        unsigned long next_mfn;

        if ( iommu_pde_from_dfn(d, dfn_x(dfn), level, &pt_mfn, flush_flags,
                                false) )
            BUG();
        BUG_ON(!pt_mfn);

        next_mfn = mfn_x(mfn) & (~0UL << (PTE_PER_TABLE_SHIFT * (level - 1)));
        set_iommu_pte_present(pt_mfn, dfn_x(dfn), next_mfn, level,
                              flags & IOMMUF_writable,
                              flags & IOMMUF_readable, &contig);
        *flush_flags |= IOMMU_FLUSHF_modified | IOMMU_FLUSHF_all;
        iommu_queue_free_pgtable(hd, pg);
        perfc_incr(iommu_pt_coalesces);
    }

    spin_unlock(&hd->arch.mapping_lock);

    *flush_flags |= IOMMU_FLUSHF_added;
    if ( old.pr )
    {
        *flush_flags |= IOMMU_FLUSHF_modified;

        if ( IOMMUF_order(flags) && old.next_level )
            queue_free_pt(hd, _mfn(old.mfn), old.next_level);
    }

    return 0;
}

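/*
 * Tear down the mapping, if any, of the order-sized range at dfn.  Page
 * tables which become entirely empty as a result are zapped at the next
 * higher level and queued for freeing.
 */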
int cf_check amd_iommu_unmap_page(
    struct domain *d, dfn_t dfn, unsigned int order, unsigned int *flush_flags)
{
    unsigned long pt_mfn = 0;
    struct domain_iommu *hd = dom_iommu(d);
    unsigned int level = (order / PTE_PER_TABLE_SHIFT) + 1;
    union amd_iommu_pte old = {};

    /*
     * While really we could unmap at any granularity, for now we assume unmaps
     * are issued by common code only at the same granularity as maps.
     */
    ASSERT((hd->platform_ops->page_sizes >> order) & PAGE_SIZE_4K);

    spin_lock(&hd->arch.mapping_lock);

    if ( !hd->arch.amd.root_table )
    {
        spin_unlock(&hd->arch.mapping_lock);
        return 0;
    }

    if ( iommu_pde_from_dfn(d, dfn_x(dfn), level, &pt_mfn, flush_flags, false) )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_ERROR("invalid IO pagetable entry dfn = %"PRI_dfn"\n",
                        dfn_x(dfn));
        domain_crash(d);
        return -EFAULT;
    }

    if ( pt_mfn )
    {
        bool free;

        /* Mark PTE as 'page not present'. */
        old = clear_iommu_pte_present(pt_mfn, dfn_x(dfn), level, &free);

        while ( unlikely(free) && ++level < hd->arch.amd.paging_mode )
        {
            struct page_info *pg = mfn_to_page(_mfn(pt_mfn));

            if ( iommu_pde_from_dfn(d, dfn_x(dfn), level, &pt_mfn,
                                    flush_flags, false) )
                BUG();
            BUG_ON(!pt_mfn);

            clear_iommu_pte_present(pt_mfn, dfn_x(dfn), level, &free);
            *flush_flags |= IOMMU_FLUSHF_all;
            iommu_queue_free_pgtable(hd, pg);
            perfc_incr(iommu_pt_coalesces);
        }
    }

    spin_unlock(&hd->arch.mapping_lock);

    if ( old.pr )
    {
        *flush_flags |= IOMMU_FLUSHF_modified;

        if ( order && old.next_level )
            queue_free_pt(hd, _mfn(old.mfn), old.next_level);
    }

    return 0;
}

void amd_iommu_print_entries(const struct amd_iommu *iommu, unsigned int dev_id,
                             dfn_t dfn)
{
    mfn_t pt_mfn;
    unsigned int level;
    const struct amd_iommu_dte *dt = iommu->dev_table.buffer;

    if ( !dt[dev_id].tv )
    {
        printk("%pp: no root\n", &PCI_SBDF(iommu->seg, dev_id));
        return;
    }

    pt_mfn = _mfn(dt[dev_id].pt_root);
    level = dt[dev_id].paging_mode;
    printk("%pp root @ %"PRI_mfn" (%u levels) dfn=%"PRI_dfn"\n",
           &PCI_SBDF(iommu->seg, dev_id), mfn_x(pt_mfn), level, dfn_x(dfn));

    while ( level )
    {
        const union amd_iommu_pte *pt = map_domain_page(pt_mfn);
        unsigned int idx = pfn_to_pde_idx(dfn_x(dfn), level);
        union amd_iommu_pte pte = pt[idx];

        unmap_domain_page(pt);

        printk(" L%u[%03x] = %"PRIx64" %c%c\n", level, idx, pte.raw,
               pte.pr ? pte.ir ? 'r' : '-' : 'n',
               pte.pr ? pte.iw ? 'w' : '-' : 'p');

        if ( !pte.pr )
            break;

        if ( pte.next_level >= level )
        {
            printk(" L%u[%03x]: next: %u\n", level, idx, pte.next_level);
            break;
        }

        pt_mfn = _mfn(pte.mfn);
        level = pte.next_level;
    }
}

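/*
 * Number of naturally aligned, order-sized regions needed to cover
 * [dfn, dfn + page_count).
 */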
static unsigned long flush_count(unsigned long dfn, unsigned long page_count,
                                 unsigned int order)
{
    unsigned long start = dfn >> order;
    unsigned long end = ((dfn + page_count - 1) >> order) + 1;

    ASSERT(end > start);
    return end - start;
}

int cf_check amd_iommu_flush_iotlb_pages(
    struct domain *d, dfn_t dfn, unsigned long page_count,
    unsigned int flush_flags)
{
    unsigned long dfn_l = dfn_x(dfn);

    if ( !(flush_flags & IOMMU_FLUSHF_all) )
    {
        ASSERT(page_count && !dfn_eq(dfn, INVALID_DFN));
        ASSERT(flush_flags);
    }

    /* Unless a PTE was modified, no flush is required */
    if ( !(flush_flags & IOMMU_FLUSHF_modified) )
        return 0;

    /* If so requested or if the range wraps then just flush everything. */
    if ( (flush_flags & IOMMU_FLUSHF_all) || dfn_l + page_count < dfn_l )
    {
        amd_iommu_flush_all_pages(d);
        return 0;
    }

    /*
     * Flushes are expensive so find the minimal single flush that will
     * cover the page range.
     *
     * NOTE: It is unnecessary to round down the DFN value to align with
     *       the flush order here. This is done by the internals of the
     *       flush code.
     */
    if ( page_count == 1 ) /* order 0 flush count */
        amd_iommu_flush_pages(d, dfn_l, 0);
    else if ( flush_count(dfn_l, page_count, 9) == 1 )
        amd_iommu_flush_pages(d, dfn_l, 9);
    else if ( flush_count(dfn_l, page_count, 18) == 1 )
        amd_iommu_flush_pages(d, dfn_l, 18);
    else
        amd_iommu_flush_all_pages(d);

    return 0;
}

int amd_iommu_reserve_domain_unity_map(struct domain *d,
                                       const struct ivrs_unity_map *map,
                                       unsigned int flag)
{
    int rc;

    if ( d == dom_io )
        return 0;

    for ( rc = 0; !rc && map; map = map->next )
    {
        p2m_access_t p2ma = p2m_access_n;

        if ( map->read )
            p2ma |= p2m_access_r;
        if ( map->write )
            p2ma |= p2m_access_w;

        rc = iommu_identity_mapping(d, p2ma, map->addr,
                                    map->addr + map->length - 1, flag);
    }

    return rc;
}

int amd_iommu_reserve_domain_unity_unmap(struct domain *d,
                                         const struct ivrs_unity_map *map)
{
    int rc;

    if ( d == dom_io )
        return 0;

    for ( rc = 0; map; map = map->next )
    {
        int ret = iommu_identity_mapping(d, p2m_access_x, map->addr,
                                         map->addr + map->length - 1, 0);

        if ( ret && ret != -ENOENT && !rc )
            rc = ret;
    }

    return rc;
}

int cf_check amd_iommu_get_reserved_device_memory(
    iommu_grdm_t *func, void *ctxt)
{
    unsigned int seg = 0 /* XXX */, bdf;
    const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);
    /* At least for global entries, avoid reporting them multiple times. */
    enum { pending, processing, done } global = pending;

    for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf )
    {
        pci_sbdf_t sbdf = PCI_SBDF(seg, bdf);
        const struct ivrs_unity_map *um = ivrs_mappings[bdf].unity_map;
        unsigned int req = ivrs_mappings[bdf].dte_requestor_id;
        const struct amd_iommu *iommu = ivrs_mappings[bdf].iommu;
        int rc;

        if ( !iommu )
        {
            /* May need to trigger the workaround in find_iommu_for_device(). */
            const struct pci_dev *pdev;

            pcidevs_lock();
            pdev = pci_get_pdev(NULL, sbdf);
            pcidevs_unlock();

            if ( pdev )
                iommu = find_iommu_for_device(seg, bdf);
            if ( !iommu )
                continue;
        }

        if ( func(0, 0, sbdf.sbdf, ctxt) )
        {
            /*
             * When the caller processes a XENMEM_RDM_ALL request, don't
             * report the same range(s) multiple times for what may be many
             * devices with the same alias ID.
             */
            if ( bdf != req && ivrs_mappings[req].iommu &&
                 func(0, 0, PCI_SBDF(seg, req).sbdf, ctxt) )
                continue;

            if ( global == pending )
                global = processing;
        }

        if ( iommu->exclusion_enable &&
             (iommu->exclusion_allow_all ?
              global == processing :
              ivrs_mappings[bdf].dte_allow_exclusion) )
        {
            rc = func(PFN_DOWN(iommu->exclusion_base),
                      PFN_UP(iommu->exclusion_limit | 1) -
                      PFN_DOWN(iommu->exclusion_base), sbdf.sbdf, ctxt);
            if ( unlikely(rc < 0) )
                return rc;
        }

        for ( ; um; um = um->next )
        {
            if ( um->global && global != processing )
                continue;

            rc = func(PFN_DOWN(um->addr), PFN_DOWN(um->length),
                      sbdf.sbdf, ctxt);
            if ( unlikely(rc < 0) )
                return rc;
        }

        if ( global == processing )
            global = done;
    }

    return 0;
}

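/*
 * Recursively populate all non-present entries of a quarantine page table
 * hierarchy, such that every leaf ends up referencing the single scratch
 * page (pgs[0]).  Present entries, e.g. from unity maps, are left in place,
 * but any page tables they reference are descended into.
 */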
static int fill_qpt(union amd_iommu_pte *this, unsigned int level,
                    struct page_info *pgs[IOMMU_MAX_PT_LEVELS])
{
    struct domain_iommu *hd = dom_iommu(dom_io);
    unsigned int i;
    int rc = 0;

    for ( i = 0; !rc && i < PTE_PER_TABLE_SIZE; ++i )
    {
        union amd_iommu_pte *pte = &this[i], *next;

        if ( !pte->pr )
        {
            if ( !pgs[level] )
            {
                /*
                 * The pgtable allocator is fine for the leaf page, as well as
                 * page table pages, and the resulting allocations are always
                 * zeroed.
                 */
                pgs[level] = iommu_alloc_pgtable(hd, 0);
                if ( !pgs[level] )
                {
                    rc = -ENOMEM;
                    break;
                }

                if ( level )
                {
                    next = __map_domain_page(pgs[level]);
                    rc = fill_qpt(next, level - 1, pgs);
                    unmap_domain_page(next);
                }
            }

            /*
             * PDEs are essentially a subset of PTEs, so this function
             * is fine to use even at the leaf.
             */
            set_iommu_pde_present(pte, mfn_x(page_to_mfn(pgs[level])), level,
                                  true, true);
        }
        else if ( level && pte->next_level )
        {
            next = map_domain_page(_mfn(pte->mfn));
            rc = fill_qpt(next, level - 1, pgs);
            unmap_domain_page(next);
        }
    }

    return rc;
}

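/*
 * Prepare the per-device quarantine page tables: unity map ranges get
 * re-established and, with scratch_page set, all remaining space is pointed
 * at a shared scratch page (presumably to absorb in-flight DMA without
 * faulting).  The resulting page tables are kept on the device's own list
 * rather than dom_io's.
 */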
int cf_check amd_iommu_quarantine_init(struct pci_dev *pdev, bool scratch_page)
{
    struct domain_iommu *hd = dom_iommu(dom_io);
    unsigned int level = hd->arch.amd.paging_mode;
    unsigned int req_id = get_dma_requestor_id(pdev->seg, pdev->sbdf.bdf);
    const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev->seg);
    int rc;

    ASSERT(pcidevs_locked());
    ASSERT(!hd->arch.amd.root_table);
    ASSERT(page_list_empty(&hd->arch.pgtables.list));

    if ( !scratch_page && !ivrs_mappings[req_id].unity_map )
        return 0;

    ASSERT(pdev->arch.pseudo_domid != DOMID_INVALID);

    if ( pdev->arch.amd.root_table )
    {
        clear_domain_page(pdev->arch.leaf_mfn);
        return 0;
    }

    pdev->arch.amd.root_table = iommu_alloc_pgtable(hd, 0);
    if ( !pdev->arch.amd.root_table )
        return -ENOMEM;

    /* Transiently install the root into DomIO, for iommu_identity_mapping(). */
    hd->arch.amd.root_table = pdev->arch.amd.root_table;

    rc = amd_iommu_reserve_domain_unity_map(dom_io,
                                            ivrs_mappings[req_id].unity_map,
                                            0);

    iommu_identity_map_teardown(dom_io);
    hd->arch.amd.root_table = NULL;

    if ( rc )
        AMD_IOMMU_WARN("%pp: quarantine unity mapping failed\n", &pdev->sbdf);
    else if ( scratch_page )
    {
        union amd_iommu_pte *root;
        struct page_info *pgs[IOMMU_MAX_PT_LEVELS] = {};

        root = __map_domain_page(pdev->arch.amd.root_table);
        rc = fill_qpt(root, level - 1, pgs);
        unmap_domain_page(root);

        pdev->arch.leaf_mfn = page_to_mfn(pgs[0]);
    }

    page_list_move(&pdev->arch.pgtables_list, &hd->arch.pgtables.list);

    if ( rc )
        amd_iommu_quarantine_teardown(pdev);

    return rc;
}

void amd_iommu_quarantine_teardown(struct pci_dev *pdev)
{
    struct domain_iommu *hd = dom_iommu(dom_io);

    ASSERT(pcidevs_locked());

    if ( !pdev->arch.amd.root_table )
        return;

    ASSERT(page_list_empty(&hd->arch.pgtables.list));
    page_list_move(&hd->arch.pgtables.list, &pdev->arch.pgtables_list);
    while ( iommu_free_pgtables(dom_io) == -ERESTART )
        /* nothing */;
    pdev->arch.amd.root_table = NULL;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */