/*
 * Copyright (C) 2007 Advanced Micro Devices, Inc.
 * Author: Leo Duran <leo.duran@amd.com>
 * Author: Wei Wang <wei.wang2@amd.com> - adapted to xen
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/acpi.h>

#include "iommu.h"

#define CONTIG_MASK IOMMU_PTE_CONTIG_MASK
#include <asm/pt-contig-markers.h>

/* Given pfn and page table level, return pde index */
static unsigned int pfn_to_pde_idx(unsigned long pfn, unsigned int level)
{
    unsigned int idx;

    idx = pfn >> (PTE_PER_TABLE_SHIFT * (--level));
    idx &= ~PTE_PER_TABLE_MASK;
    return idx;
}

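/*
 * Clear the PTE for dfn at the given level, returning the old entry.  *free
 * is set when the contiguity markers indicate that the containing page table
 * has become entirely non-present and hence may be freed by the caller.
 */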
static union amd_iommu_pte clear_iommu_pte_present(unsigned long l1_mfn,
                                                   unsigned long dfn,
                                                   unsigned int level,
                                                   bool *free)
{
    union amd_iommu_pte *table, *pte, old;
    unsigned int idx = pfn_to_pde_idx(dfn, level);

    table = map_domain_page(_mfn(l1_mfn));
    pte = &table[idx];
    old = *pte;

    write_atomic(&pte->raw, 0);

    *free = pt_update_contig_markers(&table->raw, idx, level, PTE_kind_null);

    unmap_domain_page(table);

    return old;
}

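/*
 * Write a present PDE/PTE referencing next_mfn, with the given next_level
 * (0 for a leaf entry) and read/write permissions.
 */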
static void set_iommu_pde_present(union amd_iommu_pte *pte,
                                  unsigned long next_mfn,
                                  unsigned int next_level,
                                  bool iw, bool ir)
{
    union amd_iommu_pte new = {};

    /*
     * The FC bit should be set in leaf PTEs; this helps to avoid potential
     * issues with ATS devices.
     */
    new.fc = !next_level;

    new.mfn = next_mfn;
    new.iw = iw;
    new.ir = ir;
    new.next_level = next_level;
    new.pr = true;

    write_atomic(&pte->raw, new.raw);
}

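/*
 * Install a leaf PTE mapping dfn -> next_mfn.  Returns the old entry, with
 * its present bit cleared when nothing needed changing.  *contig is set when
 * the updated table now consists solely of suitably contiguous leaf entries,
 * i.e. is a candidate for being replaced by a superpage mapping.
 */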
static union amd_iommu_pte set_iommu_pte_present(unsigned long pt_mfn,
                                                 unsigned long dfn,
                                                 unsigned long next_mfn,
                                                 unsigned int level,
                                                 bool iw, bool ir,
                                                 bool *contig)
{
    union amd_iommu_pte *table, *pde, old;

    table = map_domain_page(_mfn(pt_mfn));
    pde = &table[pfn_to_pde_idx(dfn, level)];

    old = *pde;
    if ( !old.pr || old.next_level ||
         old.mfn != next_mfn ||
         old.iw != iw || old.ir != ir )
    {
        set_iommu_pde_present(pde, next_mfn, 0, iw, ir);
        *contig = pt_update_contig_markers(&table->raw,
                                           pfn_to_pde_idx(dfn, level),
                                           level, PTE_kind_leaf);
    }
    else
    {
        old.pr = false; /* signal "no change" to the caller */
        *contig = false;
    }

    unmap_domain_page(table);

    return old;
}

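/*
 * Fill nr_ptes consecutive leaf entries, mapping a contiguous MFN range
 * starting at next_mfn.  Used when shattering a superpage into a lower
 * level page table.
 */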
static void set_iommu_ptes_present(unsigned long pt_mfn,
                                   unsigned long dfn,
                                   unsigned long next_mfn,
                                   unsigned int nr_ptes,
                                   unsigned int pde_level,
                                   bool iw, bool ir)
{
    union amd_iommu_pte *table, *pde;
    unsigned long page_sz = 1UL << (PTE_PER_TABLE_SHIFT * (pde_level - 1));

    table = map_domain_page(_mfn(pt_mfn));
    pde = &table[pfn_to_pde_idx(dfn, pde_level)];

    if ( (void *)(pde + nr_ptes) > (void *)table + PAGE_SIZE )
    {
        ASSERT_UNREACHABLE();
        return;
    }

    ASSERT(!(next_mfn & (page_sz - 1)));

    while ( nr_ptes-- )
    {
        ASSERT(!pde->next_level);
        ASSERT(!pde->u);

        if ( pde > table )
            ASSERT(pde->ign0 == ffs(pde - table) - 1);
        else
            ASSERT(pde->ign0 == CONTIG_LEVEL_SHIFT);

        pde->iw = iw;
        pde->ir = ir;
        pde->fc = true; /* See set_iommu_pde_present(). */
        pde->mfn = next_mfn;
        pde->pr = true;

        ++pde;
        next_mfn += page_sz;
    }

    unmap_domain_page(table);
}

/*
 * This function returns
 * - -errno for errors,
 * - 0 for a successful update, atomic when necessary
 * - 1 for a successful but non-atomic update, which may need to be warned
 *   about by the caller.
 */
int amd_iommu_set_root_page_table(struct amd_iommu_dte *dte,
                                  uint64_t root_ptr, uint16_t domain_id,
                                  uint8_t paging_mode, unsigned int flags)
{
    bool valid = flags & SET_ROOT_VALID;

    if ( dte->v && dte->tv &&
         (cpu_has_cx16 || (flags & SET_ROOT_WITH_UNITY_MAP)) )
    {
        union {
            struct amd_iommu_dte dte;
            uint64_t raw64[4];
            __uint128_t raw128[2];
        } ldte = { .dte = *dte };
        __uint128_t old = ldte.raw128[0];
        int ret = 0;

        ldte.dte.domain_id = domain_id;
        ldte.dte.pt_root = paddr_to_pfn(root_ptr);
        ldte.dte.iw = true;
        ldte.dte.ir = true;
        ldte.dte.paging_mode = paging_mode;
        ldte.dte.v = valid;

        if ( cpu_has_cx16 )
        {
            __uint128_t res = cmpxchg16b(dte, &old, &ldte.raw128[0]);

            /*
             * Hardware does not update the DTE behind our backs, so the
             * return value should match "old".
             */
            if ( res != old )
            {
                printk(XENLOG_ERR
                       "Dom%d: unexpected DTE %016lx_%016lx (expected %016lx_%016lx)\n",
                       domain_id,
                       (uint64_t)(res >> 64), (uint64_t)res,
                       (uint64_t)(old >> 64), (uint64_t)old);
                ret = -EILSEQ;
            }
        }
        else /* Best effort, updating domain_id last. */
        {
            uint64_t *ptr = (void *)dte;

            write_atomic(ptr + 0, ldte.raw64[0]);
            /* No barrier should be needed between these two. */
            write_atomic(ptr + 1, ldte.raw64[1]);

            ret = 1;
        }

        return ret;
    }

    if ( valid || dte->v )
    {
        dte->tv = false;
        dte->v = true;
        smp_wmb();
    }
    dte->domain_id = domain_id;
    dte->pt_root = paddr_to_pfn(root_ptr);
    dte->iw = true;
    dte->ir = true;
    dte->paging_mode = paging_mode;
    smp_wmb();
    dte->tv = true;
    dte->v = valid;

    return 0;
}

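/*
 * Point the DTE at the given interrupt remapping table, or arrange for
 * remapped interrupts to be aborted when ptr is NULL.  IV is written last,
 * after a barrier, so that the other interrupt fields are observed first.
 */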
void amd_iommu_set_intremap_table(
    struct amd_iommu_dte *dte, const void *ptr,
    const struct amd_iommu *iommu, bool valid)
{
    if ( ptr )
    {
        dte->it_root = virt_to_maddr(ptr) >> 6;
        dte->int_tab_len = amd_iommu_intremap_table_order(ptr, iommu);
        dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_TRANSLATED;
    }
    else
    {
        dte->it_root = 0;
        dte->int_tab_len = 0;
        dte->int_ctl = IOMMU_DEV_TABLE_INT_CONTROL_ABORTED;
    }

    dte->ig = false; /* unmapped interrupts result in i/o page faults */
    smp_wmb();
    dte->iv = valid;
}

void __init iommu_dte_add_device_entry(struct amd_iommu_dte *dte,
                                       const struct ivrs_mappings *ivrs_dev)
{
    uint8_t flags = ivrs_dev->device_flags;

    *dte = (struct amd_iommu_dte){
        .init_pass = flags & ACPI_IVHD_INIT_PASS,
        .ext_int_pass = flags & ACPI_IVHD_EINT_PASS,
        .nmi_pass = flags & ACPI_IVHD_NMI_PASS,
        .lint0_pass = flags & ACPI_IVHD_LINT0_PASS,
        .lint1_pass = flags & ACPI_IVHD_LINT1_PASS,
        .ioctl = IOMMU_DEV_TABLE_IO_CONTROL_ABORTED,
        .sys_mgt = MASK_EXTR(flags, ACPI_IVHD_SYSTEM_MGMT),
        .ex = ivrs_dev->dte_allow_exclusion,
    };
}

/*
 * Walk the IO page tables and build lower level page tables if necessary.
 * {Re,un}mapping superpage frames causes re-allocation of IO page tables.
 */
static int iommu_pde_from_dfn(struct domain *d, unsigned long dfn,
                              unsigned int target, unsigned long *pt_mfn,
                              unsigned int *flush_flags, bool map)
{
    union amd_iommu_pte *pde, *next_table_vaddr;
    unsigned long  next_table_mfn;
    unsigned int level;
    struct page_info *table;
    struct domain_iommu *hd = dom_iommu(d);

    table = hd->arch.amd.root_table;
    level = hd->arch.amd.paging_mode;

    if ( !table || target < 1 || level < target || level > 6 )
    {
        ASSERT_UNREACHABLE();
        return 1;
    }

    /*
     * A frame number past what the current page tables can represent can't
     * possibly have a mapping.
     */
    if ( dfn >> (PTE_PER_TABLE_SHIFT * level) )
        return 0;

    next_table_mfn = mfn_x(page_to_mfn(table));

    while ( level > target )
    {
        unsigned int next_level = level - 1;

        next_table_vaddr = map_domain_page(_mfn(next_table_mfn));
        pde = &next_table_vaddr[pfn_to_pde_idx(dfn, level)];

        /* Here might be a super page frame */
        next_table_mfn = pde->mfn;

        /* Split super page frame into smaller pieces. */
        if ( pde->pr && !pde->next_level && next_table_mfn )
        {
            unsigned long mfn, pfn;

            pfn = dfn & ~((1UL << (PTE_PER_TABLE_SHIFT * next_level)) - 1);
            mfn = next_table_mfn;

            /* allocate lower level page table */
            table = iommu_alloc_pgtable(hd, IOMMU_PTE_CONTIG_MASK);
            if ( table == NULL )
            {
                AMD_IOMMU_ERROR("cannot allocate I/O page table\n");
                unmap_domain_page(next_table_vaddr);
                return 1;
            }

            next_table_mfn = mfn_x(page_to_mfn(table));

            set_iommu_ptes_present(next_table_mfn, pfn, mfn, PTE_PER_TABLE_SIZE,
                                   next_level, pde->iw, pde->ir);
            smp_wmb();
            set_iommu_pde_present(pde, next_table_mfn, next_level, true,
                                  true);
            pt_update_contig_markers(&next_table_vaddr->raw,
                                     pfn_to_pde_idx(dfn, level),
                                     level, PTE_kind_table);

            *flush_flags |= IOMMU_FLUSHF_modified;

            perfc_incr(iommu_pt_shatters);
        }

        /* Install lower level page table for non-present entries */
        else if ( !pde->pr )
        {
            if ( !map )
            {
                unmap_domain_page(next_table_vaddr);
                return 0;
            }

            if ( next_table_mfn == 0 )
            {
                table = iommu_alloc_pgtable(hd, IOMMU_PTE_CONTIG_MASK);
                if ( table == NULL )
                {
                    AMD_IOMMU_ERROR("cannot allocate I/O page table\n");
                    unmap_domain_page(next_table_vaddr);
                    return 1;
                }
                next_table_mfn = mfn_x(page_to_mfn(table));
                set_iommu_pde_present(pde, next_table_mfn, next_level, true,
                                      true);
                pt_update_contig_markers(&next_table_vaddr->raw,
                                         pfn_to_pde_idx(dfn, level),
                                         level, PTE_kind_table);
            }
            else /* should never reach here */
            {
                unmap_domain_page(next_table_vaddr);
                return 1;
            }
        }

        unmap_domain_page(next_table_vaddr);
        level--;
    }

    /* mfn of target level page table */
    *pt_mfn = next_table_mfn;
    return 0;
}

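/* Recursively queue an entire page table (sub-)tree for freeing. */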
static void queue_free_pt(struct domain_iommu *hd, mfn_t mfn, unsigned int level)
{
    if ( level > 1 )
    {
        union amd_iommu_pte *pt = map_domain_page(mfn);
        unsigned int i;

        for ( i = 0; i < PTE_PER_TABLE_SIZE; ++i )
            if ( pt[i].pr && pt[i].next_level )
            {
                ASSERT(pt[i].next_level < level);
                queue_free_pt(hd, _mfn(pt[i].mfn), pt[i].next_level);
            }

        unmap_domain_page(pt);
    }

    iommu_queue_free_pgtable(hd, mfn_to_page(mfn));
}

int cf_check amd_iommu_map_page(
    struct domain *d, dfn_t dfn, mfn_t mfn, unsigned int flags,
    unsigned int *flush_flags)
{
    struct domain_iommu *hd = dom_iommu(d);
    unsigned int level = (IOMMUF_order(flags) / PTE_PER_TABLE_SHIFT) + 1;
    bool contig;
    int rc;
    unsigned long pt_mfn = 0;
    union amd_iommu_pte old;

    ASSERT((hd->platform_ops->page_sizes >> IOMMUF_order(flags)) &
           PAGE_SIZE_4K);

    spin_lock(&hd->arch.mapping_lock);

    /*
     * IOMMU mapping request can be safely ignored when the domain is dying.
     *
     * hd->arch.mapping_lock guarantees that d->is_dying will be observed
     * before any page tables are freed (see iommu_free_pgtables()).
     */
    if ( d->is_dying )
    {
        spin_unlock(&hd->arch.mapping_lock);
        return 0;
    }

    rc = amd_iommu_alloc_root(d);
    if ( rc )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_ERROR("root table alloc failed, dfn = %"PRI_dfn"\n",
                        dfn_x(dfn));
        domain_crash(d);
        return rc;
    }

    if ( iommu_pde_from_dfn(d, dfn_x(dfn), level, &pt_mfn, flush_flags, true) ||
         !pt_mfn )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_ERROR("invalid IO pagetable entry dfn = %"PRI_dfn"\n",
                        dfn_x(dfn));
        domain_crash(d);
        return -EFAULT;
    }

    /* Install mapping */
    old = set_iommu_pte_present(pt_mfn, dfn_x(dfn), mfn_x(mfn), level,
                                flags & IOMMUF_writable,
                                flags & IOMMUF_readable, &contig);

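    /*
     * When the installed PTE made the containing table uniformly contiguous,
     * replace that table with a single superpage entry one level up (and
     * repeat further up for as long as that keeps being the case), queueing
     * the now redundant lower level table(s) for freeing.
     */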
    while ( unlikely(contig) && ++level < hd->arch.amd.paging_mode )
    {
        struct page_info *pg = mfn_to_page(_mfn(pt_mfn));
        unsigned long next_mfn;

        if ( iommu_pde_from_dfn(d, dfn_x(dfn), level, &pt_mfn, flush_flags,
                                false) )
            BUG();
        BUG_ON(!pt_mfn);

        next_mfn = mfn_x(mfn) & (~0UL << (PTE_PER_TABLE_SHIFT * (level - 1)));
        set_iommu_pte_present(pt_mfn, dfn_x(dfn), next_mfn, level,
                              flags & IOMMUF_writable,
                              flags & IOMMUF_readable, &contig);
        *flush_flags |= IOMMU_FLUSHF_modified | IOMMU_FLUSHF_all;
        iommu_queue_free_pgtable(hd, pg);
        perfc_incr(iommu_pt_coalesces);
    }

    spin_unlock(&hd->arch.mapping_lock);

    *flush_flags |= IOMMU_FLUSHF_added;
    if ( old.pr )
    {
        *flush_flags |= IOMMU_FLUSHF_modified;

        if ( IOMMUF_order(flags) && old.next_level )
            queue_free_pt(hd, _mfn(old.mfn), old.next_level);
    }

    return 0;
}

int cf_check amd_iommu_unmap_page(
    struct domain *d, dfn_t dfn, unsigned int order, unsigned int *flush_flags)
{
    unsigned long pt_mfn = 0;
    struct domain_iommu *hd = dom_iommu(d);
    unsigned int level = (order / PTE_PER_TABLE_SHIFT) + 1;
    union amd_iommu_pte old = {};

    /*
     * While really we could unmap at any granularity, for now we assume unmaps
     * are issued by common code only at the same granularity as maps.
     */
    ASSERT((hd->platform_ops->page_sizes >> order) & PAGE_SIZE_4K);

    spin_lock(&hd->arch.mapping_lock);

    if ( !hd->arch.amd.root_table )
    {
        spin_unlock(&hd->arch.mapping_lock);
        return 0;
    }

    if ( iommu_pde_from_dfn(d, dfn_x(dfn), level, &pt_mfn, flush_flags, false) )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_ERROR("invalid IO pagetable entry dfn = %"PRI_dfn"\n",
                        dfn_x(dfn));
        domain_crash(d);
        return -EFAULT;
    }

    if ( pt_mfn )
    {
        bool free;

        /* Mark PTE as 'page not present'. */
        old = clear_iommu_pte_present(pt_mfn, dfn_x(dfn), level, &free);

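        /*
         * When clearing the PTE left the containing table entirely
         * non-present, also clear the referencing entry one level up
         * (repeating further up as applicable) and queue the now unused
         * table(s) for freeing.
         */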
        while ( unlikely(free) && ++level < hd->arch.amd.paging_mode )
        {
            struct page_info *pg = mfn_to_page(_mfn(pt_mfn));

            if ( iommu_pde_from_dfn(d, dfn_x(dfn), level, &pt_mfn,
                                    flush_flags, false) )
                BUG();
            BUG_ON(!pt_mfn);

            clear_iommu_pte_present(pt_mfn, dfn_x(dfn), level, &free);
            *flush_flags |= IOMMU_FLUSHF_all;
            iommu_queue_free_pgtable(hd, pg);
            perfc_incr(iommu_pt_coalesces);
        }
    }

    spin_unlock(&hd->arch.mapping_lock);

    if ( old.pr )
    {
        *flush_flags |= IOMMU_FLUSHF_modified;

        if ( order && old.next_level )
            queue_free_pt(hd, _mfn(old.mfn), old.next_level);
    }

    return 0;
}

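/* Debugging helper: dump the page table walk for a given device and DFN. */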
void amd_iommu_print_entries(const struct amd_iommu *iommu, unsigned int dev_id,
                             dfn_t dfn)
{
    mfn_t pt_mfn;
    unsigned int level;
    const struct amd_iommu_dte *dt = iommu->dev_table.buffer;

    if ( !dt[dev_id].tv )
    {
        printk("%pp: no root\n", &PCI_SBDF(iommu->seg, dev_id));
        return;
    }

    pt_mfn = _mfn(dt[dev_id].pt_root);
    level = dt[dev_id].paging_mode;
    printk("%pp root @ %"PRI_mfn" (%u levels) dfn=%"PRI_dfn"\n",
           &PCI_SBDF(iommu->seg, dev_id), mfn_x(pt_mfn), level, dfn_x(dfn));

    while ( level )
    {
        const union amd_iommu_pte *pt = map_domain_page(pt_mfn);
        unsigned int idx = pfn_to_pde_idx(dfn_x(dfn), level);
        union amd_iommu_pte pte = pt[idx];

        unmap_domain_page(pt);

        printk("  L%u[%03x] = %"PRIx64" %c%c\n", level, idx, pte.raw,
               pte.pr ? pte.ir ? 'r' : '-' : 'n',
               pte.pr ? pte.iw ? 'w' : '-' : 'p');

        if ( !pte.pr )
            break;

        if ( pte.next_level >= level )
        {
            printk("  L%u[%03x]: next: %u\n", level, idx, pte.next_level);
            break;
        }

        pt_mfn = _mfn(pte.mfn);
        level = pte.next_level;
    }
}

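/*
 * Number of order-sized flushes needed to cover page_count pages starting
 * at dfn.
 */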
static unsigned long flush_count(unsigned long dfn, unsigned long page_count,
                                 unsigned int order)
{
    unsigned long start = dfn >> order;
    unsigned long end = ((dfn + page_count - 1) >> order) + 1;

    ASSERT(end > start);
    return end - start;
}

int cf_check amd_iommu_flush_iotlb_pages(
    struct domain *d, dfn_t dfn, unsigned long page_count,
    unsigned int flush_flags)
{
    unsigned long dfn_l = dfn_x(dfn);

    if ( !(flush_flags & IOMMU_FLUSHF_all) )
    {
        ASSERT(page_count && !dfn_eq(dfn, INVALID_DFN));
        ASSERT(flush_flags);
    }

    /* Unless a PTE was modified, no flush is required */
    if ( !(flush_flags & IOMMU_FLUSHF_modified) )
        return 0;

    /* If so requested or if the range wraps then just flush everything. */
    if ( (flush_flags & IOMMU_FLUSHF_all) || dfn_l + page_count < dfn_l )
    {
        amd_iommu_flush_all_pages(d);
        return 0;
    }

    /*
     * Flushes are expensive so find the minimal single flush that will
     * cover the page range.
     *
     * NOTE: It is unnecessary to round down the DFN value to align with
     *       the flush order here. This is done by the internals of the
     *       flush code.
     */
    if ( page_count == 1 ) /* order 0 flush count */
        amd_iommu_flush_pages(d, dfn_l, 0);
    else if ( flush_count(dfn_l, page_count, 9) == 1 )
        amd_iommu_flush_pages(d, dfn_l, 9);
    else if ( flush_count(dfn_l, page_count, 18) == 1 )
        amd_iommu_flush_pages(d, dfn_l, 18);
    else
        amd_iommu_flush_all_pages(d);

    return 0;
}

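/*
 * Establish the identity mappings requested by a device's IVRS unity map
 * entries (and, in the counterpart function below, tear them down again).
 */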
int amd_iommu_reserve_domain_unity_map(struct domain *d,
                                       const struct ivrs_unity_map *map,
                                       unsigned int flag)
{
    int rc;

    if ( d == dom_io )
        return 0;

    for ( rc = 0; !rc && map; map = map->next )
    {
        p2m_access_t p2ma = p2m_access_n;

        if ( map->read )
            p2ma |= p2m_access_r;
        if ( map->write )
            p2ma |= p2m_access_w;

        rc = iommu_identity_mapping(d, p2ma, map->addr,
                                    map->addr + map->length - 1, flag);
    }

    return rc;
}

int amd_iommu_reserve_domain_unity_unmap(struct domain *d,
                                         const struct ivrs_unity_map *map)
{
    int rc;

    if ( d == dom_io )
        return 0;

    for ( rc = 0; map; map = map->next )
    {
        int ret = iommu_identity_mapping(d, p2m_access_x, map->addr,
                                         map->addr + map->length - 1, 0);

        if ( ret && ret != -ENOENT && !rc )
            rc = ret;
    }

    return rc;
}

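/*
 * Report reserved device memory (exclusion ranges and unity mapped regions)
 * to the caller supplied callback, per device, taking care to report global
 * ranges only once.
 */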
int cf_check amd_iommu_get_reserved_device_memory(
    iommu_grdm_t *func, void *ctxt)
{
    unsigned int seg = 0 /* XXX */, bdf;
    const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(seg);
    /* At least for global entries, avoid reporting them multiple times. */
    enum { pending, processing, done } global = pending;

    for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf )
    {
        pci_sbdf_t sbdf = PCI_SBDF(seg, bdf);
        const struct ivrs_unity_map *um = ivrs_mappings[bdf].unity_map;
        unsigned int req = ivrs_mappings[bdf].dte_requestor_id;
        const struct amd_iommu *iommu = ivrs_mappings[bdf].iommu;
        int rc;

        if ( !iommu )
        {
            /* May need to trigger the workaround in find_iommu_for_device(). */
            const struct pci_dev *pdev;

            pcidevs_lock();
            pdev = pci_get_pdev(NULL, sbdf);
            pcidevs_unlock();

            if ( pdev )
                iommu = find_iommu_for_device(seg, bdf);
            if ( !iommu )
                continue;
        }

        if ( func(0, 0, sbdf.sbdf, ctxt) )
        {
            /*
             * When the caller processes a XENMEM_RDM_ALL request, don't report
             * the same range(s) multiple times for what may be many devices
             * sharing the same alias ID.
             */
            if ( bdf != req && ivrs_mappings[req].iommu &&
                 func(0, 0, PCI_SBDF(seg, req).sbdf, ctxt) )
                continue;

            if ( global == pending )
                global = processing;
        }

        if ( iommu->exclusion_enable &&
             (iommu->exclusion_allow_all ?
              global == processing :
              ivrs_mappings[bdf].dte_allow_exclusion) )
        {
            rc = func(PFN_DOWN(iommu->exclusion_base),
                      PFN_UP(iommu->exclusion_limit | 1) -
                      PFN_DOWN(iommu->exclusion_base), sbdf.sbdf, ctxt);
            if ( unlikely(rc < 0) )
                return rc;
        }

        for ( ; um; um = um->next )
        {
            if ( um->global && global != processing )
                continue;

            rc = func(PFN_DOWN(um->addr), PFN_DOWN(um->length),
                      sbdf.sbdf, ctxt);
            if ( unlikely(rc < 0) )
                return rc;
        }

        if ( global == processing )
            global = done;
    }

    return 0;
}

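/*
 * Quarantine page table setup: populate every non-present entry so that all
 * DMA accesses end up at (shared) scratch pages, re-using the table / leaf
 * page allocated for each level across the entire tree.
 */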
static int fill_qpt(union amd_iommu_pte *this, unsigned int level,
                    struct page_info *pgs[IOMMU_MAX_PT_LEVELS])
{
    struct domain_iommu *hd = dom_iommu(dom_io);
    unsigned int i;
    int rc = 0;

    for ( i = 0; !rc && i < PTE_PER_TABLE_SIZE; ++i )
    {
        union amd_iommu_pte *pte = &this[i], *next;

        if ( !pte->pr )
        {
            if ( !pgs[level] )
            {
                /*
                 * The pgtable allocator is fine for the leaf page, as well as
                 * page table pages, and the resulting allocations are always
                 * zeroed.
                 */
                pgs[level] = iommu_alloc_pgtable(hd, 0);
                if ( !pgs[level] )
                {
                    rc = -ENOMEM;
                    break;
                }

                if ( level )
                {
                    next = __map_domain_page(pgs[level]);
                    rc = fill_qpt(next, level - 1, pgs);
                    unmap_domain_page(next);
                }
            }

            /*
             * PDEs are essentially a subset of PTEs, so this function
             * is fine to use even at the leaf.
             */
            set_iommu_pde_present(pte, mfn_x(page_to_mfn(pgs[level])), level,
                                  true, true);
        }
        else if ( level && pte->next_level )
        {
            next = map_domain_page(_mfn(pte->mfn));
            rc = fill_qpt(next, level - 1, pgs);
            unmap_domain_page(next);
        }
    }

    return rc;
}

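/*
 * Set up per-device quarantine page tables: they always contain the device's
 * unity mapped regions, plus (when a scratch page was requested) sink pages
 * for everything else.
 */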
int cf_check amd_iommu_quarantine_init(struct pci_dev *pdev, bool scratch_page)
{
    struct domain_iommu *hd = dom_iommu(dom_io);
    unsigned int level = hd->arch.amd.paging_mode;
    unsigned int req_id = get_dma_requestor_id(pdev->seg, pdev->sbdf.bdf);
    const struct ivrs_mappings *ivrs_mappings = get_ivrs_mappings(pdev->seg);
    int rc;

    ASSERT(pcidevs_locked());
    ASSERT(!hd->arch.amd.root_table);
    ASSERT(page_list_empty(&hd->arch.pgtables.list));

    if ( !scratch_page && !ivrs_mappings[req_id].unity_map )
        return 0;

    ASSERT(pdev->arch.pseudo_domid != DOMID_INVALID);

    if ( pdev->arch.amd.root_table )
    {
        clear_domain_page(pdev->arch.leaf_mfn);
        return 0;
    }

    pdev->arch.amd.root_table = iommu_alloc_pgtable(hd, 0);
    if ( !pdev->arch.amd.root_table )
        return -ENOMEM;

    /* Transiently install the root into DomIO, for iommu_identity_mapping(). */
    hd->arch.amd.root_table = pdev->arch.amd.root_table;

    rc = amd_iommu_reserve_domain_unity_map(dom_io,
                                            ivrs_mappings[req_id].unity_map,
                                            0);

    iommu_identity_map_teardown(dom_io);
    hd->arch.amd.root_table = NULL;

    if ( rc )
        AMD_IOMMU_WARN("%pp: quarantine unity mapping failed\n", &pdev->sbdf);
    else if ( scratch_page )
    {
        union amd_iommu_pte *root;
        struct page_info *pgs[IOMMU_MAX_PT_LEVELS] = {};

        root = __map_domain_page(pdev->arch.amd.root_table);
        rc = fill_qpt(root, level - 1, pgs);
        unmap_domain_page(root);

        pdev->arch.leaf_mfn = page_to_mfn(pgs[0]);
    }

    page_list_move(&pdev->arch.pgtables_list, &hd->arch.pgtables.list);

    if ( rc )
        amd_iommu_quarantine_teardown(pdev);

    return rc;
}

void amd_iommu_quarantine_teardown(struct pci_dev *pdev)
{
    struct domain_iommu *hd = dom_iommu(dom_io);

    ASSERT(pcidevs_locked());

    if ( !pdev->arch.amd.root_table )
        return;

    ASSERT(page_list_empty(&hd->arch.pgtables.list));
    page_list_move(&hd->arch.pgtables.list, &pdev->arch.pgtables_list);
    while ( iommu_free_pgtables(dom_io) == -ERESTART )
        /* nothing */;
    pdev->arch.amd.root_table = NULL;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */