/*
 * Copyright (C) 2007 Advanced Micro Devices, Inc.
 * Author: Leo Duran <leo.duran@amd.com>
 * Author: Wei Wang <wei.wang2@amd.com> - adapted to xen
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/acpi.h>
#include <xen/sched.h>
#include <asm/p2m.h>
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
#include "../ats.h"
#include <xen/pci.h>

/* Given pfn and page table level, return pde index */
static unsigned int pfn_to_pde_idx(unsigned long pfn, unsigned int level)
{
    unsigned int idx;

    idx = pfn >> (PTE_PER_TABLE_SHIFT * (--level));
    idx &= ~PTE_PER_TABLE_MASK;
    return idx;
}

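/* Clear the level 1 PTE for gfn in the page table page at l1_mfn,
 * marking the page as not present.
 */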
void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn)
{
    u64 *table, *pte;

    table = map_domain_page(_mfn(l1_mfn));
    pte = table + pfn_to_pde_idx(gfn, IOMMU_PAGING_MODE_LEVEL_1);
    *pte = 0;
    unmap_domain_page(table);
}

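/* Program a pde/pte to point at next_mfn with the given permissions
 * and next level, and mark it present.  Returns 1 if the entry
 * previously pointed at a different address, i.e. the caller needs to
 * flush the IOMMU TLB.
 */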
static bool_t set_iommu_pde_present(u32 *pde, unsigned long next_mfn,
                                    unsigned int next_level,
                                    bool_t iw, bool_t ir)
{
    u64 addr_lo, addr_hi, maddr_old, maddr_next;
    u32 entry;
    bool_t need_flush = 0;

    maddr_next = (u64)next_mfn << PAGE_SHIFT;

    addr_hi = get_field_from_reg_u32(pde[1],
                                     IOMMU_PTE_ADDR_HIGH_MASK,
                                     IOMMU_PTE_ADDR_HIGH_SHIFT);
    addr_lo = get_field_from_reg_u32(pde[0],
                                     IOMMU_PTE_ADDR_LOW_MASK,
                                     IOMMU_PTE_ADDR_LOW_SHIFT);

    maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);

    if ( maddr_old != maddr_next )
        need_flush = 1;

    addr_lo = maddr_next & DMA_32BIT_MASK;
    addr_hi = maddr_next >> 32;

    /* enable read/write permissions, which will be enforced at the PTE */
    set_field_in_reg_u32((u32)addr_hi, 0,
                         IOMMU_PDE_ADDR_HIGH_MASK,
                         IOMMU_PDE_ADDR_HIGH_SHIFT, &entry);
    set_field_in_reg_u32(iw, entry,
                         IOMMU_PDE_IO_WRITE_PERMISSION_MASK,
                         IOMMU_PDE_IO_WRITE_PERMISSION_SHIFT, &entry);
    set_field_in_reg_u32(ir, entry,
                         IOMMU_PDE_IO_READ_PERMISSION_MASK,
                         IOMMU_PDE_IO_READ_PERMISSION_SHIFT, &entry);

    /* The FC bit should be enabled in the PTE; this helps to solve
     * potential issues with ATS devices.
     */
    if ( next_level == IOMMU_PAGING_MODE_LEVEL_0 )
        set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                             IOMMU_PTE_FC_MASK, IOMMU_PTE_FC_SHIFT, &entry);
    pde[1] = entry;

    /* mark next level as 'present' */
    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
                         IOMMU_PDE_ADDR_LOW_MASK,
                         IOMMU_PDE_ADDR_LOW_SHIFT, &entry);
    set_field_in_reg_u32(next_level, entry,
                         IOMMU_PDE_NEXT_LEVEL_MASK,
                         IOMMU_PDE_NEXT_LEVEL_SHIFT, &entry);
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_PDE_PRESENT_MASK,
                         IOMMU_PDE_PRESENT_SHIFT, &entry);
    pde[0] = entry;

    return need_flush;
}

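/* Install a leaf PTE for gfn in the page table page at pt_mfn,
 * mapping next_mfn with the given permissions.  Returns whether a
 * flush is needed.
 */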
static bool_t set_iommu_pte_present(unsigned long pt_mfn, unsigned long gfn,
                                    unsigned long next_mfn, int pde_level,
                                    bool_t iw, bool_t ir)
{
    u64 *table;
    u32 *pde;
    bool_t need_flush = 0;

    table = map_domain_page(_mfn(pt_mfn));

    pde = (u32*)(table + pfn_to_pde_idx(gfn, pde_level));

    need_flush = set_iommu_pde_present(pde, next_mfn,
                                       IOMMU_PAGING_MODE_LEVEL_0, iw, ir);
    unmap_domain_page(table);
    return need_flush;
}

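/* Program the device table entry fields that select the I/O page
 * table: domain id, root table pointer, permissions, paging mode and
 * the TV/V bits.
 */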
void amd_iommu_set_root_page_table(
    u32 *dte, u64 root_ptr, u16 domain_id, u8 paging_mode, u8 valid)
{
    u64 addr_hi, addr_lo;
    u32 entry;
    set_field_in_reg_u32(domain_id, 0,
                         IOMMU_DEV_TABLE_DOMAIN_ID_MASK,
                         IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT, &entry);
    dte[2] = entry;

    addr_lo = root_ptr & DMA_32BIT_MASK;
    addr_hi = root_ptr >> 32;

    set_field_in_reg_u32((u32)addr_hi, 0,
                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK,
                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT, &entry);
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_DEV_TABLE_IO_WRITE_PERMISSION_MASK,
                         IOMMU_DEV_TABLE_IO_WRITE_PERMISSION_SHIFT, &entry);
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_DEV_TABLE_IO_READ_PERMISSION_MASK,
                         IOMMU_DEV_TABLE_IO_READ_PERMISSION_SHIFT, &entry);
    dte[1] = entry;

    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_MASK,
                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_SHIFT, &entry);
    set_field_in_reg_u32(paging_mode, entry,
                         IOMMU_DEV_TABLE_PAGING_MODE_MASK,
                         IOMMU_DEV_TABLE_PAGING_MODE_SHIFT, &entry);
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_DEV_TABLE_TRANSLATION_VALID_MASK,
                         IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT, &entry);
    set_field_in_reg_u32(valid ? IOMMU_CONTROL_ENABLED :
                         IOMMU_CONTROL_DISABLED, entry,
                         IOMMU_DEV_TABLE_VALID_MASK,
                         IOMMU_DEV_TABLE_VALID_SHIFT, &entry);
    dte[0] = entry;
}

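/* Set or clear the IOTLB support bit in a device table entry. */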
void iommu_dte_set_iotlb(u32 *dte, u8 i)
{
    u32 entry;

    entry = dte[3];
    set_field_in_reg_u32(!!i, entry,
                         IOMMU_DEV_TABLE_IOTLB_SUPPORT_MASK,
                         IOMMU_DEV_TABLE_IOTLB_SUPPORT_SHIFT, &entry);
    dte[3] = entry;
}

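/* Point a device table entry at its interrupt remapping table:
 * fixed and arbitrated interrupts are remapped, the table holds 2048
 * entries, and unmapped interrupts fault.
 */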
void __init amd_iommu_set_intremap_table(
    u32 *dte, u64 intremap_ptr, u8 int_valid)
{
    u64 addr_hi, addr_lo;
    u32 entry;

    addr_lo = intremap_ptr & DMA_32BIT_MASK;
    addr_hi = intremap_ptr >> 32;

    entry = dte[5];
    set_field_in_reg_u32((u32)addr_hi, entry,
                        IOMMU_DEV_TABLE_INT_TABLE_PTR_HIGH_MASK,
                        IOMMU_DEV_TABLE_INT_TABLE_PTR_HIGH_SHIFT, &entry);
    /* Fixed and arbitrated interrupts remapped */
    set_field_in_reg_u32(2, entry,
                        IOMMU_DEV_TABLE_INT_CONTROL_MASK,
                        IOMMU_DEV_TABLE_INT_CONTROL_SHIFT, &entry);
    dte[5] = entry;

    set_field_in_reg_u32((u32)addr_lo >> 6, 0,
                        IOMMU_DEV_TABLE_INT_TABLE_PTR_LOW_MASK,
                        IOMMU_DEV_TABLE_INT_TABLE_PTR_LOW_SHIFT, &entry);
    /* 2048 entries */
    set_field_in_reg_u32(0xB, entry,
                         IOMMU_DEV_TABLE_INT_TABLE_LENGTH_MASK,
                         IOMMU_DEV_TABLE_INT_TABLE_LENGTH_SHIFT, &entry);

    /* unmapped interrupts result in IO page faults */
    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
                         IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_MASK,
                         IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_SHIFT, &entry);
    set_field_in_reg_u32(int_valid ? IOMMU_CONTROL_ENABLED :
                         IOMMU_CONTROL_DISABLED, entry,
                         IOMMU_DEV_TABLE_INT_VALID_MASK,
                         IOMMU_DEV_TABLE_INT_VALID_SHIFT, &entry);
    dte[4] = entry;
}

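/* Initialise a device table entry from its IVRS mapping: reset the
 * entry, then program the IVHD flags, the SysMgt message setting and
 * the exclusion range bit.
 */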
void __init iommu_dte_add_device_entry(u32 *dte, struct ivrs_mappings *ivrs_dev)
{
    u32 entry;
    u8 sys_mgt, dev_ex, flags;
    u8 mask = ~(0x7 << 3);

    dte[7] = dte[6] = dte[4] = dte[2] = dte[1] = dte[0] = 0;

    flags = ivrs_dev->device_flags;
    sys_mgt = get_field_from_byte(flags, ACPI_IVHD_SYSTEM_MGMT);
    dev_ex = ivrs_dev->dte_allow_exclusion;

    flags &= mask;
    set_field_in_reg_u32(flags, 0,
                         IOMMU_DEV_TABLE_IVHD_FLAGS_MASK,
                         IOMMU_DEV_TABLE_IVHD_FLAGS_SHIFT, &entry);
    dte[5] = entry;

    set_field_in_reg_u32(sys_mgt, 0,
                         IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_MASK,
                         IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_SHIFT, &entry);
    set_field_in_reg_u32(dev_ex, entry,
                         IOMMU_DEV_TABLE_ALLOW_EXCLUSION_MASK,
                         IOMMU_DEV_TABLE_ALLOW_EXCLUSION_SHIFT, &entry);
    dte[3] = entry;
}

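/* Program the guest translation fields of a device table entry:
 * the GV/GLX bits and the three-part guest CR3 pointer.
 */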
void iommu_dte_set_guest_cr3(u32 *dte, u16 dom_id, u64 gcr3,
                             int gv, unsigned int glx)
{
    u32 entry, gcr3_1, gcr3_2, gcr3_3;

    gcr3_3 = gcr3 >> 31;
    gcr3_2 = (gcr3 >> 15) & 0xFFFF;
    gcr3_1 = (gcr3 >> PAGE_SHIFT) & 0x7;

    /* I bit must be set when gcr3 is enabled */
    entry = dte[3];
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_DEV_TABLE_IOTLB_SUPPORT_MASK,
                         IOMMU_DEV_TABLE_IOTLB_SUPPORT_SHIFT, &entry);
    /* update gcr3 */
    set_field_in_reg_u32(gcr3_3, entry,
                         IOMMU_DEV_TABLE_GCR3_3_MASK,
                         IOMMU_DEV_TABLE_GCR3_3_SHIFT, &entry);
    dte[3] = entry;

    set_field_in_reg_u32(dom_id, entry,
                         IOMMU_DEV_TABLE_DOMAIN_ID_MASK,
                         IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT, &entry);
    /* update gcr3 */
    entry = dte[2];
    set_field_in_reg_u32(gcr3_2, entry,
                         IOMMU_DEV_TABLE_GCR3_2_MASK,
                         IOMMU_DEV_TABLE_GCR3_2_SHIFT, &entry);
    dte[2] = entry;

    entry = dte[1];
    /* Enable GV bit */
    set_field_in_reg_u32(!!gv, entry,
                         IOMMU_DEV_TABLE_GV_MASK,
                         IOMMU_DEV_TABLE_GV_SHIFT, &entry);

    /* 1 level guest cr3 table */
    set_field_in_reg_u32(glx, entry,
                         IOMMU_DEV_TABLE_GLX_MASK,
                         IOMMU_DEV_TABLE_GLX_SHIFT, &entry);
    /* update gcr3 */
    set_field_in_reg_u32(gcr3_1, entry,
                         IOMMU_DEV_TABLE_GCR3_1_MASK,
                         IOMMU_DEV_TABLE_GCR3_1_SHIFT, &entry);
    dte[1] = entry;
}

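/* Return the machine address of the next level table (or page)
 * referenced by a pde/dte.
 */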
u64 amd_iommu_get_next_table_from_pte(u32 *entry)
{
    u64 addr_lo, addr_hi, ptr;

    addr_lo = get_field_from_reg_u32(
        entry[0],
        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_MASK,
        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_SHIFT);

    addr_hi = get_field_from_reg_u32(
        entry[1],
        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK,
        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT);

    ptr = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
    return ptr;
}

/* For each pde, we use the ignored bits (bit 1 - bit 8 and bit 63)
 * to save the pde count; a pde count of 511 makes the page a candidate
 * for coalescing.
 */
static unsigned int get_pde_count(u64 pde)
{
    unsigned int count;
    u64 upper_mask = 1ULL << 63;
    u64 lower_mask = 0xFF << 1;

    count = ((pde & upper_mask) >> 55) | ((pde & lower_mask) >> 1);
    return count;
}

/* Convert pde count into iommu pte ignored bits */
static void set_pde_count(u64 *pde, unsigned int count)
{
    u64 upper_mask = 1ULL << 8;
    u64 lower_mask = 0xFF;
    u64 pte_mask = (~(1ULL << 63)) & (~(0xFF << 1));

    *pde &= pte_mask;
    *pde |= ((count & upper_mask) << 55) | ((count & lower_mask) << 1);
}

/* Return 1 if pages are suitable for merging at merge_level;
 * otherwise increase the pde count if mfn is contiguous with mfn - 1.
 */
static int iommu_update_pde_count(struct domain *d, unsigned long pt_mfn,
                                  unsigned long gfn, unsigned long mfn,
                                  unsigned int merge_level)
{
    unsigned int pde_count, next_level;
    unsigned long first_mfn;
    u64 *table, *pde, *ntable;
    u64 ntable_maddr, mask;
    struct domain_iommu *hd = dom_iommu(d);
    bool_t ok = 0;

    ASSERT( spin_is_locked(&hd->arch.mapping_lock) && pt_mfn );

    next_level = merge_level - 1;

    /* get pde at merge level */
    table = map_domain_page(_mfn(pt_mfn));
    pde = table + pfn_to_pde_idx(gfn, merge_level);

    /* get page table of next level */
    ntable_maddr = amd_iommu_get_next_table_from_pte((u32*)pde);
    ntable = map_domain_page(_mfn(paddr_to_pfn(ntable_maddr)));

    /* get the first mfn of next level */
    first_mfn = amd_iommu_get_next_table_from_pte((u32*)ntable) >> PAGE_SHIFT;

    if ( first_mfn == 0 )
        goto out;

    mask = (1ULL << (PTE_PER_TABLE_SHIFT * next_level)) - 1;

    if ( ((first_mfn & mask) == 0) &&
         (((gfn & mask) | first_mfn) == mfn) )
    {
        pde_count = get_pde_count(*pde);

        if ( pde_count == (PTE_PER_TABLE_SIZE - 1) )
            ok = 1;
        else if ( pde_count < (PTE_PER_TABLE_SIZE - 1) )
        {
            pde_count++;
            set_pde_count(pde, pde_count);
        }
    }
    else
        /* non-contiguous mapping */
        set_pde_count(pde, 0);

out:
    unmap_domain_page(ntable);
    unmap_domain_page(table);

    return ok;
}

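/* Replace the pde at merge_level with a super page mapping of the
 * first mfn of the next level table and flush.  Returns 0 on success,
 * 1 if the lower level table or its first mapping is missing.
 */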
static int iommu_merge_pages(struct domain *d, unsigned long pt_mfn,
                             unsigned long gfn, unsigned int flags,
                             unsigned int merge_level)
{
    u64 *table, *pde, *ntable;
    u64 ntable_mfn;
    unsigned long first_mfn;
    struct domain_iommu *hd = dom_iommu(d);

    ASSERT( spin_is_locked(&hd->arch.mapping_lock) && pt_mfn );

    table = map_domain_page(_mfn(pt_mfn));
    pde = table + pfn_to_pde_idx(gfn, merge_level);

    /* get first mfn */
    ntable_mfn = amd_iommu_get_next_table_from_pte((u32*)pde) >> PAGE_SHIFT;

    if ( ntable_mfn == 0 )
    {
        unmap_domain_page(table);
        return 1;
    }

    ntable = map_domain_page(_mfn(ntable_mfn));
    first_mfn = amd_iommu_get_next_table_from_pte((u32*)ntable) >> PAGE_SHIFT;

    if ( first_mfn == 0 )
    {
        unmap_domain_page(ntable);
        unmap_domain_page(table);
        return 1;
    }

    /* setup super page mapping, next level = 0 */
    set_iommu_pde_present((u32*)pde, first_mfn,
                          IOMMU_PAGING_MODE_LEVEL_0,
                          !!(flags & IOMMUF_writable),
                          !!(flags & IOMMUF_readable));

    amd_iommu_flush_all_pages(d);

    unmap_domain_page(ntable);
    unmap_domain_page(table);
    return 0;
}

/* Walk IO page tables and build level page tables if necessary.
 * {Re,un}mapping super page frames causes re-allocation of IO
 * page tables.
 */
static int iommu_pde_from_gfn(struct domain *d, unsigned long pfn,
                              unsigned long pt_mfn[])
{
    u64 *pde, *next_table_vaddr;
    unsigned long  next_table_mfn;
    unsigned int level;
    struct page_info *table;
    const struct domain_iommu *hd = dom_iommu(d);

    table = hd->arch.root_table;
    level = hd->arch.paging_mode;

    BUG_ON( table == NULL || level < IOMMU_PAGING_MODE_LEVEL_1 ||
            level > IOMMU_PAGING_MODE_LEVEL_6 );

    next_table_mfn = page_to_mfn(table);

    if ( level == IOMMU_PAGING_MODE_LEVEL_1 )
    {
        pt_mfn[level] = next_table_mfn;
        return 0;
    }

    while ( level > IOMMU_PAGING_MODE_LEVEL_1 )
    {
        unsigned int next_level = level - 1;
        pt_mfn[level] = next_table_mfn;

        next_table_vaddr = map_domain_page(_mfn(next_table_mfn));
        pde = next_table_vaddr + pfn_to_pde_idx(pfn, level);

        /* Here might be a super page frame */
        next_table_mfn = amd_iommu_get_next_table_from_pte((uint32_t*)pde)
                         >> PAGE_SHIFT;

        /* Split super page frame into smaller pieces. */
        if ( iommu_is_pte_present((u32*)pde) &&
             (iommu_next_level((u32*)pde) == 0) &&
             next_table_mfn != 0 )
        {
            int i;
            unsigned long mfn, gfn;
            unsigned int page_sz;

            page_sz = 1 << (PTE_PER_TABLE_SHIFT * (next_level - 1));
            gfn =  pfn & ~((1 << (PTE_PER_TABLE_SHIFT * next_level)) - 1);
            mfn = next_table_mfn;

            /* allocate lower level page table */
            table = alloc_amd_iommu_pgtable();
            if ( table == NULL )
            {
                AMD_IOMMU_DEBUG("Cannot allocate I/O page table\n");
                unmap_domain_page(next_table_vaddr);
                return 1;
            }

            next_table_mfn = page_to_mfn(table);
            set_iommu_pde_present((u32*)pde, next_table_mfn, next_level,
                                  !!IOMMUF_writable, !!IOMMUF_readable);

            for ( i = 0; i < PTE_PER_TABLE_SIZE; i++ )
            {
                set_iommu_pte_present(next_table_mfn, gfn, mfn, next_level,
                                      !!IOMMUF_writable, !!IOMMUF_readable);
                mfn += page_sz;
                gfn += page_sz;
            }

            amd_iommu_flush_all_pages(d);
        }

        /* Install lower level page table for non-present entries */
        else if ( !iommu_is_pte_present((u32*)pde) )
        {
            if ( next_table_mfn == 0 )
            {
                table = alloc_amd_iommu_pgtable();
                if ( table == NULL )
                {
                    AMD_IOMMU_DEBUG("Cannot allocate I/O page table\n");
                    unmap_domain_page(next_table_vaddr);
                    return 1;
                }
                next_table_mfn = page_to_mfn(table);
                set_iommu_pde_present((u32*)pde, next_table_mfn, next_level,
                                      !!IOMMUF_writable, !!IOMMUF_readable);
            }
            else /* should never reach here */
            {
                unmap_domain_page(next_table_vaddr);
                return 1;
            }
        }

        unmap_domain_page(next_table_vaddr);
        level--;
    }

    /* mfn of level 1 page table */
    pt_mfn[level] = next_table_mfn;
    return 0;
}

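/* Grow the IO page table until gfn is covered: allocate new root
 * tables, raise the paging mode and re-program every device table
 * entry of the domain to use the new root.
 */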
static int update_paging_mode(struct domain *d, unsigned long gfn)
{
    u16 bdf;
    void *device_entry;
    unsigned int req_id, level, offset;
    unsigned long flags;
    struct pci_dev *pdev;
    struct amd_iommu *iommu = NULL;
    struct page_info *new_root = NULL;
    struct page_info *old_root = NULL;
    void *new_root_vaddr;
    unsigned long old_root_mfn;
    struct domain_iommu *hd = dom_iommu(d);

    if ( gfn == gfn_x(INVALID_GFN) )
        return -EADDRNOTAVAIL;
    ASSERT(!(gfn >> DEFAULT_DOMAIN_ADDRESS_WIDTH));

    level = hd->arch.paging_mode;
    old_root = hd->arch.root_table;
    offset = gfn >> (PTE_PER_TABLE_SHIFT * (level - 1));

    ASSERT(spin_is_locked(&hd->arch.mapping_lock) && is_hvm_domain(d));

    while ( offset >= PTE_PER_TABLE_SIZE )
    {
        /* Allocate and install a new root table.
         * Only the upper I/O page table grows, no need to fix next level bits */
        new_root = alloc_amd_iommu_pgtable();
        if ( new_root == NULL )
        {
            AMD_IOMMU_DEBUG("%s Cannot allocate I/O page table\n",
                            __func__);
            return -ENOMEM;
        }

        new_root_vaddr = __map_domain_page(new_root);
        old_root_mfn = page_to_mfn(old_root);
        set_iommu_pde_present(new_root_vaddr, old_root_mfn, level,
                              !!IOMMUF_writable, !!IOMMUF_readable);
        level++;
        old_root = new_root;
        offset >>= PTE_PER_TABLE_SHIFT;
        unmap_domain_page(new_root_vaddr);
    }

    if ( new_root != NULL )
    {
        hd->arch.paging_mode = level;
        hd->arch.root_table = new_root;

        if ( !pcidevs_locked() )
            AMD_IOMMU_DEBUG("%s Try to access pdev_list "
                            "without acquiring pcidevs_lock.\n", __func__);

        /* Update device table entries using new root table and paging mode */
        for_each_pdev( d, pdev )
        {
            bdf = PCI_BDF2(pdev->bus, pdev->devfn);
            iommu = find_iommu_for_device(pdev->seg, bdf);
            if ( !iommu )
            {
                AMD_IOMMU_DEBUG("%s Fail to find iommu.\n", __func__);
                return -ENODEV;
            }

            spin_lock_irqsave(&iommu->lock, flags);
            do {
                req_id = get_dma_requestor_id(pdev->seg, bdf);
                device_entry = iommu->dev_table.buffer +
                               (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);

                /* valid = 0 only works for dom0 passthrough mode */
                amd_iommu_set_root_page_table((u32 *)device_entry,
                                              page_to_maddr(hd->arch.root_table),
                                              d->domain_id,
                                              hd->arch.paging_mode, 1);

                amd_iommu_flush_device(iommu, req_id);
                bdf += pdev->phantom_stride;
            } while ( PCI_DEVFN2(bdf) != pdev->devfn &&
                      PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
            spin_unlock_irqrestore(&iommu->lock, flags);
        }

        /* For safety, invalidate all entries */
        amd_iommu_flush_all_pages(d);
    }
    return 0;
}

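/* Map gfn to mfn in the domain's IO page table, growing the table if
 * needed and merging contiguous mappings into super pages where
 * possible.
 */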
int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
                       unsigned int flags)
{
    bool_t need_flush = 0;
    struct domain_iommu *hd = dom_iommu(d);
    int rc;
    unsigned long pt_mfn[7];
    unsigned int merge_level;

    if ( iommu_use_hap_pt(d) )
        return 0;

    memset(pt_mfn, 0, sizeof(pt_mfn));

    spin_lock(&hd->arch.mapping_lock);

    rc = amd_iommu_alloc_root(hd);
    if ( rc )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_DEBUG("Root table alloc failed, gfn = %lx\n", gfn);
        domain_crash(d);
        return rc;
    }

    /* Since an HVM domain is initialized with a 2 level IO page table,
     * we might need a deeper page table for a larger gfn now */
    if ( is_hvm_domain(d) )
    {
        if ( update_paging_mode(d, gfn) )
        {
            spin_unlock(&hd->arch.mapping_lock);
            AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn);
            domain_crash(d);
            return -EFAULT;
        }
    }

    if ( iommu_pde_from_gfn(d, gfn, pt_mfn) || (pt_mfn[1] == 0) )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_DEBUG("Invalid IO pagetable entry gfn = %lx\n", gfn);
        domain_crash(d);
        return -EFAULT;
    }

    /* Install 4k mapping first */
    need_flush = set_iommu_pte_present(pt_mfn[1], gfn, mfn,
                                       IOMMU_PAGING_MODE_LEVEL_1,
                                       !!(flags & IOMMUF_writable),
                                       !!(flags & IOMMUF_readable));

    /* Do not increase pde count if io mapping has not been changed */
    if ( !need_flush )
        goto out;

    /* 4K mapping for PV guests never changes,
     * no need to flush if we trust non-present bits */
    if ( is_hvm_domain(d) )
        amd_iommu_flush_pages(d, gfn, 0);

    for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2;
          merge_level <= hd->arch.paging_mode; merge_level++ )
    {
        if ( pt_mfn[merge_level] == 0 )
            break;
        if ( !iommu_update_pde_count(d, pt_mfn[merge_level],
                                     gfn, mfn, merge_level) )
            break;

        if ( iommu_merge_pages(d, pt_mfn[merge_level], gfn,
                               flags, merge_level) )
        {
            spin_unlock(&hd->arch.mapping_lock);
            AMD_IOMMU_DEBUG("Merge iommu page failed at level %d, "
                            "gfn = %lx mfn = %lx\n", merge_level, gfn, mfn);
            domain_crash(d);
            return -EFAULT;
        }

        /* Deallocate lower level page table */
        free_amd_iommu_pgtable(mfn_to_page(pt_mfn[merge_level - 1]));
    }

out:
    spin_unlock(&hd->arch.mapping_lock);
    return 0;
}

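/* Remove the 4K mapping for gfn from the domain's IO page table and
 * flush the IOMMU TLB.
 */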
int amd_iommu_unmap_page(struct domain *d, unsigned long gfn)
{
    unsigned long pt_mfn[7];
    struct domain_iommu *hd = dom_iommu(d);

    if ( iommu_use_hap_pt(d) )
        return 0;

    memset(pt_mfn, 0, sizeof(pt_mfn));

    spin_lock(&hd->arch.mapping_lock);

    if ( !hd->arch.root_table )
    {
        spin_unlock(&hd->arch.mapping_lock);
        return 0;
    }

    /* Since an HVM domain is initialized with a 2 level IO page table,
     * we might need a deeper page table for a larger gfn now */
    if ( is_hvm_domain(d) )
    {
        int rc = update_paging_mode(d, gfn);

        if ( rc )
        {
            spin_unlock(&hd->arch.mapping_lock);
            AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn);
            if ( rc != -EADDRNOTAVAIL )
                domain_crash(d);
            return rc;
        }
    }

    if ( iommu_pde_from_gfn(d, gfn, pt_mfn) || (pt_mfn[1] == 0) )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_DEBUG("Invalid IO pagetable entry gfn = %lx\n", gfn);
        domain_crash(d);
        return -EFAULT;
    }

    /* mark PTE as 'page not present' */
    clear_iommu_pte_present(pt_mfn[1], gfn);
    spin_unlock(&hd->arch.mapping_lock);

    amd_iommu_flush_pages(d, gfn, 0);

    return 0;
}

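/* Establish a 1:1 mapping for a unity range of size bytes starting at
 * phys_addr, with the given read/write permissions.
 */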
int amd_iommu_reserve_domain_unity_map(struct domain *domain,
                                       u64 phys_addr,
                                       unsigned long size, int iw, int ir)
{
    unsigned long npages, i;
    unsigned long gfn;
    unsigned int flags = !!ir;
    int rt = 0;

    if ( iw )
        flags |= IOMMUF_writable;

    npages = region_to_pages(phys_addr, size);
    gfn = phys_addr >> PAGE_SHIFT;
    for ( i = 0; i < npages; i++ )
    {
        rt = amd_iommu_map_page(domain, gfn + i, gfn + i, flags);
        if ( rt != 0 )
            return rt;
    }
    return 0;
}

/* Share p2m table with iommu. */
void amd_iommu_share_p2m(struct domain *d)
{
    struct domain_iommu *hd = dom_iommu(d);
    struct page_info *p2m_table;
    mfn_t pgd_mfn;

    pgd_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m_get_hostp2m(d)));
    p2m_table = mfn_to_page(mfn_x(pgd_mfn));

    if ( hd->arch.root_table != p2m_table )
    {
        free_amd_iommu_pgtable(hd->arch.root_table);
        hd->arch.root_table = p2m_table;

        /* When sharing p2m with iommu, paging mode = 4 */
        hd->arch.paging_mode = IOMMU_PAGING_MODE_LEVEL_4;
        AMD_IOMMU_DEBUG("Share p2m table with iommu: p2m table = %#lx\n",
                        mfn_x(pgd_mfn));
    }
}