/*
 * Copyright (C) 2007 Advanced Micro Devices, Inc.
 * Author: Leo Duran <leo.duran@amd.com>
 * Author: Wei Wang <wei.wang2@amd.com> - adapted to xen
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/acpi.h>
#include <xen/sched.h>
#include <asm/p2m.h>
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
#include "../ats.h"
#include <xen/pci.h>

/* Given pfn and page table level, return pde index */
static unsigned int pfn_to_pde_idx(unsigned long pfn, unsigned int level)
{
    unsigned int idx;

    idx = pfn >> (PTE_PER_TABLE_SHIFT * (--level));
    idx &= ~PTE_PER_TABLE_MASK;
    return idx;
}

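/*
 * Zero the level 1 PTE for gfn in the page table page at l1_mfn, making
 * the mapping non-present.  The caller is responsible for flushing the
 * IOMMU TLB afterwards (see amd_iommu_unmap_page()).
 */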
void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn)
{
    u64 *table, *pte;

    table = map_domain_page(_mfn(l1_mfn));
    pte = table + pfn_to_pde_idx(gfn, IOMMU_PAGING_MODE_LEVEL_1);
    *pte = 0;
    unmap_domain_page(table);
}

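/*
 * Write a page directory/table entry as two 32-bit words: pde[1] carries
 * the high address bits together with the IR/IW permissions (and the FC
 * bit for leaf entries, i.e. next_level == IOMMU_PAGING_MODE_LEVEL_0),
 * while pde[0] carries the low address bits, the next-level field and the
 * present bit.  Returns true when the entry previously pointed at a
 * different address, in which case the caller needs to flush the IOMMU.
 */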
static bool_t set_iommu_pde_present(u32 *pde, unsigned long next_mfn,
                                    unsigned int next_level,
                                    bool_t iw, bool_t ir)
{
    u64 addr_lo, addr_hi, maddr_old, maddr_next;
    u32 entry;
    bool_t need_flush = 0;

    maddr_next = (u64)next_mfn << PAGE_SHIFT;

    addr_hi = get_field_from_reg_u32(pde[1],
                                     IOMMU_PTE_ADDR_HIGH_MASK,
                                     IOMMU_PTE_ADDR_HIGH_SHIFT);
    addr_lo = get_field_from_reg_u32(pde[0],
                                     IOMMU_PTE_ADDR_LOW_MASK,
                                     IOMMU_PTE_ADDR_LOW_SHIFT);

    maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);

    if ( maddr_old != maddr_next )
        need_flush = 1;

    addr_lo = maddr_next & DMA_32BIT_MASK;
    addr_hi = maddr_next >> 32;

    /* enable read/write permissions, which will be enforced at the PTE */
    set_field_in_reg_u32((u32)addr_hi, 0,
                         IOMMU_PDE_ADDR_HIGH_MASK,
                         IOMMU_PDE_ADDR_HIGH_SHIFT, &entry);
    set_field_in_reg_u32(iw, entry,
                         IOMMU_PDE_IO_WRITE_PERMISSION_MASK,
                         IOMMU_PDE_IO_WRITE_PERMISSION_SHIFT, &entry);
    set_field_in_reg_u32(ir, entry,
                         IOMMU_PDE_IO_READ_PERMISSION_MASK,
                         IOMMU_PDE_IO_READ_PERMISSION_SHIFT, &entry);

    /* The FC bit should be enabled in the PTE; this helps to solve
     * potential issues with ATS devices.
     */
    if ( next_level == IOMMU_PAGING_MODE_LEVEL_0 )
        set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                             IOMMU_PTE_FC_MASK, IOMMU_PTE_FC_SHIFT, &entry);
    pde[1] = entry;

    /* mark next level as 'present' */
    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
                         IOMMU_PDE_ADDR_LOW_MASK,
                         IOMMU_PDE_ADDR_LOW_SHIFT, &entry);
    set_field_in_reg_u32(next_level, entry,
                         IOMMU_PDE_NEXT_LEVEL_MASK,
                         IOMMU_PDE_NEXT_LEVEL_SHIFT, &entry);
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_PDE_PRESENT_MASK,
                         IOMMU_PDE_PRESENT_SHIFT, &entry);
    pde[0] = entry;

    return need_flush;
}

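/*
 * Install a leaf mapping gfn -> next_mfn in the page table page at pt_mfn,
 * indexed at pde_level.  The next-level field is always written as
 * IOMMU_PAGING_MODE_LEVEL_0, so this covers both 4k mappings in a level 1
 * table and superpage leaves at higher levels.  Returns whether the IOMMU
 * needs flushing.
 */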
static bool_t set_iommu_pte_present(unsigned long pt_mfn, unsigned long gfn,
                                    unsigned long next_mfn, int pde_level,
                                    bool_t iw, bool_t ir)
{
    u64 *table;
    u32 *pde;
    bool_t need_flush = 0;

    table = map_domain_page(_mfn(pt_mfn));

    pde = (u32*)(table + pfn_to_pde_idx(gfn, pde_level));

    need_flush = set_iommu_pde_present(pde, next_mfn,
                                       IOMMU_PAGING_MODE_LEVEL_0, iw, ir);
    unmap_domain_page(table);
    return need_flush;
}

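/*
 * Fill in the translation-related fields of a device table entry (DTE):
 * the domain id (dte[2]), the I/O page table root pointer with read/write
 * permission (dte[1]/dte[0]), the paging mode and translation-valid (TV)
 * bit, plus the V bit according to 'valid'.
 */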
void amd_iommu_set_root_page_table(
    u32 *dte, u64 root_ptr, u16 domain_id, u8 paging_mode, u8 valid)
{
    u64 addr_hi, addr_lo;
    u32 entry;
    set_field_in_reg_u32(domain_id, 0,
                         IOMMU_DEV_TABLE_DOMAIN_ID_MASK,
                         IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT, &entry);
    dte[2] = entry;

    addr_lo = root_ptr & DMA_32BIT_MASK;
    addr_hi = root_ptr >> 32;

    set_field_in_reg_u32((u32)addr_hi, 0,
                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK,
                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT, &entry);
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_DEV_TABLE_IO_WRITE_PERMISSION_MASK,
                         IOMMU_DEV_TABLE_IO_WRITE_PERMISSION_SHIFT, &entry);
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_DEV_TABLE_IO_READ_PERMISSION_MASK,
                         IOMMU_DEV_TABLE_IO_READ_PERMISSION_SHIFT, &entry);
    dte[1] = entry;

    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_MASK,
                         IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_SHIFT, &entry);
    set_field_in_reg_u32(paging_mode, entry,
                         IOMMU_DEV_TABLE_PAGING_MODE_MASK,
                         IOMMU_DEV_TABLE_PAGING_MODE_SHIFT, &entry);
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_DEV_TABLE_TRANSLATION_VALID_MASK,
                         IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT, &entry);
    set_field_in_reg_u32(valid ? IOMMU_CONTROL_ENABLED :
                         IOMMU_CONTROL_DISABLED, entry,
                         IOMMU_DEV_TABLE_VALID_MASK,
                         IOMMU_DEV_TABLE_VALID_SHIFT, &entry);
    dte[0] = entry;
}

void iommu_dte_set_iotlb(u32 *dte, u8 i)
{
    u32 entry;

    entry = dte[3];
    set_field_in_reg_u32(!!i, entry,
                         IOMMU_DEV_TABLE_IOTLB_SUPPORT_MASK,
                         IOMMU_DEV_TABLE_IOTLB_SUPPORT_SHIFT, &entry);
    dte[3] = entry;
}

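/*
 * Point a DTE at an interrupt remapping table: IntCtl is set to 2 so that
 * fixed and arbitrated interrupts are remapped, the table length field is
 * written as 0xB (2048 entries), unmapped interrupts fault rather than
 * being ignored, and the IV bit follows 'int_valid'.
 */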
void __init amd_iommu_set_intremap_table(
    u32 *dte, u64 intremap_ptr, u8 int_valid)
{
    u64 addr_hi, addr_lo;
    u32 entry;

    addr_lo = intremap_ptr & DMA_32BIT_MASK;
    addr_hi = intremap_ptr >> 32;

    entry = dte[5];
    set_field_in_reg_u32((u32)addr_hi, entry,
                         IOMMU_DEV_TABLE_INT_TABLE_PTR_HIGH_MASK,
                         IOMMU_DEV_TABLE_INT_TABLE_PTR_HIGH_SHIFT, &entry);
    /* Fixed and arbitrated interrupts remapped */
    set_field_in_reg_u32(2, entry,
                         IOMMU_DEV_TABLE_INT_CONTROL_MASK,
                         IOMMU_DEV_TABLE_INT_CONTROL_SHIFT, &entry);
    dte[5] = entry;

    set_field_in_reg_u32((u32)addr_lo >> 6, 0,
                         IOMMU_DEV_TABLE_INT_TABLE_PTR_LOW_MASK,
                         IOMMU_DEV_TABLE_INT_TABLE_PTR_LOW_SHIFT, &entry);
    /* 2048 entries */
    set_field_in_reg_u32(0xB, entry,
                         IOMMU_DEV_TABLE_INT_TABLE_LENGTH_MASK,
                         IOMMU_DEV_TABLE_INT_TABLE_LENGTH_SHIFT, &entry);

    /* unmapped interrupts result in IO page faults */
    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
                         IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_MASK,
                         IOMMU_DEV_TABLE_INT_TABLE_IGN_UNMAPPED_SHIFT, &entry);
    set_field_in_reg_u32(int_valid ? IOMMU_CONTROL_ENABLED :
                         IOMMU_CONTROL_DISABLED, entry,
                         IOMMU_DEV_TABLE_INT_VALID_MASK,
                         IOMMU_DEV_TABLE_INT_VALID_SHIFT, &entry);
    dte[4] = entry;
}

void __init iommu_dte_add_device_entry(u32 *dte, struct ivrs_mappings *ivrs_dev)
{
    u32 entry;
    u8 sys_mgt, dev_ex, flags;
    u8 mask = ~(0x7 << 3);

    dte[7] = dte[6] = dte[4] = dte[2] = dte[1] = dte[0] = 0;

    flags = ivrs_dev->device_flags;
    sys_mgt = get_field_from_byte(flags, ACPI_IVHD_SYSTEM_MGMT);
    dev_ex = ivrs_dev->dte_allow_exclusion;

    flags &= mask;
    set_field_in_reg_u32(flags, 0,
                         IOMMU_DEV_TABLE_IVHD_FLAGS_MASK,
                         IOMMU_DEV_TABLE_IVHD_FLAGS_SHIFT, &entry);
    dte[5] = entry;

    set_field_in_reg_u32(sys_mgt, 0,
                         IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_MASK,
                         IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_SHIFT, &entry);
    set_field_in_reg_u32(dev_ex, entry,
                         IOMMU_DEV_TABLE_ALLOW_EXCLUSION_MASK,
                         IOMMU_DEV_TABLE_ALLOW_EXCLUSION_SHIFT, &entry);
    dte[3] = entry;
}

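/*
 * Program the guest CR3 fields of a DTE for guest (nested) translation.
 * The gcr3 address is split across three fields: bits 14:12 go into dte[1],
 * bits 30:15 into dte[2] and the remaining high bits into dte[3]; the GV
 * and GLX fields in dte[1] enable and size the guest CR3 table, and the
 * IOTLB (I) bit is set because it must be enabled when gcr3 is in use.
 */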
void iommu_dte_set_guest_cr3(u32 *dte, u16 dom_id, u64 gcr3,
                             int gv, unsigned int glx)
{
    u32 entry, gcr3_1, gcr3_2, gcr3_3;

    gcr3_3 = gcr3 >> 31;
    gcr3_2 = (gcr3 >> 15) & 0xFFFF;
    gcr3_1 = (gcr3 >> PAGE_SHIFT) & 0x7;

    /* I bit must be set when gcr3 is enabled */
    entry = dte[3];
    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                         IOMMU_DEV_TABLE_IOTLB_SUPPORT_MASK,
                         IOMMU_DEV_TABLE_IOTLB_SUPPORT_SHIFT, &entry);
    /* update gcr3 */
    set_field_in_reg_u32(gcr3_3, entry,
                         IOMMU_DEV_TABLE_GCR3_3_MASK,
                         IOMMU_DEV_TABLE_GCR3_3_SHIFT, &entry);
    dte[3] = entry;

    set_field_in_reg_u32(dom_id, entry,
                         IOMMU_DEV_TABLE_DOMAIN_ID_MASK,
                         IOMMU_DEV_TABLE_DOMAIN_ID_SHIFT, &entry);
    /* update gcr3 */
    entry = dte[2];
    set_field_in_reg_u32(gcr3_2, entry,
                         IOMMU_DEV_TABLE_GCR3_2_MASK,
                         IOMMU_DEV_TABLE_GCR3_2_SHIFT, &entry);
    dte[2] = entry;

    entry = dte[1];
    /* Enable GV bit */
    set_field_in_reg_u32(!!gv, entry,
                         IOMMU_DEV_TABLE_GV_MASK,
                         IOMMU_DEV_TABLE_GV_SHIFT, &entry);

    /* 1 level guest cr3 table */
    set_field_in_reg_u32(glx, entry,
                         IOMMU_DEV_TABLE_GLX_MASK,
                         IOMMU_DEV_TABLE_GLX_SHIFT, &entry);
    /* update gcr3 */
    set_field_in_reg_u32(gcr3_1, entry,
                         IOMMU_DEV_TABLE_GCR3_1_MASK,
                         IOMMU_DEV_TABLE_GCR3_1_SHIFT, &entry);
    dte[1] = entry;
}

u64 amd_iommu_get_next_table_from_pte(u32 *entry)
{
    u64 addr_lo, addr_hi, ptr;

    addr_lo = get_field_from_reg_u32(
        entry[0],
        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_MASK,
        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_SHIFT);

    addr_hi = get_field_from_reg_u32(
        entry[1],
        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK,
        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT);

    ptr = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
    return ptr;
}

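/*
 * Superpage coalescing: when all mappings under one pde turn out to be
 * mfn-contiguous and suitably aligned, the pde is replaced by a superpage
 * mapping (see iommu_update_pde_count() and iommu_merge_pages() below).
 * The running count of contiguous mappings is kept in the ignored bits of
 * the pde itself, via get_pde_count() and set_pde_count().
 */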
/* For each pde, we use the ignored bits (bit 1 - bit 8 and bit 63)
 * to save the pde count; a pde count of 511 is a candidate for page
 * coalescing.
 */
static unsigned int get_pde_count(u64 pde)
{
    unsigned int count;
    u64 upper_mask = 1ULL << 63;
    u64 lower_mask = 0xFF << 1;

    count = ((pde & upper_mask) >> 55) | ((pde & lower_mask) >> 1);
    return count;
}

/* Convert pde count into iommu pte ignored bits */
static void set_pde_count(u64 *pde, unsigned int count)
{
    u64 upper_mask = 1ULL << 8;
    u64 lower_mask = 0xFF;
    u64 pte_mask = (~(1ULL << 63)) & (~(0xFF << 1));

    *pde &= pte_mask;
    *pde |= ((count & upper_mask) << 55) | ((count & lower_mask) << 1);
}

/* Return 1 if pages are suitable for merging at merge_level;
 * otherwise increase the pde count if mfn is contiguous with mfn - 1.
 */
static int iommu_update_pde_count(struct domain *d, unsigned long pt_mfn,
                                  unsigned long gfn, unsigned long mfn,
                                  unsigned int merge_level)
{
    unsigned int pde_count, next_level;
    unsigned long first_mfn;
    u64 *table, *pde, *ntable;
    u64 ntable_maddr, mask;
    struct domain_iommu *hd = dom_iommu(d);
    bool_t ok = 0;

    ASSERT( spin_is_locked(&hd->arch.mapping_lock) && pt_mfn );

    next_level = merge_level - 1;

    /* get pde at merge level */
    table = map_domain_page(_mfn(pt_mfn));
    pde = table + pfn_to_pde_idx(gfn, merge_level);

    /* get page table of next level */
    ntable_maddr = amd_iommu_get_next_table_from_pte((u32*)pde);
    ntable = map_domain_page(_mfn(paddr_to_pfn(ntable_maddr)));

    /* get the first mfn of next level */
    first_mfn = amd_iommu_get_next_table_from_pte((u32*)ntable) >> PAGE_SHIFT;

    if ( first_mfn == 0 )
        goto out;

    mask = (1ULL << (PTE_PER_TABLE_SHIFT * next_level)) - 1;

    if ( ((first_mfn & mask) == 0) &&
         (((gfn & mask) | first_mfn) == mfn) )
    {
        pde_count = get_pde_count(*pde);

        if ( pde_count == (PTE_PER_TABLE_SIZE - 1) )
            ok = 1;
        else if ( pde_count < (PTE_PER_TABLE_SIZE - 1) )
        {
            pde_count++;
            set_pde_count(pde, pde_count);
        }
    }
    else
        /* non-contiguous mapping */
        set_pde_count(pde, 0);

out:
    unmap_domain_page(ntable);
    unmap_domain_page(table);

    return ok;
}

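/*
 * Replace the pde at merge_level with a superpage mapping of the whole
 * range: the entry is rewritten to point directly at the first mfn of the
 * lower-level table's range with a next-level field of 0, and the IOMMU is
 * flushed.  Returns 0 on success; the caller frees the now-unused
 * lower-level page table.
 */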
static int iommu_merge_pages(struct domain *d, unsigned long pt_mfn,
                             unsigned long gfn, unsigned int flags,
                             unsigned int merge_level)
{
    u64 *table, *pde, *ntable;
    u64 ntable_mfn;
    unsigned long first_mfn;
    struct domain_iommu *hd = dom_iommu(d);

    ASSERT( spin_is_locked(&hd->arch.mapping_lock) && pt_mfn );

    table = map_domain_page(_mfn(pt_mfn));
    pde = table + pfn_to_pde_idx(gfn, merge_level);

    /* get first mfn */
    ntable_mfn = amd_iommu_get_next_table_from_pte((u32*)pde) >> PAGE_SHIFT;

    if ( ntable_mfn == 0 )
    {
        unmap_domain_page(table);
        return 1;
    }

    ntable = map_domain_page(_mfn(ntable_mfn));
    first_mfn = amd_iommu_get_next_table_from_pte((u32*)ntable) >> PAGE_SHIFT;

    if ( first_mfn == 0 )
    {
        unmap_domain_page(ntable);
        unmap_domain_page(table);
        return 1;
    }

    /* setup super page mapping, next level = 0 */
    set_iommu_pde_present((u32*)pde, first_mfn,
                          IOMMU_PAGING_MODE_LEVEL_0,
                          !!(flags & IOMMUF_writable),
                          !!(flags & IOMMUF_readable));

    amd_iommu_flush_all_pages(d);

    unmap_domain_page(ntable);
    unmap_domain_page(table);
    return 0;
}

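/*
 * On success, pt_mfn[] is filled with the mfn of the page table page used
 * at each level; pt_mfn[1] is the level 1 table into which the 4k mapping
 * for pfn can be written.
 */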
/* Walk IO page tables and build level page tables if necessary.
 * {Re, un}mapping super page frames causes re-allocation of IO
 * page tables.
 */
static int iommu_pde_from_gfn(struct domain *d, unsigned long pfn,
                              unsigned long pt_mfn[])
{
    u64 *pde, *next_table_vaddr;
    unsigned long next_table_mfn;
    unsigned int level;
    struct page_info *table;
    const struct domain_iommu *hd = dom_iommu(d);

    table = hd->arch.root_table;
    level = hd->arch.paging_mode;

    BUG_ON( table == NULL || level < IOMMU_PAGING_MODE_LEVEL_1 ||
            level > IOMMU_PAGING_MODE_LEVEL_6 );

    next_table_mfn = page_to_mfn(table);

    if ( level == IOMMU_PAGING_MODE_LEVEL_1 )
    {
        pt_mfn[level] = next_table_mfn;
        return 0;
    }

    while ( level > IOMMU_PAGING_MODE_LEVEL_1 )
    {
        unsigned int next_level = level - 1;
        pt_mfn[level] = next_table_mfn;

        next_table_vaddr = map_domain_page(_mfn(next_table_mfn));
        pde = next_table_vaddr + pfn_to_pde_idx(pfn, level);

        /* Here might be a super page frame */
        next_table_mfn = amd_iommu_get_next_table_from_pte((uint32_t*)pde)
                         >> PAGE_SHIFT;

        /* Split super page frame into smaller pieces. */
        if ( iommu_is_pte_present((u32*)pde) &&
             (iommu_next_level((u32*)pde) == 0) &&
             next_table_mfn != 0 )
        {
            int i;
            unsigned long mfn, gfn;
            unsigned int page_sz;

            page_sz = 1 << (PTE_PER_TABLE_SHIFT * (next_level - 1));
            gfn = pfn & ~((1 << (PTE_PER_TABLE_SHIFT * next_level)) - 1);
            mfn = next_table_mfn;

            /* allocate lower level page table */
            table = alloc_amd_iommu_pgtable();
            if ( table == NULL )
            {
                AMD_IOMMU_DEBUG("Cannot allocate I/O page table\n");
                unmap_domain_page(next_table_vaddr);
                return 1;
            }

            next_table_mfn = page_to_mfn(table);
            set_iommu_pde_present((u32*)pde, next_table_mfn, next_level,
                                  !!IOMMUF_writable, !!IOMMUF_readable);

            for ( i = 0; i < PTE_PER_TABLE_SIZE; i++ )
            {
                set_iommu_pte_present(next_table_mfn, gfn, mfn, next_level,
                                      !!IOMMUF_writable, !!IOMMUF_readable);
                mfn += page_sz;
                gfn += page_sz;
            }

            amd_iommu_flush_all_pages(d);
        }

        /* Install lower level page table for non-present entries */
        else if ( !iommu_is_pte_present((u32*)pde) )
        {
            if ( next_table_mfn == 0 )
            {
                table = alloc_amd_iommu_pgtable();
                if ( table == NULL )
                {
                    AMD_IOMMU_DEBUG("Cannot allocate I/O page table\n");
                    unmap_domain_page(next_table_vaddr);
                    return 1;
                }
                next_table_mfn = page_to_mfn(table);
                set_iommu_pde_present((u32*)pde, next_table_mfn, next_level,
                                      !!IOMMUF_writable, !!IOMMUF_readable);
            }
            else /* should never reach here */
            {
                unmap_domain_page(next_table_vaddr);
                return 1;
            }
        }

        unmap_domain_page(next_table_vaddr);
        level--;
    }

    /* mfn of level 1 page table */
    pt_mfn[level] = next_table_mfn;
    return 0;
}

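/*
 * Grow the IO page table so that gfn is addressable: add root levels on
 * top of the current root table until the pde index for gfn fits, then
 * re-point every device table entry of the domain at the new root table
 * and paging mode, flushing the affected devices and, for safety, all
 * pages.
 */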
static int update_paging_mode(struct domain *d, unsigned long gfn)
{
    u16 bdf;
    void *device_entry;
    unsigned int req_id, level, offset;
    unsigned long flags;
    struct pci_dev *pdev;
    struct amd_iommu *iommu = NULL;
    struct page_info *new_root = NULL;
    struct page_info *old_root = NULL;
    void *new_root_vaddr;
    unsigned long old_root_mfn;
    struct domain_iommu *hd = dom_iommu(d);

    if ( gfn == gfn_x(INVALID_GFN) )
        return -EADDRNOTAVAIL;
    ASSERT(!(gfn >> DEFAULT_DOMAIN_ADDRESS_WIDTH));

    level = hd->arch.paging_mode;
    old_root = hd->arch.root_table;
    offset = gfn >> (PTE_PER_TABLE_SHIFT * (level - 1));

    ASSERT(spin_is_locked(&hd->arch.mapping_lock) && is_hvm_domain(d));

    while ( offset >= PTE_PER_TABLE_SIZE )
    {
        /* Allocate and install a new root table.
         * Only the upper I/O page table grows; no need to fix next level bits. */
        new_root = alloc_amd_iommu_pgtable();
        if ( new_root == NULL )
        {
            AMD_IOMMU_DEBUG("%s Cannot allocate I/O page table\n",
                            __func__);
            return -ENOMEM;
        }

        new_root_vaddr = __map_domain_page(new_root);
        old_root_mfn = page_to_mfn(old_root);
        set_iommu_pde_present(new_root_vaddr, old_root_mfn, level,
                              !!IOMMUF_writable, !!IOMMUF_readable);
        level++;
        old_root = new_root;
        offset >>= PTE_PER_TABLE_SHIFT;
        unmap_domain_page(new_root_vaddr);
    }

    if ( new_root != NULL )
    {
        hd->arch.paging_mode = level;
        hd->arch.root_table = new_root;

        if ( !pcidevs_locked() )
            AMD_IOMMU_DEBUG("%s Try to access pdev_list "
                            "without acquiring pcidevs_lock.\n", __func__);

        /* Update device table entries using new root table and paging mode */
        for_each_pdev( d, pdev )
        {
            bdf = PCI_BDF2(pdev->bus, pdev->devfn);
            iommu = find_iommu_for_device(pdev->seg, bdf);
            if ( !iommu )
            {
                AMD_IOMMU_DEBUG("%s Fail to find iommu.\n", __func__);
                return -ENODEV;
            }

            spin_lock_irqsave(&iommu->lock, flags);
            do {
                req_id = get_dma_requestor_id(pdev->seg, bdf);
                device_entry = iommu->dev_table.buffer +
                               (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);

                /* valid = 0 only works for dom0 passthrough mode */
                amd_iommu_set_root_page_table((u32 *)device_entry,
                                              page_to_maddr(hd->arch.root_table),
                                              d->domain_id,
                                              hd->arch.paging_mode, 1);

                amd_iommu_flush_device(iommu, req_id);
                bdf += pdev->phantom_stride;
            } while ( PCI_DEVFN2(bdf) != pdev->devfn &&
                      PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
            spin_unlock_irqrestore(&iommu->lock, flags);
        }

        /* For safety, invalidate all entries */
        amd_iommu_flush_all_pages(d);
    }
    return 0;
}

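/*
 * Map gfn -> mfn for DMA.  After taking the mapping lock this makes sure a
 * root table exists, grows the paging mode for HVM domains if gfn is out
 * of reach, installs a 4k mapping at level 1, and then tries to merge
 * newly contiguous ranges into superpage mappings level by level, freeing
 * the page tables that become redundant.
 */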
int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
                       unsigned int flags)
{
    bool_t need_flush = 0;
    struct domain_iommu *hd = dom_iommu(d);
    int rc;
    unsigned long pt_mfn[7];
    unsigned int merge_level;

    if ( iommu_use_hap_pt(d) )
        return 0;

    memset(pt_mfn, 0, sizeof(pt_mfn));

    spin_lock(&hd->arch.mapping_lock);

    rc = amd_iommu_alloc_root(hd);
    if ( rc )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_DEBUG("Root table alloc failed, gfn = %lx\n", gfn);
        domain_crash(d);
        return rc;
    }

    /* Since an HVM domain is initialized with a 2 level IO page table,
     * we might need a deeper page table for a larger gfn now */
    if ( is_hvm_domain(d) )
    {
        if ( update_paging_mode(d, gfn) )
        {
            spin_unlock(&hd->arch.mapping_lock);
            AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn);
            domain_crash(d);
            return -EFAULT;
        }
    }

    if ( iommu_pde_from_gfn(d, gfn, pt_mfn) || (pt_mfn[1] == 0) )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_DEBUG("Invalid IO pagetable entry gfn = %lx\n", gfn);
        domain_crash(d);
        return -EFAULT;
    }

    /* Install 4k mapping first */
    need_flush = set_iommu_pte_present(pt_mfn[1], gfn, mfn,
                                       IOMMU_PAGING_MODE_LEVEL_1,
                                       !!(flags & IOMMUF_writable),
                                       !!(flags & IOMMUF_readable));

    /* Do not increase pde count if io mapping has not been changed */
    if ( !need_flush )
        goto out;

    /* The 4K mapping for PV guests never changes,
     * so there is no need to flush if we trust non-present bits */
    if ( is_hvm_domain(d) )
        amd_iommu_flush_pages(d, gfn, 0);

    for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2;
          merge_level <= hd->arch.paging_mode; merge_level++ )
    {
        if ( pt_mfn[merge_level] == 0 )
            break;
        if ( !iommu_update_pde_count(d, pt_mfn[merge_level],
                                     gfn, mfn, merge_level) )
            break;

        if ( iommu_merge_pages(d, pt_mfn[merge_level], gfn,
                               flags, merge_level) )
        {
            spin_unlock(&hd->arch.mapping_lock);
            AMD_IOMMU_DEBUG("Merge iommu page failed at level %d, "
                            "gfn = %lx mfn = %lx\n", merge_level, gfn, mfn);
            domain_crash(d);
            return -EFAULT;
        }

        /* Deallocate lower level page table */
        free_amd_iommu_pgtable(mfn_to_page(pt_mfn[merge_level - 1]));
    }

out:
    spin_unlock(&hd->arch.mapping_lock);
    return 0;
}

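/*
 * Remove the DMA mapping for gfn by clearing its level 1 PTE and flushing
 * the IOMMU TLB.  Nothing to do when the p2m table is shared with the
 * IOMMU or when no root table has been allocated yet.
 */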
int amd_iommu_unmap_page(struct domain *d, unsigned long gfn)
{
    unsigned long pt_mfn[7];
    struct domain_iommu *hd = dom_iommu(d);

    if ( iommu_use_hap_pt(d) )
        return 0;

    memset(pt_mfn, 0, sizeof(pt_mfn));

    spin_lock(&hd->arch.mapping_lock);

    if ( !hd->arch.root_table )
    {
        spin_unlock(&hd->arch.mapping_lock);
        return 0;
    }

    /* Since an HVM domain is initialized with a 2 level IO page table,
     * we might need a deeper page table for a larger gfn now */
    if ( is_hvm_domain(d) )
    {
        int rc = update_paging_mode(d, gfn);

        if ( rc )
        {
            spin_unlock(&hd->arch.mapping_lock);
            AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn);
            if ( rc != -EADDRNOTAVAIL )
                domain_crash(d);
            return rc;
        }
    }

    if ( iommu_pde_from_gfn(d, gfn, pt_mfn) || (pt_mfn[1] == 0) )
    {
        spin_unlock(&hd->arch.mapping_lock);
        AMD_IOMMU_DEBUG("Invalid IO pagetable entry gfn = %lx\n", gfn);
        domain_crash(d);
        return -EFAULT;
    }

    /* mark PTE as 'page not present' */
    clear_iommu_pte_present(pt_mfn[1], gfn);
    spin_unlock(&hd->arch.mapping_lock);

    amd_iommu_flush_pages(d, gfn, 0);

    return 0;
}

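/*
 * Map a unity region 1:1 (gfn == mfn), one 4k page at a time, with the
 * requested read/write permissions.
 */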
int amd_iommu_reserve_domain_unity_map(struct domain *domain,
                                       u64 phys_addr,
                                       unsigned long size, int iw, int ir)
{
    unsigned long npages, i;
    unsigned long gfn;
    unsigned int flags = !!ir;
    int rt = 0;

    if ( iw )
        flags |= IOMMUF_writable;

    npages = region_to_pages(phys_addr, size);
    gfn = phys_addr >> PAGE_SHIFT;
    for ( i = 0; i < npages; i++ )
    {
        rt = amd_iommu_map_page(domain, gfn + i, gfn + i, flags);
        if ( rt != 0 )
            return rt;
    }
    return 0;
}

/* Share p2m table with iommu. */
void amd_iommu_share_p2m(struct domain *d)
{
    struct domain_iommu *hd = dom_iommu(d);
    struct page_info *p2m_table;
    mfn_t pgd_mfn;

    pgd_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m_get_hostp2m(d)));
    p2m_table = mfn_to_page(mfn_x(pgd_mfn));

    if ( hd->arch.root_table != p2m_table )
    {
        free_amd_iommu_pgtable(hd->arch.root_table);
        hd->arch.root_table = p2m_table;

        /* When sharing p2m with iommu, paging mode = 4 */
        hd->arch.paging_mode = IOMMU_PAGING_MODE_LEVEL_4;
        AMD_IOMMU_DEBUG("Share p2m table with iommu: p2m table = %#lx\n",
                        mfn_x(pgd_mfn));
    }
}