1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * MMU-based software IOTLB.
4 *
5 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
6 *
7 * Author: Xie Yongji <xieyongji@bytedance.com>
8 *
9 */
10
11 #include <linux/slab.h>
12 #include <linux/file.h>
13 #include <linux/anon_inodes.h>
14 #include <linux/highmem.h>
15 #include <linux/vmalloc.h>
16 #include <linux/vdpa.h>
17
18 #include "iova_domain.h"
19
/*
 * Insert the range [start, last] -> addr into the domain's IOTLB.
 *
 * A vdpa_map_file recording @file (via get_file()) and @offset is attached
 * to the new entry as its opaque context. If the insertion fails, the file
 * is released with fput() and the context is freed again.
 *
 * The context is allocated with GFP_ATOMIC.
 *
 * Returns 0 on success, -ENOMEM if the context cannot be allocated, or the
 * error reported by vhost_iotlb_add_range_ctx().
 */
static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
				 u64 start, u64 last,
				 u64 addr, unsigned int perm,
				 struct file *file, u64 offset)
{
	struct vdpa_map_file *ctx;
	int err;

	ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
	if (!ctx)
		return -ENOMEM;

	ctx->file = get_file(file);
	ctx->offset = offset;

	err = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
					addr, perm, ctx);
	if (!err)
		return 0;

	/* Insertion failed: undo get_file() and drop the context. */
	fput(ctx->file);
	kfree(ctx);
	return err;
}
44
/*
 * Remove every IOTLB entry found by vhost_iotlb_itree_first() for the range
 * [start, last]. For each entry, the file held by its vdpa_map_file context
 * is released with fput(), the context is freed, and the entry itself is
 * freed.
 */
static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
				  u64 start, u64 last)
{
	for (;;) {
		struct vhost_iotlb_map *entry;
		struct vdpa_map_file *ctx;

		/* Re-query each time: the previous hit has just been freed. */
		entry = vhost_iotlb_itree_first(domain->iotlb, start, last);
		if (!entry)
			break;

		ctx = (struct vdpa_map_file *)entry->opaque;
		fput(ctx->file);
		kfree(ctx);
		vhost_iotlb_map_free(domain->iotlb, entry);
	}
}
58
vduse_domain_set_map(struct vduse_iova_domain * domain,struct vhost_iotlb * iotlb)59 int vduse_domain_set_map(struct vduse_iova_domain *domain,
60 struct vhost_iotlb *iotlb)
61 {
62 struct vdpa_map_file *map_file;
63 struct vhost_iotlb_map *map;
64 u64 start = 0ULL, last = ULLONG_MAX;
65 int ret;
66
67 spin_lock(&domain->iotlb_lock);
68 vduse_iotlb_del_range(domain, start, last);
69
70 for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
71 map = vhost_iotlb_itree_next(map, start, last)) {
72 map_file = (struct vdpa_map_file *)map->opaque;
73 ret = vduse_iotlb_add_range(domain, map->start, map->last,
74 map->addr, map->perm,
75 map_file->file,
76 map_file->offset);
77 if (ret)
78 goto err;
79 }
80 spin_unlock(&domain->iotlb_lock);
81
82 return 0;
83 err:
84 vduse_iotlb_del_range(domain, start, last);
85 spin_unlock(&domain->iotlb_lock);
86 return ret;
87 }
88
vduse_domain_clear_map(struct vduse_iova_domain * domain,struct vhost_iotlb * iotlb)89 void vduse_domain_clear_map(struct vduse_iova_domain *domain,
90 struct vhost_iotlb *iotlb)
91 {
92 struct vhost_iotlb_map *map;
93 u64 start = 0ULL, last = ULLONG_MAX;
94
95 spin_lock(&domain->iotlb_lock);
96 for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
97 map = vhost_iotlb_itree_next(map, start, last)) {
98 vduse_iotlb_del_range(domain, map->start, map->last);
99 }
100 spin_unlock(&domain->iotlb_lock);
101 }
102
/*
 * Associate the bounce-map slots covering [iova, iova + size) with the
 * original physical range starting at @paddr, one page per slot.
 *
 * A bounce page is allocated (GFP_ATOMIC) for any slot that does not have
 * one yet; already-present bounce pages are reused.
 *
 * Returns 0 on success or -ENOMEM if a bounce page cannot be allocated. On
 * failure the orig_phys of every slot touched by this call is reset to
 * INVALID_PHYS_ADDR, so that the slots do not keep pointing at memory that
 * was never successfully mapped. Bounce pages that were allocated are kept
 * in their slots for later reuse. The rollback assumes the slots in the
 * range were unmapped on entry (presumably guaranteed by the caller passing
 * a freshly allocated IOVA range - confirm against callers).
 */
static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
					 u64 iova, u64 size, u64 paddr)
{
	struct vduse_bounce_map *map;
	u64 first = iova;
	u64 last = iova + size - 1;

	while (iova <= last) {
		map = &domain->bounce_maps[iova >> PAGE_SHIFT];
		if (!map->bounce_page) {
			map->bounce_page = alloc_page(GFP_ATOMIC);
			if (!map->bounce_page)
				goto err_unwind;
		}
		map->orig_phys = paddr;
		paddr += PAGE_SIZE;
		iova += PAGE_SIZE;
	}
	return 0;

err_unwind:
	/* Undo the slots [first, iova) that were set up before the failure. */
	while (first < iova) {
		map = &domain->bounce_maps[first >> PAGE_SHIFT];
		map->orig_phys = INVALID_PHYS_ADDR;
		first += PAGE_SIZE;
	}
	return -ENOMEM;
}
122
/*
 * Mark the bounce-map slots covering [iova, iova + size) as unmapped by
 * setting their orig_phys to INVALID_PHYS_ADDR. The bounce pages themselves
 * are left in place.
 */
static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
					    u64 iova, u64 size)
{
	u64 end = iova + size - 1;
	u64 cur;

	for (cur = iova; cur <= end; cur += PAGE_SIZE) {
		struct vduse_bounce_map *slot;

		slot = &domain->bounce_maps[cur >> PAGE_SHIFT];
		slot->orig_phys = INVALID_PHYS_ADDR;
	}
}
135
/*
 * Copy @size bytes between the physical range starting at @orig and the
 * kernel buffer @addr, page by page.
 *
 * With DMA_TO_DEVICE the data is copied from the original pages into @addr;
 * for any other direction it is copied from @addr into the original pages.
 * Only the first chunk may start at a non-zero page offset.
 */
static void do_bounce(phys_addr_t orig, void *addr, size_t size,
		      enum dma_data_direction dir)
{
	unsigned long pfn = PFN_DOWN(orig);
	unsigned int off = offset_in_page(orig);
	bool to_device = (dir == DMA_TO_DEVICE);

	for (; size; pfn++, off = 0) {
		unsigned int chunk = min_t(size_t, PAGE_SIZE - off, size);
		struct page *pg = pfn_to_page(pfn);

		if (to_device)
			memcpy_from_page(addr, pg, off, chunk);
		else
			memcpy_to_page(pg, off, addr, chunk);

		addr += chunk;
		size -= chunk;
	}
}
159
/*
 * Synchronise [iova, iova + size) between the bounce pages and the original
 * memory recorded in the bounce map, in direction @dir (see do_bounce()).
 *
 * Does nothing when @iova is not below bounce_size. Stops with a WARN if a
 * slot in the range has no bounce page or no valid orig_phys.
 */
static void vduse_domain_bounce(struct vduse_iova_domain *domain,
				dma_addr_t iova, size_t size,
				enum dma_data_direction dir)
{
	if (iova >= domain->bounce_size)
		return;

	while (size) {
		struct vduse_bounce_map *slot =
			&domain->bounce_maps[iova >> PAGE_SHIFT];
		unsigned int off = offset_in_page(iova);
		size_t chunk = min_t(size_t, PAGE_SIZE - off, size);
		void *kaddr;

		if (WARN_ON(!slot->bounce_page ||
			    slot->orig_phys == INVALID_PHYS_ADDR))
			return;

		/* Map the bounce page only for the duration of the copy. */
		kaddr = kmap_local_page(slot->bounce_page);
		do_bounce(slot->orig_phys + off, kaddr + off, chunk, dir);
		kunmap_local(kaddr);

		iova += chunk;
		size -= chunk;
	}
}
188
189 static struct page *
vduse_domain_get_coherent_page(struct vduse_iova_domain * domain,u64 iova)190 vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
191 {
192 u64 start = iova & PAGE_MASK;
193 u64 last = start + PAGE_SIZE - 1;
194 struct vhost_iotlb_map *map;
195 struct page *page = NULL;
196
197 spin_lock(&domain->iotlb_lock);
198 map = vhost_iotlb_itree_first(domain->iotlb, start, last);
199 if (!map)
200 goto out;
201
202 page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT);
203 get_page(page);
204 out:
205 spin_unlock(&domain->iotlb_lock);
206
207 return page;
208 }
209
210 static struct page *
vduse_domain_get_bounce_page(struct vduse_iova_domain * domain,u64 iova)211 vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
212 {
213 struct vduse_bounce_map *map;
214 struct page *page = NULL;
215
216 read_lock(&domain->bounce_lock);
217 map = &domain->bounce_maps[iova >> PAGE_SHIFT];
218 if (domain->user_bounce_pages || !map->bounce_page)
219 goto out;
220
221 page = map->bounce_page;
222 get_page(page);
223 out:
224 read_unlock(&domain->bounce_lock);
225
226 return page;
227 }
228
229 static void
vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain * domain)230 vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain)
231 {
232 struct vduse_bounce_map *map;
233 unsigned long pfn, bounce_pfns;
234
235 bounce_pfns = domain->bounce_size >> PAGE_SHIFT;
236
237 for (pfn = 0; pfn < bounce_pfns; pfn++) {
238 map = &domain->bounce_maps[pfn];
239 if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR))
240 continue;
241
242 if (!map->bounce_page)
243 continue;
244
245 __free_page(map->bounce_page);
246 map->bounce_page = NULL;
247 }
248 }
249
vduse_domain_add_user_bounce_pages(struct vduse_iova_domain * domain,struct page ** pages,int count)250 int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain,
251 struct page **pages, int count)
252 {
253 struct vduse_bounce_map *map;
254 int i, ret;
255
256 /* Now we don't support partial mapping */
257 if (count != (domain->bounce_size >> PAGE_SHIFT))
258 return -EINVAL;
259
260 write_lock(&domain->bounce_lock);
261 ret = -EEXIST;
262 if (domain->user_bounce_pages)
263 goto out;
264
265 for (i = 0; i < count; i++) {
266 map = &domain->bounce_maps[i];
267 if (map->bounce_page) {
268 /* Copy kernel page to user page if it's in use */
269 if (map->orig_phys != INVALID_PHYS_ADDR)
270 memcpy_to_page(pages[i], 0,
271 page_address(map->bounce_page),
272 PAGE_SIZE);
273 __free_page(map->bounce_page);
274 }
275 map->bounce_page = pages[i];
276 get_page(pages[i]);
277 }
278 domain->user_bounce_pages = true;
279 ret = 0;
280 out:
281 write_unlock(&domain->bounce_lock);
282
283 return ret;
284 }
285
vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain * domain)286 void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain)
287 {
288 struct vduse_bounce_map *map;
289 unsigned long i, count;
290
291 write_lock(&domain->bounce_lock);
292 if (!domain->user_bounce_pages)
293 goto out;
294
295 count = domain->bounce_size >> PAGE_SHIFT;
296 for (i = 0; i < count; i++) {
297 struct page *page = NULL;
298
299 map = &domain->bounce_maps[i];
300 if (WARN_ON(!map->bounce_page))
301 continue;
302
303 /* Copy user page to kernel page if it's in use */
304 if (map->orig_phys != INVALID_PHYS_ADDR) {
305 page = alloc_page(GFP_ATOMIC | __GFP_NOFAIL);
306 memcpy_from_page(page_address(page),
307 map->bounce_page, 0, PAGE_SIZE);
308 }
309 put_page(map->bounce_page);
310 map->bounce_page = page;
311 }
312 domain->user_bounce_pages = false;
313 out:
314 write_unlock(&domain->bounce_lock);
315 }
316
vduse_domain_reset_bounce_map(struct vduse_iova_domain * domain)317 void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
318 {
319 if (!domain->bounce_map)
320 return;
321
322 spin_lock(&domain->iotlb_lock);
323 if (!domain->bounce_map)
324 goto unlock;
325
326 vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
327 domain->bounce_map = 0;
328 unlock:
329 spin_unlock(&domain->iotlb_lock);
330 }
331
vduse_domain_init_bounce_map(struct vduse_iova_domain * domain)332 static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
333 {
334 int ret = 0;
335
336 if (domain->bounce_map)
337 return 0;
338
339 spin_lock(&domain->iotlb_lock);
340 if (domain->bounce_map)
341 goto unlock;
342
343 ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
344 0, VHOST_MAP_RW, domain->file, 0);
345 if (ret)
346 goto unlock;
347
348 domain->bounce_map = 1;
349 unlock:
350 spin_unlock(&domain->iotlb_lock);
351 return ret;
352 }
353
354 static dma_addr_t
vduse_domain_alloc_iova(struct iova_domain * iovad,unsigned long size,unsigned long limit)355 vduse_domain_alloc_iova(struct iova_domain *iovad,
356 unsigned long size, unsigned long limit)
357 {
358 unsigned long shift = iova_shift(iovad);
359 unsigned long iova_len = iova_align(iovad, size) >> shift;
360 unsigned long iova_pfn;
361
362 iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true);
363
364 return (dma_addr_t)iova_pfn << shift;
365 }
366
/*
 * Return the IOVA range starting at @iova to @iovad. @size is rounded with
 * iova_align() in the same way as in vduse_domain_alloc_iova().
 */
static void vduse_domain_free_iova(struct iova_domain *iovad,
				   dma_addr_t iova, size_t size)
{
	unsigned long shift = iova_shift(iovad);
	unsigned long nr_pfns = iova_align(iovad, size) >> shift;
	unsigned long pfn = iova >> shift;

	free_iova_fast(iovad, pfn, nr_pfns);
}
375
vduse_domain_map_page(struct vduse_iova_domain * domain,struct page * page,unsigned long offset,size_t size,enum dma_data_direction dir,unsigned long attrs)376 dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
377 struct page *page, unsigned long offset,
378 size_t size, enum dma_data_direction dir,
379 unsigned long attrs)
380 {
381 struct iova_domain *iovad = &domain->stream_iovad;
382 unsigned long limit = domain->bounce_size - 1;
383 phys_addr_t pa = page_to_phys(page) + offset;
384 dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit);
385
386 if (!iova)
387 return DMA_MAPPING_ERROR;
388
389 if (vduse_domain_init_bounce_map(domain))
390 goto err;
391
392 read_lock(&domain->bounce_lock);
393 if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa))
394 goto err_unlock;
395
396 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
397 vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);
398
399 read_unlock(&domain->bounce_lock);
400
401 return iova;
402 err_unlock:
403 read_unlock(&domain->bounce_lock);
404 err:
405 vduse_domain_free_iova(iovad, iova, size);
406 return DMA_MAPPING_ERROR;
407 }
408
/*
 * Tear down a mapping created by vduse_domain_map_page().
 *
 * For DMA_FROM_DEVICE and DMA_BIDIRECTIONAL the bounce pages are copied
 * back to the original memory first. The bounce-map slots are then marked
 * unmapped and the IOVA range is returned to the streaming IOVA domain.
 */
void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
			     dma_addr_t dma_addr, size_t size,
			     enum dma_data_direction dir, unsigned long attrs)
{
	bool copy_back = (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL);

	read_lock(&domain->bounce_lock);
	if (copy_back)
		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
	vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
	read_unlock(&domain->bounce_lock);

	vduse_domain_free_iova(&domain->stream_iovad, dma_addr, size);
}
423
/*
 * Allocate a coherent buffer of @size bytes and map it into the domain.
 *
 * An IOVA is taken from the consistent IOVA domain (bounded by iova_limit),
 * the memory comes from alloc_pages_exact(), and a read/write IOTLB entry
 * iova -> physical address is added with the domain's file and the IOVA as
 * file offset.
 *
 * On success the kernel virtual address is returned and *@dma_addr is set
 * to the IOVA. On failure NULL is returned, *@dma_addr is set to
 * DMA_MAPPING_ERROR, and whatever was allocated is released.
 */
void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
				  size_t size, dma_addr_t *dma_addr,
				  gfp_t flag, unsigned long attrs)
{
	struct iova_domain *iovad = &domain->consistent_iovad;
	dma_addr_t iova = vduse_domain_alloc_iova(iovad, size,
						  domain->iova_limit);
	void *buf = alloc_pages_exact(size, flag);
	int err;

	if (!iova || !buf)
		goto fail;

	spin_lock(&domain->iotlb_lock);
	err = vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
				    virt_to_phys(buf), VHOST_MAP_RW,
				    domain->file, (u64)iova);
	spin_unlock(&domain->iotlb_lock);
	if (err)
		goto fail;

	*dma_addr = iova;
	return buf;

fail:
	*dma_addr = DMA_MAPPING_ERROR;
	if (buf)
		free_pages_exact(buf, size);
	if (iova)
		vduse_domain_free_iova(iovad, iova, size);

	return NULL;
}
457
/*
 * Release a buffer obtained from vduse_domain_alloc_coherent().
 *
 * The first IOTLB entry overlapping [dma_addr, dma_addr + size - 1] is
 * removed together with its vdpa_map_file context; the IOVA range is then
 * freed and the pages at the entry's recorded physical address are
 * released. If no entry is found, a WARN is raised and nothing is freed.
 */
void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
				void *vaddr, dma_addr_t dma_addr,
				unsigned long attrs)
{
	struct vhost_iotlb_map *entry;
	struct vdpa_map_file *ctx;
	phys_addr_t phys;

	spin_lock(&domain->iotlb_lock);
	entry = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
					(u64)dma_addr + size - 1);
	if (WARN_ON(!entry)) {
		spin_unlock(&domain->iotlb_lock);
		return;
	}

	/* Remember the backing address before the entry goes away. */
	phys = entry->addr;
	ctx = (struct vdpa_map_file *)entry->opaque;
	fput(ctx->file);
	kfree(ctx);
	vhost_iotlb_map_free(domain->iotlb, entry);
	spin_unlock(&domain->iotlb_lock);

	vduse_domain_free_iova(&domain->consistent_iovad, dma_addr, size);
	free_pages_exact(phys_to_virt(phys), size);
}
484
vduse_domain_mmap_fault(struct vm_fault * vmf)485 static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
486 {
487 struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
488 unsigned long iova = vmf->pgoff << PAGE_SHIFT;
489 struct page *page;
490
491 if (!domain)
492 return VM_FAULT_SIGBUS;
493
494 if (iova < domain->bounce_size)
495 page = vduse_domain_get_bounce_page(domain, iova);
496 else
497 page = vduse_domain_get_coherent_page(domain, iova);
498
499 if (!page)
500 return VM_FAULT_SIGBUS;
501
502 vmf->page = page;
503
504 return 0;
505 }
506
/* VMA operations installed by vduse_domain_mmap(); only faults are handled. */
static const struct vm_operations_struct vduse_domain_mmap_ops = {
	.fault = vduse_domain_mmap_fault,
};
510
vduse_domain_mmap(struct file * file,struct vm_area_struct * vma)511 static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
512 {
513 struct vduse_iova_domain *domain = file->private_data;
514
515 vm_flags_set(vma, VM_DONTDUMP | VM_DONTEXPAND);
516 vma->vm_private_data = domain;
517 vma->vm_ops = &vduse_domain_mmap_ops;
518
519 return 0;
520 }
521
/*
 * release handler of the domain file: tears down and frees the whole domain
 * stored in file->private_data.
 *
 * Under iotlb_lock, all IOTLB entries are deleted, user bounce pages (if
 * installed) are swapped back for kernel pages, and the kernel bounce pages
 * are freed. Afterwards both IOVA domains, the IOTLB, the bounce map array
 * and the domain structure itself are released. Always returns 0.
 */
static int vduse_domain_release(struct inode *inode, struct file *file)
{
	struct vduse_iova_domain *domain = file->private_data;

	spin_lock(&domain->iotlb_lock);
	vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
	/* Must precede the kernel-page free: it replaces user pages first. */
	vduse_domain_remove_user_bounce_pages(domain);
	vduse_domain_free_kernel_bounce_pages(domain);
	spin_unlock(&domain->iotlb_lock);
	put_iova_domain(&domain->stream_iovad);
	put_iova_domain(&domain->consistent_iovad);
	vhost_iotlb_free(domain->iotlb);
	vfree(domain->bounce_maps);
	/* The domain is gone after this point. */
	kfree(domain);

	return 0;
}
539
/*
 * File operations of the anonymous domain file created in
 * vduse_domain_create(): mmap exposes the domain's pages, release frees the
 * domain.
 */
static const struct file_operations vduse_domain_fops = {
	.owner = THIS_MODULE,
	.mmap = vduse_domain_mmap,
	.release = vduse_domain_release,
};
545
vduse_domain_destroy(struct vduse_iova_domain * domain)546 void vduse_domain_destroy(struct vduse_iova_domain *domain)
547 {
548 fput(domain->file);
549 }
550
551 struct vduse_iova_domain *
vduse_domain_create(unsigned long iova_limit,size_t bounce_size)552 vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
553 {
554 struct vduse_iova_domain *domain;
555 struct file *file;
556 struct vduse_bounce_map *map;
557 unsigned long pfn, bounce_pfns;
558 int ret;
559
560 bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
561 if (iova_limit <= bounce_size)
562 return NULL;
563
564 domain = kzalloc(sizeof(*domain), GFP_KERNEL);
565 if (!domain)
566 return NULL;
567
568 domain->iotlb = vhost_iotlb_alloc(0, 0);
569 if (!domain->iotlb)
570 goto err_iotlb;
571
572 domain->iova_limit = iova_limit;
573 domain->bounce_size = PAGE_ALIGN(bounce_size);
574 domain->bounce_maps = vzalloc(bounce_pfns *
575 sizeof(struct vduse_bounce_map));
576 if (!domain->bounce_maps)
577 goto err_map;
578
579 for (pfn = 0; pfn < bounce_pfns; pfn++) {
580 map = &domain->bounce_maps[pfn];
581 map->orig_phys = INVALID_PHYS_ADDR;
582 }
583 file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
584 domain, O_RDWR);
585 if (IS_ERR(file))
586 goto err_file;
587
588 domain->file = file;
589 rwlock_init(&domain->bounce_lock);
590 spin_lock_init(&domain->iotlb_lock);
591 init_iova_domain(&domain->stream_iovad,
592 PAGE_SIZE, IOVA_START_PFN);
593 ret = iova_domain_init_rcaches(&domain->stream_iovad);
594 if (ret)
595 goto err_iovad_stream;
596 init_iova_domain(&domain->consistent_iovad,
597 PAGE_SIZE, bounce_pfns);
598 ret = iova_domain_init_rcaches(&domain->consistent_iovad);
599 if (ret)
600 goto err_iovad_consistent;
601
602 return domain;
603 err_iovad_consistent:
604 put_iova_domain(&domain->stream_iovad);
605 err_iovad_stream:
606 fput(file);
607 err_file:
608 vfree(domain->bounce_maps);
609 err_map:
610 vhost_iotlb_free(domain->iotlb);
611 err_iotlb:
612 kfree(domain);
613 return NULL;
614 }
615
/*
 * Module-level setup for IOVA domains: calls iova_cache_get() and returns
 * its result unchanged.
 */
int vduse_domain_init(void)
{
	int ret = iova_cache_get();

	return ret;
}
620
/*
 * Module-level teardown for IOVA domains: calls iova_cache_put(), the
 * counterpart of the iova_cache_get() done in vduse_domain_init().
 */
void vduse_domain_exit(void)
{
	iova_cache_put();
}
625