// SPDX-License-Identifier: GPL-2.0-or-later
/* Iterator helpers.
 *
 * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/uio.h>
#include <linux/scatterlist.h>
#include <linux/netfs.h>
#include "internal.h"

/**
 * netfs_extract_user_iter - Extract the pages from a user iterator into a bvec
 * @orig: The original iterator
 * @orig_len: The amount of iterator to copy
 * @new: The iterator to be set up
 * @extraction_flags: Flags to qualify the request
 *
 * Extract the page fragments from the given amount of the source iterator and
 * build up a second iterator that refers to all of those bits.  This allows
 * the original iterator to be disposed of.
 *
 * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
 * be allowed on the pages extracted.
 *
 * On success, the number of elements in the bvec is returned and the original
 * iterator will have been advanced by the amount extracted.
 *
 * The iov_iter_extract_mode() function should be used to query how cleanup
 * should be performed.
 */
ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
				struct iov_iter *new,
				iov_iter_extraction_t extraction_flags)
{
	struct bio_vec *bv = NULL;
	struct page **pages;
	unsigned int cur_npages;
	unsigned int max_pages;
	unsigned int npages = 0;
	unsigned int i;
	ssize_t ret;
	size_t count = orig_len, offset, len;
	size_t bv_size, pg_size;

	if (WARN_ON_ONCE(!iter_is_ubuf(orig) && !iter_is_iovec(orig)))
		return -EIO;

	max_pages = iov_iter_npages(orig, INT_MAX);
	bv_size = array_size(max_pages, sizeof(*bv));
	bv = kvmalloc(bv_size, GFP_KERNEL);
	if (!bv)
		return -ENOMEM;

	/* Put the page list at the end of the bvec list storage.  bvec
	 * elements are larger than page pointers, so as long as we work
	 * 0->last, we should be fine.
	 */
	pg_size = array_size(max_pages, sizeof(*pages));
	pages = (void *)bv + bv_size - pg_size;
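
	/* The two arrays now share one allocation: bvecs are written from
	 * the head while page pointers are consumed from the tail.  Because
	 * a bio_vec is larger than a page pointer, each pointer is read
	 * before the bvecs written so far can overwrite it.
	 */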

	while (count && npages < max_pages) {
		ret = iov_iter_extract_pages(orig, &pages, count,
					     max_pages - npages, extraction_flags,
					     &offset);
		if (ret < 0) {
			pr_err("Couldn't get user pages (rc=%zd)\n", ret);
			break;
		}

		if (ret > count) {
			pr_err("get_pages rc=%zd more than %zu\n", ret, count);
			break;
		}

		count -= ret;
		ret += offset;
		cur_npages = DIV_ROUND_UP(ret, PAGE_SIZE);

		if (npages + cur_npages > max_pages) {
			pr_err("Out of bvec array capacity (%u vs %u)\n",
			       npages + cur_npages, max_pages);
			break;
		}

		for (i = 0; i < cur_npages; i++) {
			len = ret > PAGE_SIZE ? PAGE_SIZE : ret;
			bvec_set_page(bv + npages + i, *pages++, len - offset, offset);
			ret -= len;
			offset = 0;
		}

		npages += cur_npages;
	}

	iov_iter_bvec(new, orig->data_source, bv, npages, orig_len - count);
	return npages;
}
EXPORT_SYMBOL_GPL(netfs_extract_user_iter);
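
/* Example (a minimal sketch of a hypothetical caller; error handling and
 * page cleanup are abbreviated):
 *
 *	struct iov_iter source, bv_iter;
 *	ssize_t n;
 *
 *	n = netfs_extract_user_iter(&source, count, &bv_iter, 0);
 *	if (n < 0)
 *		return n;
 *	...do I/O against bv_iter...
 *
 * Afterwards, the caller releases the extracted pages as indicated by
 * iov_iter_extract_mode() and frees the bvec array with kvfree().
 */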

/*
 * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
 * iterators, and add them to the scatterlist.
 */
static ssize_t netfs_extract_user_to_sg(struct iov_iter *iter,
					ssize_t maxsize,
					struct sg_table *sgtable,
					unsigned int sg_max,
					iov_iter_extraction_t extraction_flags)
{
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	struct page **pages;
	unsigned int npages;
	ssize_t ret = 0, res;
	size_t len, off;

	/* We decant the page list into the tail of the scatterlist */
	pages = (void *)sgtable->sgl + array_size(sg_max, sizeof(struct scatterlist));
	pages -= sg_max;
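
	/* As with netfs_extract_user_iter(), the page pointers share
	 * storage with the scatterlist: they occupy the last sg_max
	 * pointer-sized slots of the sgl array, and each one is consumed
	 * before the (larger) scatterlist entries written from the head
	 * can reach it.
	 */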

	do {
		res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
					     extraction_flags, &off);
		if (res < 0)
			goto failed;

		len = res;
		maxsize -= len;
		ret += len;
		npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
		sg_max -= npages;

		for (; npages > 0; npages--) {
			struct page *page = *pages;
			size_t seg = min_t(size_t, PAGE_SIZE - off, len);

			*pages++ = NULL;
			sg_set_page(sg, page, seg, off);
			sgtable->nents++;
			sg++;
			len -= seg;
			off = 0;
		}
	} while (maxsize > 0 && sg_max > 0);

	return ret;

failed:
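	/* Unwind: drop the references on any pages already installed in
	 * the table beyond its original population.
	 */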
	while (sgtable->nents > sgtable->orig_nents)
		put_page(sg_page(&sgtable->sgl[--sgtable->nents]));
	return res;
}

/*
 * Extract up to sg_max pages from a BVEC-type iterator and add them to the
 * scatterlist.  The pages are not pinned.
 */
static ssize_t netfs_extract_bvec_to_sg(struct iov_iter *iter,
					ssize_t maxsize,
					struct sg_table *sgtable,
					unsigned int sg_max,
					iov_iter_extraction_t extraction_flags)
{
	const struct bio_vec *bv = iter->bvec;
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	unsigned long start = iter->iov_offset;
	unsigned int i;
	ssize_t ret = 0;

	for (i = 0; i < iter->nr_segs; i++) {
		size_t off, len;

		len = bv[i].bv_len;
		if (start >= len) {
			start -= len;
			continue;
		}

		len = min_t(size_t, maxsize, len - start);
		off = bv[i].bv_offset + start;

		sg_set_page(sg, bv[i].bv_page, len, off);
		sgtable->nents++;
		sg++;
		sg_max--;

		ret += len;
		maxsize -= len;
		if (maxsize <= 0 || sg_max == 0)
			break;
		start = 0;
	}

	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}

/*
 * Extract up to sg_max pages from a KVEC-type iterator and add them to the
 * scatterlist.  This can deal with vmalloc'd buffers as well as kmalloc'd or
 * static buffers.  The pages are not pinned.
 */
static ssize_t netfs_extract_kvec_to_sg(struct iov_iter *iter,
					ssize_t maxsize,
					struct sg_table *sgtable,
					unsigned int sg_max,
					iov_iter_extraction_t extraction_flags)
{
	const struct kvec *kv = iter->kvec;
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	unsigned long start = iter->iov_offset;
	unsigned int i;
	ssize_t ret = 0;

	for (i = 0; i < iter->nr_segs; i++) {
		struct page *page;
		unsigned long kaddr;
		size_t off, len, seg;

		len = kv[i].iov_len;
		if (start >= len) {
			start -= len;
			continue;
		}

		kaddr = (unsigned long)kv[i].iov_base + start;
		off = kaddr & ~PAGE_MASK;
		len = min_t(size_t, maxsize, len - start);
		kaddr &= PAGE_MASK;

		maxsize -= len;
		ret += len;
		do {
			seg = min_t(size_t, len, PAGE_SIZE - off);
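			/* vmalloc memory isn't physically contiguous, so
			 * each page must be looked up through the vmap;
			 * linear-map addresses convert directly.
			 */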
			if (is_vmalloc_or_module_addr((void *)kaddr))
				page = vmalloc_to_page((void *)kaddr);
			else
				page = virt_to_page(kaddr);

			sg_set_page(sg, page, seg, off);
			sgtable->nents++;
			sg++;
			sg_max--;

			len -= seg;
			kaddr += PAGE_SIZE;
			off = 0;
		} while (len > 0 && sg_max > 0);

		if (maxsize <= 0 || sg_max == 0)
			break;
		start = 0;
	}

	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}

/*
 * Extract up to sg_max folios from an XARRAY-type iterator and add them to
 * the scatterlist.  The pages are not pinned.
 */
static ssize_t netfs_extract_xarray_to_sg(struct iov_iter *iter,
					  ssize_t maxsize,
					  struct sg_table *sgtable,
					  unsigned int sg_max,
					  iov_iter_extraction_t extraction_flags)
{
	struct scatterlist *sg = sgtable->sgl + sgtable->nents;
	struct xarray *xa = iter->xarray;
	struct folio *folio;
	loff_t start = iter->xarray_start + iter->iov_offset;
	pgoff_t index = start / PAGE_SIZE;
	ssize_t ret = 0;
	size_t offset, len;
	XA_STATE(xas, xa, index);

	rcu_read_lock();

	xas_for_each(&xas, folio, ULONG_MAX) {
		if (xas_retry(&xas, folio))
			continue;
		if (WARN_ON(xa_is_value(folio)))
			break;
		if (WARN_ON(folio_test_hugetlb(folio)))
			break;

		offset = offset_in_folio(folio, start);
		len = min_t(size_t, maxsize, folio_size(folio) - offset);

		sg_set_page(sg, folio_page(folio, 0), len, offset);
		sgtable->nents++;
		sg++;
		sg_max--;

		start += len;
		maxsize -= len;
		ret += len;
		if (maxsize <= 0 || sg_max == 0)
			break;
	}

	rcu_read_unlock();
	if (ret > 0)
		iov_iter_advance(iter, ret);
	return ret;
}

/**
 * netfs_extract_iter_to_sg - Extract pages from an iterator and add to an sglist
 * @iter: The iterator to extract from
 * @maxsize: The amount of iterator to copy
 * @sgtable: The scatterlist table to fill in
 * @sg_max: Maximum number of elements in @sgtable that may be filled
 * @extraction_flags: Flags to qualify the request
 *
 * Extract the page fragments from the given amount of the source iterator and
 * add them to a scatterlist that refers to all of those bits, to a maximum
 * addition of @sg_max elements.
 *
 * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
 * pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE-
 * and DISCARD-type are not supported.
 *
 * No end mark is placed on the scatterlist; that's left to the caller.
 *
 * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
 * be allowed on the pages extracted.
 *
 * If successful, @sgtable->nents is updated to include the number of elements
 * added and the number of bytes added is returned.  @sgtable->orig_nents is
 * left unaltered.
 *
 * The iov_iter_extract_mode() function should be used to query how cleanup
 * should be performed.
 */
ssize_t netfs_extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
				 struct sg_table *sgtable, unsigned int sg_max,
				 iov_iter_extraction_t extraction_flags)
{
	if (maxsize == 0)
		return 0;

	switch (iov_iter_type(iter)) {
	case ITER_UBUF:
	case ITER_IOVEC:
		return netfs_extract_user_to_sg(iter, maxsize, sgtable, sg_max,
						extraction_flags);
	case ITER_BVEC:
		return netfs_extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
						extraction_flags);
	case ITER_KVEC:
		return netfs_extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
						extraction_flags);
	case ITER_XARRAY:
		return netfs_extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
						  extraction_flags);
	default:
		pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
		WARN_ON_ONCE(1);
		return -EIO;
	}
}
EXPORT_SYMBOL_GPL(netfs_extract_iter_to_sg);
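
/* A minimal calling sketch (hypothetical caller; the sgl capacity macro and
 * cleanup policy are illustrative, not part of this API).  Note that for
 * UBUF/IOVEC iterators the sgl storage must have room for sg_max entries,
 * since page pointers are decanted into its tail:
 *
 *	struct scatterlist sgl[MY_MAX_SG];
 *	struct sg_table sgtable = { .sgl = sgl };
 *	ssize_t n;
 *
 *	sg_init_table(sgl, ARRAY_SIZE(sgl));
 *	n = netfs_extract_iter_to_sg(iter, size, &sgtable, ARRAY_SIZE(sgl), 0);
 *	if (n < 0)
 *		return n;
 *	if (sgtable.nents)
 *		sg_mark_end(&sgtable.sgl[sgtable.nents - 1]);
 *
 * When the I/O completes, consult iov_iter_extract_mode() to decide how the
 * pages should be released.
 */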