Make iov_iter_get_pages*() wrap iov_iter_extract_pages() for kernel iterator types (e.g. ITER_BVEC, ITER_FOLIOQ, ITER_XARRAY). The pages obtained have their refcounts incremented afterwards if they're not slab pages. ITER_KVEC is left returning -EFAULT. Signed-off-by: David Howells Reviewed-by: Paulo Alcantara (Red Hat) cc: Matthew Wilcox cc: Christoph Hellwig cc: Jens Axboe cc: linux-block@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org --- lib/iov_iter.c | 164 ++++++------------------------------------------- 1 file changed, 19 insertions(+), 145 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 243662af1af7..cac7d7364bc2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -910,118 +910,34 @@ static int want_pages_array(struct page ***res, size_t size, return count; } -static ssize_t iter_folioq_get_pages(struct iov_iter *iter, +/* + * Wrap iov_iter_extract_pages() and then take a ref on the non-slab pages we + * got back. This only works for non-user iterator types as get_pages uses + * get_user_pages not pin_user_pages. 
+ */ +static ssize_t iter_get_kernel_pages(struct iov_iter *iter, struct page ***ppages, size_t maxsize, unsigned maxpages, size_t *_start_offset) { - const struct folio_queue *folioq = iter->folioq; struct page **pages; - unsigned int slot = iter->folioq_slot; - size_t extracted = 0, count = iter->count, iov_offset = iter->iov_offset; + ssize_t ret, done; - if (slot >= folioq_nr_slots(folioq)) { - folioq = folioq->next; - slot = 0; - if (WARN_ON(iov_offset != 0)) - return -EIO; - } + ret = iov_iter_extract_pages(iter, ppages, maxsize, maxpages, + 0, _start_offset); + if (ret <= 0) + return ret; - maxpages = want_pages_array(ppages, maxsize, iov_offset & ~PAGE_MASK, maxpages); - if (!maxpages) - return -ENOMEM; - *_start_offset = iov_offset & ~PAGE_MASK; pages = *ppages; + for (done = ret + *_start_offset; done > 0; done -= PAGE_SIZE) { + struct folio *folio = page_folio(*pages); - for (;;) { - struct folio *folio = folioq_folio(folioq, slot); - size_t offset = iov_offset, fsize = folioq_folio_size(folioq, slot); - size_t part = PAGE_SIZE - offset % PAGE_SIZE; - - if (offset < fsize) { - part = umin(part, umin(maxsize - extracted, fsize - offset)); - count -= part; - iov_offset += part; - extracted += part; - - *pages = folio_page(folio, offset / PAGE_SIZE); - get_page(*pages); - pages++; - maxpages--; - } - - if (maxpages == 0 || extracted >= maxsize) - break; - - if (iov_offset >= fsize) { - iov_offset = 0; - slot++; - if (slot == folioq_nr_slots(folioq) && folioq->next) { - folioq = folioq->next; - slot = 0; - } - } - } - - iter->count = count; - iter->iov_offset = iov_offset; - iter->folioq = folioq; - iter->folioq_slot = slot; - return extracted; -} - -static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, - pgoff_t index, unsigned int nr_pages) -{ - XA_STATE(xas, xa, index); - struct folio *folio; - unsigned int ret = 0; - - rcu_read_lock(); - for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) { - if (xas_retry(&xas, 
folio)) - continue; - - /* Has the folio moved or been split? */ - if (unlikely(folio != xas_reload(&xas))) { - xas_reset(&xas); - continue; - } - - pages[ret] = folio_file_page(folio, xas.xa_index); - folio_get(folio); - if (++ret == nr_pages) - break; + if (!folio_test_slab(folio)) + folio_get(folio); + pages++; } - rcu_read_unlock(); return ret; } -static ssize_t iter_xarray_get_pages(struct iov_iter *i, - struct page ***pages, size_t maxsize, - unsigned maxpages, size_t *_start_offset) -{ - unsigned nr, offset, count; - pgoff_t index; - loff_t pos; - - pos = i->xarray_start + i->iov_offset; - index = pos >> PAGE_SHIFT; - offset = pos & ~PAGE_MASK; - *_start_offset = offset; - - count = want_pages_array(pages, maxsize, offset, maxpages); - if (!count) - return -ENOMEM; - nr = iter_xarray_populate_pages(*pages, i->xarray, index, count); - if (nr == 0) - return 0; - - maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize); - i->iov_offset += maxsize; - i->count -= maxsize; - return maxsize; -} - /* must be done on non-empty ITER_UBUF or ITER_IOVEC one */ static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) { @@ -1044,22 +960,6 @@ static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size) BUG(); // if it had been empty, we wouldn't get called } -/* must be done on non-empty ITER_BVEC one */ -static struct page *first_bvec_segment(const struct iov_iter *i, - size_t *size, size_t *start) -{ - struct page *page; - size_t skip = i->iov_offset, len; - - len = i->bvec->bv_len - skip; - if (*size > len) - *size = len; - skip += i->bvec->bv_offset; - page = i->bvec->bv_page + skip / PAGE_SIZE; - *start = skip % PAGE_SIZE; - return page; -} - static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, size_t *start) @@ -1095,36 +995,10 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, iov_iter_advance(i, maxsize); return maxsize; } - if 
(iov_iter_is_bvec(i)) { - struct page **p; - struct page *page; - page = first_bvec_segment(i, &maxsize, start); - n = want_pages_array(pages, maxsize, *start, maxpages); - if (!n) - return -ENOMEM; - p = *pages; - for (int k = 0; k < n; k++) { - struct folio *folio = page_folio(page + k); - p[k] = page + k; - if (!folio_test_slab(folio)) - folio_get(folio); - } - maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start); - i->count -= maxsize; - i->iov_offset += maxsize; - if (i->iov_offset == i->bvec->bv_len) { - i->iov_offset = 0; - i->bvec++; - i->nr_segs--; - } - return maxsize; - } - if (iov_iter_is_folioq(i)) - return iter_folioq_get_pages(i, pages, maxsize, maxpages, start); - if (iov_iter_is_xarray(i)) - return iter_xarray_get_pages(i, pages, maxsize, maxpages, start); - return -EFAULT; + if (iov_iter_is_kvec(i)) + return -EFAULT; + return iter_get_kernel_pages(i, pages, maxsize, maxpages, start); } ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages,