From: Chi Zhiling Optimize shmem file read by using filemap_get_folios_contig() to batch-fetch contiguous folios from the page cache, reducing the overhead of repeated shmem_get_folio() calls. This patch checks the uptodate flag without holding the folio lock, so it may observe a non-uptodate state on a locked folio that is still being initialized. This is safe because, in that scenario, only zero-filled data can be copied to the user buffer. A non-uptodate folio that is in the swap cache must not be added to the shmem page cache: doing so would create a semantic conflict, because shmem zeroes a non-uptodate folio out, whereas the swap cache would fill it by reading from the swap backing store. The patch therefore adds a VM_BUG_ON_FOLIO() check for this case to shmem_add_to_page_cache(). Signed-off-by: Chi Zhiling --- mm/shmem.c | 57 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index cac355685e49..61937582f08c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -891,6 +891,14 @@ int shmem_add_to_page_cache(struct folio *folio, VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio); + /* + * Don't add a non-uptodate folio that is in swap cache to page + * cache, since shmem will zero it instead of reading from swap + * backing. 
+ */ + VM_BUG_ON_FOLIO(folio_test_swapcache(folio) && + !folio_test_uptodate(folio), folio); + folio_ref_add(folio, nr); folio->mapping = mapping; folio->index = index; @@ -3382,11 +3390,13 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; - pgoff_t index; + struct folio_batch fbatch; unsigned long offset; int error = 0; ssize_t retval = 0; + folio_batch_init(&fbatch); + for (;;) { struct folio *folio = NULL; unsigned long nr, ret; @@ -3395,15 +3405,33 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (unlikely(iocb->ki_pos >= i_size)) break; +fetch: + folio = folio_batch_next(&fbatch); + if (!folio) { + pgoff_t start = iocb->ki_pos >> PAGE_SHIFT; + pgoff_t end = (iocb->ki_pos + to->count - 1) >> PAGE_SHIFT; + + if (folio_batch_count(&fbatch)) { + for (int i = 0; i < folio_batch_count(&fbatch); i++) + folio_put(fbatch.folios[i]); + folio_batch_reinit(&fbatch); + } - index = iocb->ki_pos >> PAGE_SHIFT; - error = shmem_get_folio(inode, index, 0, &folio, SGP_READ); - if (folio) - folio_unlock(folio); - if (error) { - if (error == -EINVAL) - error = 0; - break; + filemap_get_folios_contig(inode->i_mapping, &start, end, &fbatch); + if (folio_batch_count(&fbatch)) + goto fetch; + + error = shmem_get_folio(inode, start, 0, &folio, SGP_READ); + if (unlikely(error)) { + if (error == -EINVAL) + error = 0; + break; + } + if (folio) { + folio_unlock(folio); + folio_batch_add(&fbatch, folio); + fbatch.i++; + } } /* @@ -3411,17 +3439,15 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) * are called without i_rwsem protection against truncate */ i_size = i_size_read(inode); - if (unlikely(iocb->ki_pos >= i_size)) { - if (folio) - folio_put(folio); + if (unlikely(iocb->ki_pos >= i_size)) break; - } + fsize = folio ? 
folio_size(folio) : PAGE_SIZE; offset = iocb->ki_pos & (fsize - 1); end_offset = min_t(loff_t, i_size, iocb->ki_pos + to->count); nr = min_t(loff_t, end_offset - iocb->ki_pos, fsize - offset); - if (folio) { + if (folio && folio_test_uptodate(folio)) { /* * If users can be writing to this page using arbitrary * virtual addresses, take care about potential aliasing @@ -3443,7 +3469,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) ret = copy_folio_to_iter(folio, offset, nr, to); else ret = copy_pages_to_iter(folio, offset, nr, to, &error); - folio_put(folio); } else if (user_backed_iter(to)) { /* * Copy to user tends to be so well optimized, but @@ -3474,6 +3499,8 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to) cond_resched(); } + for (int i = 0; i < folio_batch_count(&fbatch); i++) + folio_put(fbatch.folios[i]); file_accessed(file); return retval ? retval : error; } -- 2.43.0