POSIX requires that "If the file size is increased, the extended area shall appear as if it were zero-filled". It is possible to use mmap to write past EOF and that data will become visible instead of zeroes. This behavior is reproduced by fstests test generic/363. Most traditional filesystems zero any post-eof portion of a folio at writeback time or when the file size is extended by truncate or extending writes. This ensures that the previously post-eof range of the folio is zeroed before it is exposed to the file. The tmpfs writeout path has been updated to zero post-eof folio ranges similar to traditional writeback. This ensures post-eof ranges are zeroed "on disk" and allows size extension zeroing to skip over swap entries as they are already appropriately zeroed. To that end, introduce a new zeroing helper for proper zeroing on file extending operations. This looks up resident folios between the original and new eof and for those that are uptodate, zeroes them before the associated ranges are exposed to the file. This preserves POSIX semantics and allows generic/363 to pass on tmpfs. Signed-off-by: Brian Foster --- mm/shmem.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/mm/shmem.c b/mm/shmem.c index 7925ced8a05d..a4aceb474377 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1101,6 +1101,78 @@ static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index) return folio; } +/* + * Zero a post-EOF range about to be exposed by size extension. Zero from the + * current i_size through lend, the latter of which typically refers to the + * start offset of an extending operation. Skip swap entries because associated + * folios were zeroed at swapout time. + */ +static void shmem_zero_eof(struct inode *inode, loff_t lend) +{ + struct address_space *mapping = inode->i_mapping; + loff_t lstart = i_size_read(inode); + pgoff_t index = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT; + pgoff_t end = lend >> PAGE_SHIFT; + struct folio_batch fbatch; + struct folio *folio; + int i; + bool same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); + + folio = filemap_lock_folio(mapping, lstart >> PAGE_SHIFT); + if (!IS_ERR(folio)) { + same_folio = lend < folio_next_pos(folio); + index = folio_next_index(folio); + + if (folio_test_uptodate(folio)) { + size_t from = offset_in_folio(folio, lstart); + size_t len = min_t(loff_t, folio_size(folio) - from, + lend - lstart); + + folio_zero_range(folio, from, len); + } + + folio_unlock(folio); + folio_put(folio); + } + + if (!same_folio) { + folio = filemap_lock_folio(mapping, lend >> PAGE_SHIFT); + if (!IS_ERR(folio)) { + end = folio->index; + + if (folio_test_uptodate(folio)) { + size_t len = lend - folio_pos(folio); + folio_zero_range(folio, 0, len); + } + + folio_unlock(folio); + folio_put(folio); + } + } + + /* + * Zero uptodate folios fully within the target range. Uptodate folios + * beyond EOF are generally unexpected, but can exist if a larger + * falloc'd and uptodate EOF folio is split. 
+ */ + folio_batch_init(&fbatch); + while (index < end) { + if (!filemap_get_folios(mapping, &index, end - 1, &fbatch)) + break; + for (i = 0; i < folio_batch_count(&fbatch); i++) { + folio = fbatch.folios[i]; + + folio_lock(folio); + if (folio_test_uptodate(folio) && + folio->mapping == mapping) { + folio_zero_segment(folio, 0, folio_size(folio)); + } + folio_unlock(folio); + } + folio_batch_release(&fbatch); + } +} + /* * Remove range of pages and swap entries from page cache, and free them. * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate. @@ -1331,6 +1403,8 @@ static int shmem_setattr(struct mnt_idmap *idmap, oldsize, newsize); if (error) return error; + if (newsize > oldsize) + shmem_zero_eof(inode, newsize); i_size_write(inode, newsize); update_mtime = true; } else { @@ -3512,6 +3586,8 @@ static ssize_t shmem_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = file_update_time(file); if (ret) goto unlock; + if (iocb->ki_pos > i_size_read(inode)) + shmem_zero_eof(inode, iocb->ki_pos); ret = generic_perform_write(iocb, from); unlock: inode_unlock(inode); @@ -3844,8 +3920,10 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, cond_resched(); } - if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) + if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) { + shmem_zero_eof(inode, offset + len); i_size_write(inode, offset + len); + } undone: spin_lock(&inode->i_lock); inode->i_private = NULL; -- 2.51.1
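For a quick sanity check outside fstests, here is a minimal userspace
sketch of the scenario described in the commit message (generic/363
exercises this far more thoroughly). The /dev/shm path, the 512/4096
sizes and the 0xab fill byte are illustrative assumptions, not taken
from the actual test. Without the zeroing above, the bytes dirtied
through the mapping beyond EOF can read back after the extending
truncate; with it, the extended range reads back as zeroes:

/*
 * Minimal sketch only; not the actual generic/363 test. Path, sizes
 * and fill byte are arbitrary illustration choices.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	int fd = open("/dev/shm/post-eof-test",
		      O_RDWR | O_CREAT | O_TRUNC, 0600);

	if (fd < 0)
		return 1;

	/* Small file: EOF falls partway into the first page. */
	if (ftruncate(fd, 512) < 0)
		return 1;

	/* Map the whole page and dirty the part of it beyond EOF. */
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	memset(p + 512, 0xab, 4096 - 512);

	/* Extend the file; POSIX says the new range must read as zeroes. */
	if (ftruncate(fd, 4096) < 0)
		return 1;

	if (pread(fd, buf, sizeof(buf), 0) != (ssize_t)sizeof(buf))
		return 1;
	for (size_t i = 512; i < sizeof(buf); i++) {
		if (buf[i] != 0) {
			printf("stale byte 0x%02x at offset %zu\n",
			       (unsigned char)buf[i], i);
			return 1;
		}
	}
	printf("extended range reads back as zeroes\n");
	munmap(p, 4096);
	close(fd);
	return 0;
}

The same exposure can be reached through an extending buffered write or
an extending fallocate rather than ftruncate(), which is why the helper
is hooked into all three paths (setattr, write_iter, fallocate) above.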