From: Zhang Yi Currently, i_disksize is updated after ordered data writeback to prevent exposing stale data in the post-EOF block, and operations like append allocate, zero range, and truncate defer the i_disksize update until ordered I/O completes. However, insert range and collapse range still directly update i_disksize. This is safe because they have already called filemap_write_and_wait_range() to flush data up to LLONG_MAX, ensuring that ordered I/O has completed if any dirty data was present. One exception is when the ordered I/O is caused by a previous truncate up. In this case, there is no dirty data to flush. Therefore, add an explicit wait for I/O completion to handle this case. This will not have a significant impact on performance. Finally, also add a WARN_ON_ONCE check before updating i_disksize to detect any unexpected cases that could still expose stale data. Signed-off-by: Zhang Yi --- fs/ext4/extents.c | 18 ++++++++++++++++++ fs/ext4/inode.c | 4 +++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 125f628e738a..85c74c37f0ca 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5565,6 +5565,14 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) if (ret) return ret; + /* + * Wait for ordered I/O to be complete. Updating i_disksize beyond + * the current i_disksize here risks exposing stale data. 
+ */ + if (ext4_inode_buffered_iomap(inode)) + wait_event(EXT4_I(inode)->i_ordered_wq, + READ_ONCE(EXT4_I(inode)->i_ordered_len) == 0); + truncate_pagecache(inode, start); credits = ext4_chunk_trans_extent(inode, 0); @@ -5597,6 +5605,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) goto out_handle; } + WARN_ON_ONCE(READ_ONCE(EXT4_I(inode)->i_ordered_len) != 0); new_size = inode->i_size - len; i_size_write(inode, new_size); EXT4_I(inode)->i_disksize = new_size; @@ -5661,6 +5670,14 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) if (ret) return ret; + /* + * Wait for ordered I/O to be complete. Updating i_disksize beyond + * the current i_disksize here risks exposing stale data. + */ + if (ext4_inode_buffered_iomap(inode)) + wait_event(EXT4_I(inode)->i_ordered_wq, + READ_ONCE(EXT4_I(inode)->i_ordered_len) == 0); + truncate_pagecache(inode, start); credits = ext4_chunk_trans_extent(inode, 0); @@ -5671,6 +5688,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); /* Expand file to avoid data loss if there is error while shifting */ + WARN_ON_ONCE(READ_ONCE(EXT4_I(inode)->i_ordered_len) != 0); inode->i_size += len; EXT4_I(inode)->i_disksize += len; ret = ext4_mark_inode_dirty(handle, inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 23efb44f0c27..e47b504e85c9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4899,7 +4899,9 @@ int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end) * i_size by the end_io handler once the ongoing I/O * completes. * - * - TODO: handle insert range and collapse range. + * - Insert range and collapse range operations: + * Wait synchronously for the relevant I/O to complete + * before updating i_disksize. */ } else if (ext4_inode_buffered_iomap(inode)) { err = ext4_iomap_submit_zero_block(inode, from, end); -- 2.52.0