From: Zhang Yi Move ext4_block_zero_eof() and ext4_zero_partial_blocks() calls out of the active handle context, making them independent operations, and also add return value checks. This is safe because it still ensures data is updated before metadata for data=ordered mode and data=journal mode because we still zero data and ordering data before modifying the metadata. This change is required for iomap infrastructure conversion because the iomap buffered I/O path does not use the same journal infrastructure for partial block zeroing. The lock ordering of folio lock and starting transactions is "folio lock -> transaction start", which is opposite of the current path. Therefore, zeroing partial blocks cannot be performed under the active handle. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara --- fs/ext4/extents.c | 32 +++++++++++++++----------------- fs/ext4/inode.c | 47 ++++++++++++++++++++++++++--------------------- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 57a686b600d9..002b1ec8cee2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4585,6 +4585,13 @@ static int ext4_alloc_file_blocks(struct file *file, loff_t offset, loff_t len, credits = ext4_chunk_trans_blocks(inode, len_lblk); depth = ext_depth(inode); + /* Zero to the end of the block containing i_size */ + if (new_size > old_size) { + ret = ext4_block_zero_eof(inode, old_size, LLONG_MAX); + if (ret) + return ret; + } + retry: while (len_lblk) { /* @@ -4623,10 +4630,8 @@ static int ext4_alloc_file_blocks(struct file *file, loff_t offset, loff_t len, if (ext4_update_inode_size(inode, epos) & 0x1) inode_set_mtime_to_ts(inode, inode_get_ctime(inode)); - if (epos > old_size) { + if (epos > old_size) pagecache_isize_extended(inode, old_size, epos); - ext4_block_zero_eof(inode, old_size, epos); - } } ret2 = ext4_mark_inode_dirty(handle, inode); ext4_update_inode_fsync_trans(handle, inode, 1); @@ -4668,7 +4673,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, loff_t align_start, align_end, new_size = 0; loff_t end = offset + len; unsigned int blocksize = i_blocksize(inode); - int ret, flags, credits; + int ret, flags; trace_ext4_zero_range(inode, offset, len, mode); WARN_ON_ONCE(!inode_is_locked(inode)); @@ -4722,25 +4727,18 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (IS_ALIGNED(offset | end, blocksize)) return ret; - /* - * In worst case we have to writeout two nonadjacent unwritten - * blocks and update the inode - */ - credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1; - if (ext4_should_journal_data(inode)) - credits += 2; - handle = ext4_journal_start(inode, EXT4_HT_MISC, credits); + /* Zero out partial block at the edges of the range */ + ret = ext4_zero_partial_blocks(inode, offset, len); + if (ret) + return ret; + + handle = ext4_journal_start(inode, EXT4_HT_MISC, 1); if (IS_ERR(handle)) { ret = PTR_ERR(handle); ext4_std_error(inode->i_sb, ret); return ret; } - /* Zero out partial block at the edges of the range */ - ret = ext4_zero_partial_blocks(inode, offset, len); - if (ret) - goto out_handle; - if (new_size) ext4_update_inode_size(inode, new_size); ret = ext4_mark_inode_dirty(handle, inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 13655ea709fe..8aa4369e3150 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4441,8 +4441,12 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) if (ret) return ret; + ret = ext4_zero_partial_blocks(inode, offset, length); + if (ret) + return ret; + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - credits = ext4_chunk_trans_extent(inode, 2); + credits = ext4_chunk_trans_extent(inode, 0); else credits = ext4_blocks_for_truncate(inode); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); @@ -4452,10 +4456,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) return ret; } - ret = ext4_zero_partial_blocks(inode, offset, length); - if (ret) - goto out_handle; - /* If there are blocks to remove, do it */ start_lblk = EXT4_B_TO_LBLK(inode, offset); end_lblk = end >> inode->i_blkbits; @@ -4587,6 +4587,11 @@ int ext4_truncate(struct inode *inode) err = ext4_inode_attach_jinode(inode); if (err) goto out_trace; + + /* Zero to the end of the block containing i_size */ + err = ext4_block_zero_eof(inode, inode->i_size, LLONG_MAX); + if (err) + goto out_trace; } if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) @@ -4600,10 +4605,6 @@ int ext4_truncate(struct inode *inode) goto out_trace; } - /* Zero to the end of the block containing i_size */ - if (inode->i_size & (inode->i_sb->s_blocksize - 1)) - ext4_block_zero_eof(inode, inode->i_size, LLONG_MAX); - /* * We add the inode to the orphan list, so that if this * truncate spans multiple transactions, and we crash, we will @@ -5961,15 +5962,6 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, goto out_mmap_sem; } - handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); - if (IS_ERR(handle)) { - error = PTR_ERR(handle); - goto out_mmap_sem; - } - if (ext4_handle_valid(handle) && shrink) { - error = ext4_orphan_add(handle, inode); - orphan = 1; - } /* * Update c/mtime and tail zero the EOF folio on * truncate up. ext4_truncate() handles the shrink case @@ -5978,9 +5970,22 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (!shrink) { inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); - if (oldsize & (inode->i_sb->s_blocksize - 1)) - ext4_block_zero_eof(inode, oldsize, - LLONG_MAX); + if (oldsize & (inode->i_sb->s_blocksize - 1)) { + error = ext4_block_zero_eof(inode, + oldsize, LLONG_MAX); + if (error) + goto out_mmap_sem; + } + } + + handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto out_mmap_sem; + } + if (ext4_handle_valid(handle) && shrink) { + error = ext4_orphan_add(handle, inode); + orphan = 1; } if (shrink) -- 2.52.0