From: Zhang Yi Introduce ext4_enable_buffered_iomap() to determine whether a regular file inode should use the iomap buffered I/O path. We now support the default filesystem features, mount options, and the bigalloc feature. However, inline data, fsverity, fscrypt, indirect-block mapped (non-extent) inodes, and data=journal mode are not fully supported. The decision is made at inode initialization time in __ext4_new_inode() and __ext4_iget() by setting the EXT4_STATE_BUFFERED_IOMAP state flag. If any of these unsupported features is in use, the inode silently falls back to the traditional buffer_head path. Switching the buffered I/O path on an active inode is not supported, with the exception of changing the per-inode data journal flag. For features like encryption, verity, and inline data that can be enabled dynamically, we check the superblock-level feature flags rather than per-inode state: if any of them is enabled on the filesystem, no inode uses the iomap path. This avoids the complexity of toggling the path on individual inodes. Additionally: - Extend ext4_inode_journal_mode() to force ordered mode for inodes using the iomap path under a data=journal mount. For the global data journal mode (EXT4_MOUNT_JOURNAL_DATA), a change made through remount is deferred until the next inode re-initialization. For the per-inode data journal mode (EXT4_INODE_JOURNAL_DATA), dynamic changes take effect immediately, as it is safe to switch address_space operations and drop all page cache under i_rwsem and filemap_invalidate_lock. - Add WARN_ON_ONCE() guards in _ext4_get_block() and ext4_do_writepages() to catch inodes on the iomap path that accidentally enter the legacy buffer_head path, and in ext4_iomap_map_blocks() and ext4_iomap_writepages() to catch inodes on the buffer_head path that accidentally enter the iomap path. - Reject extent-to-indirect migration via ext4_ind_migrate() for inodes on the iomap path. 
Signed-off-by: Zhang Yi --- fs/ext4/ext4.h | 1 + fs/ext4/ext4_jbd2.c | 8 +++-- fs/ext4/ialloc.c | 1 + fs/ext4/inode.c | 77 +++++++++++++++++++++++++++++++++++++++++++-- fs/ext4/migrate.c | 2 ++ 5 files changed, 85 insertions(+), 4 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0a3bb44f1e6e..afba952abd28 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3068,6 +3068,7 @@ int ext4_walk_page_buffers(handle_t *handle, int do_journal_get_write_access(handle_t *handle, struct inode *inode, struct buffer_head *bh); void ext4_set_inode_mapping_order(struct inode *inode); +void ext4_enable_buffered_iomap(struct inode *inode); int ext4_nonda_switch(struct super_block *sb); #define FALL_BACK_TO_NONDELALLOC 1 #define CONVERT_INLINE_DATA 2 diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 9a8c225f2753..4534cf6f5e76 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -17,8 +17,12 @@ int ext4_inode_journal_mode(struct inode *inode) test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && !test_opt(inode->i_sb, DELALLOC))) { - /* We do not support data journalling for encrypted data */ - if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) + /* + * We do not support data journalling for encrypted data + * and buffered IOMAP path. 
+ */ + if (S_ISREG(inode->i_mode) && + (IS_ENCRYPTED(inode) || ext4_inode_buffered_iomap(inode))) return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */ return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */ } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 3fd8f0099852..ea64b9e9e382 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1340,6 +1340,7 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap, } } + ext4_enable_buffered_iomap(inode); ext4_set_inode_mapping_order(inode); ext4_update_inode_fsync_trans(handle, inode, 1); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cf83b4e619e0..0407e7b54dcd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -918,6 +918,9 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, if (ext4_has_inline_data(inode)) return -ERANGE; + /* inode using the iomap buffered I/O path should not go here. */ + if (WARN_ON_ONCE(ext4_inode_buffered_iomap(inode))) + return -EINVAL; map.m_lblk = iblock; map.m_len = bh->b_size >> inode->i_blkbits; @@ -2797,6 +2800,12 @@ static int ext4_do_writepages(struct mpage_da_data *mpd) if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) goto out_writepages; + /* inode using the iomap buffered I/O path should not go here. */ + if (WARN_ON_ONCE(ext4_inode_buffered_iomap(inode))) { + ret = -EINVAL; + goto out_writepages; + } + /* * If the filesystem has aborted, it is read-only, so return * right away instead of dumping stack traces later on that @@ -3929,6 +3938,9 @@ static int ext4_iomap_map_blocks(struct inode *inode, loff_t offset, { u8 blkbits = inode->i_blkbits; + /* inode using the buffer_head buffered I/O path should not go here. 
*/ + if (WARN_ON_ONCE(!ext4_inode_buffered_iomap(inode))) + return -EINVAL; if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK) return -EINVAL; @@ -4406,6 +4418,10 @@ static int ext4_iomap_writepages(struct address_space *mapping, .ops = &ext4_writeback_ops, }; + /* inode using the buffer_head buffered I/O path should not go here. */ + if (WARN_ON_ONCE(!ext4_inode_buffered_iomap(inode))) + return -EINVAL; + ret = ext4_emergency_state(sb); if (unlikely(ret)) return ret; @@ -5864,6 +5880,59 @@ static int check_igot_inode(struct inode *inode, ext4_iget_flags flags, return -EFSCORRUPTED; } +/* + * Determine whether an inode should use the iomap buffered I/O path. + * EXT4_STATE_BUFFERED_IOMAP is generally set at inode initialization + * time. Online switching of the buffered I/O path on an active inode is + * NOT supported, with the exception of changing a per-inode journal + * flag. + * + * For features like inline data, fsverity, and encryption that can be + * dynamically enabled or disabled, we check the superblock-level + * feature flags. If any of these is globally enabled, no inode is + * allowed into the iomap buffered I/O path. This avoids the complexity + * of dynamic toggling. + * + * For the global data journal mode (EXT4_MOUNT_JOURNAL_DATA), dynamic + * change through remount is deferred. It will only become available + * after the inode is re-initialized (i.e., after the last reference + * drops and the inode is re-read from disk with the journal flag + * cleared). + * + * For the per-inode data journal mode (EXT4_INODE_JOURNAL_DATA), + * dynamic changes take effect immediately. This is safe because + * address_space operations can be switched and all page cache can be + * dropped under i_rwsem and filemap_invalidate_lock. + * + * For extent-to-indirect block migration (via EXT4_IOC_SETFLAGS + * clearing EXT4_EXTENTS_FL), this operation is directly rejected for + * inodes using the iomap path. 
+ */ +void ext4_enable_buffered_iomap(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + + if (!S_ISREG(inode->i_mode)) + return; + if (ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE)) + return; + + /* Unsupported Features */ + if (ext4_has_feature_inline_data(sb)) + return; + if (ext4_has_feature_verity(sb)) + return; + if (ext4_has_feature_encrypt(sb)) + return; + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || + ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) + return; + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) + return; + + ext4_set_inode_state(inode, EXT4_STATE_BUFFERED_IOMAP); +} + void ext4_set_inode_mapping_order(struct inode *inode) { struct super_block *sb = inode->i_sb; @@ -6149,6 +6218,8 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, if (ret) goto bad_inode; + ext4_enable_buffered_iomap(inode); + if (S_ISREG(inode->i_mode)) { inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; @@ -7326,9 +7397,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) * the inode's in-core data-journaling state flag now. 
*/ - if (val) + if (val) { ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); - else { + ext4_clear_inode_state(inode, EXT4_STATE_BUFFERED_IOMAP); + } else { err = jbd2_journal_flush(journal, 0); if (err < 0) { jbd2_journal_unlock_updates(journal); @@ -7337,6 +7409,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) return err; } ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); + ext4_enable_buffered_iomap(inode); } ext4_set_aops(inode); ext4_set_inode_mapping_order(inode); diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 477d43d7e294..3b49ecf427ae 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -620,6 +620,8 @@ int ext4_ind_migrate(struct inode *inode) if (ext4_has_feature_bigalloc(inode->i_sb)) return -EOPNOTSUPP; + if (ext4_inode_buffered_iomap(inode)) + return -EOPNOTSUPP; /* * In order to get correct extent info, force all delayed allocation -- 2.52.0