Currently every ext4 inode gets a mapping_metadata_bhs struct although it is only needed when running without a journal and only for inodes where any metadata was dirtied. Allocate the mapping_metadata_bhs struct on demand when dirtying the first metadata buffer for the inode. Signed-off-by: Jan Kara --- fs/ext4/ext4.h | 2 +- fs/ext4/ext4_jbd2.c | 24 +++++++++++++++++++++--- fs/ext4/fsync.c | 12 ++++++++---- fs/ext4/inode.c | 9 +++++---- fs/ext4/super.c | 8 +++++--- 5 files changed, 40 insertions(+), 15 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 94283a991e5c..6bb29a20420f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1117,7 +1117,7 @@ struct ext4_inode_info { struct rw_semaphore i_data_sem; struct inode vfs_inode; struct jbd2_inode *jinode; - struct mapping_metadata_bhs i_metadata_bhs; + struct mapping_metadata_bhs *i_metadata_bhs; /* * File creation time. Its function is same as that of diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 9a8c225f2753..74f05bd0cdde 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -350,6 +350,21 @@ int __ext4_journal_get_create_access(const char *where, unsigned int line, return 0; } +static void ext4_inode_attach_mmb(struct inode *inode) +{ + struct mapping_metadata_bhs *mmb; + + /* + * It's difficult to handle failure when marking buffer dirty without + * leaving filesystem corrupted + */ + mmb = kmalloc_obj(*mmb, GFP_KERNEL | __GFP_NOFAIL); + mmb_init(mmb, inode->i_mapping); + /* Someone swapped another mmb before us? 
*/ + if (cmpxchg(&EXT4_I(inode)->i_metadata_bhs, NULL, mmb)) + kfree(mmb); +} + int __ext4_handle_dirty_metadata(const char *where, unsigned int line, handle_t *handle, struct inode *inode, struct buffer_head *bh) @@ -389,11 +404,14 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, err); } } else { - if (inode) + if (inode) { + if (!EXT4_I(inode)->i_metadata_bhs) + ext4_inode_attach_mmb(inode); mmb_mark_buffer_dirty(bh, - &EXT4_I(inode)->i_metadata_bhs); - else + EXT4_I(inode)->i_metadata_bhs); + } else { mark_buffer_dirty(bh); + } if (inode && inode_needs_sync(inode)) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 924726dcc85f..e25d365e1179 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -46,6 +46,7 @@ static int ext4_sync_parent(struct inode *inode) { struct dentry *dentry, *next; + struct mapping_metadata_bhs *mmb; int ret = 0; if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) @@ -68,9 +69,12 @@ static int ext4_sync_parent(struct inode *inode) * through ext4_evict_inode()) and so we are safe to flush * metadata blocks and the inode. 
*/ - ret = mmb_sync(&EXT4_I(inode)->i_metadata_bhs); - if (ret) - break; + mmb = READ_ONCE(EXT4_I(inode)->i_metadata_bhs); + if (mmb) { + ret = mmb_sync(mmb); + if (ret) + break; + } ret = sync_inode_metadata(inode, 1); if (ret) break; @@ -89,7 +93,7 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end, }; int ret; - ret = mmb_fsync_noflush(file, &EXT4_I(inode)->i_metadata_bhs, + ret = mmb_fsync_noflush(file, EXT4_I(inode)->i_metadata_bhs, start, end, datasync); if (ret) return ret; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c2c2d6ac7f3d..3e66e9510909 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -195,9 +195,8 @@ void ext4_evict_inode(struct inode *inode) ext4_warning_inode(inode, "data will be lost"); truncate_inode_pages_final(&inode->i_data); - /* Avoid mballoc special inode which has no proper iops */ - if (!EXT4_SB(inode->i_sb)->s_journal) - mmb_sync(&EXT4_I(inode)->i_metadata_bhs); + if (EXT4_I(inode)->i_metadata_bhs) + mmb_sync(EXT4_I(inode)->i_metadata_bhs); goto no_delete; } @@ -3451,6 +3450,7 @@ static bool ext4_release_folio(struct folio *folio, gfp_t wait) static bool ext4_inode_datasync_dirty(struct inode *inode) { journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + struct mapping_metadata_bhs *mmb; if (journal) { if (jbd2_transaction_committed(journal, @@ -3461,8 +3461,9 @@ static bool ext4_inode_datasync_dirty(struct inode *inode) return true; } + mmb = READ_ONCE(EXT4_I(inode)->i_metadata_bhs); /* Any metadata buffers to write? 
*/ - if (mmb_has_buffers(&EXT4_I(inode)->i_metadata_bhs)) + if (mmb && mmb_has_buffers(mmb)) return true; return inode_state_read_once(inode) & I_DIRTY_DATASYNC; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6a77db4d3124..92134ea4620c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1430,7 +1430,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); ext4_fc_init_inode(&ei->vfs_inode); spin_lock_init(&ei->i_fc_lock); - mmb_init(&ei->i_metadata_bhs, &ei->vfs_inode.i_data); + ei->i_metadata_bhs = NULL; return &ei->vfs_inode; } @@ -1527,8 +1527,10 @@ static void destroy_inodecache(void) void ext4_clear_inode(struct inode *inode) { ext4_fc_del(inode); - if (!EXT4_SB(inode->i_sb)->s_journal) - mmb_invalidate(&EXT4_I(inode)->i_metadata_bhs); + if (EXT4_I(inode)->i_metadata_bhs) { + mmb_invalidate(EXT4_I(inode)->i_metadata_bhs); + kfree(EXT4_I(inode)->i_metadata_bhs); + } clear_inode(inode); ext4_discard_preallocations(inode); /* -- 2.51.0