AFFS did all the hard work of tracking metadata bhs dirtied for an inode but it actually never used this information as affs_file_fsync() just calls sync_blockdev() to write back all filesystem metadata bhs. After a discussion with the AFFS maintainer, it turns out nobody cares about AFFS performance, so let's keep this affs_file_fsync() behavior and just drop all the pointless tracking from AFFS. CC: David Sterba Signed-off-by: Jan Kara --- fs/affs/affs.h | 1 - fs/affs/amigaffs.c | 12 ++++++------ fs/affs/file.c | 25 +++++++++++-------------- fs/affs/inode.c | 13 +++++-------- fs/affs/namei.c | 9 ++++----- fs/affs/super.c | 1 - 6 files changed, 26 insertions(+), 35 deletions(-) diff --git a/fs/affs/affs.h b/fs/affs/affs.h index a0caf6ace860..406a0ef63e7b 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -44,7 +44,6 @@ struct affs_inode_info { struct mutex i_link_lock; /* Protects internal inode access. */ struct mutex i_ext_lock; /* Protects internal inode access. */ #define i_hash_lock i_ext_lock - struct mapping_metadata_bhs i_metadata_bhs; u32 i_blkcnt; /* block count */ u32 i_extcnt; /* extended block count */ u32 *i_lc; /* linear cache of extended blocks */ diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index bed4fc805e8e..6cc0fc9a4cbf 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -57,7 +57,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh) AFFS_TAIL(sb, dir_bh)->hash_chain = cpu_to_be32(ino); affs_adjust_checksum(dir_bh, ino); - mmb_mark_buffer_dirty(dir_bh, &AFFS_I(dir)->i_metadata_bhs); + mark_buffer_dirty(dir_bh); affs_brelse(dir_bh); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); @@ -100,7 +100,7 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh) else AFFS_TAIL(sb, bh)->hash_chain = ino; affs_adjust_checksum(bh, be32_to_cpu(ino) - hash_ino); - mmb_mark_buffer_dirty(bh, &AFFS_I(dir)->i_metadata_bhs); + mark_buffer_dirty(bh); AFFS_TAIL(sb, rem_bh)->parent = 0; retval = 0; break; @@ -180,7 +180,7 @@ 
affs_remove_link(struct dentry *dentry) affs_unlock_dir(dir); goto done; } - mmb_mark_buffer_dirty(link_bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(link_bh); memcpy(AFFS_TAIL(sb, bh)->name, AFFS_TAIL(sb, link_bh)->name, 32); retval = affs_insert_hash(dir, bh); @@ -188,7 +188,7 @@ affs_remove_link(struct dentry *dentry) affs_unlock_dir(dir); goto done; } - mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(bh); affs_unlock_dir(dir); iput(dir); @@ -203,7 +203,7 @@ affs_remove_link(struct dentry *dentry) __be32 ino2 = AFFS_TAIL(sb, link_bh)->link_chain; AFFS_TAIL(sb, bh)->link_chain = ino2; affs_adjust_checksum(bh, be32_to_cpu(ino2) - link_ino); - mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(bh); retval = 0; /* Fix the link count, if bh is a normal header block without links */ switch (be32_to_cpu(AFFS_TAIL(sb, bh)->stype)) { @@ -306,7 +306,7 @@ affs_remove_header(struct dentry *dentry) retval = affs_remove_hash(dir, bh); if (retval) goto done_unlock; - mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(bh); affs_unlock_dir(dir); diff --git a/fs/affs/file.c b/fs/affs/file.c index 144b17482d12..23e088a7ed4f 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -140,14 +140,14 @@ affs_alloc_extblock(struct inode *inode, struct buffer_head *bh, u32 ext) AFFS_TAIL(sb, new_bh)->parent = cpu_to_be32(inode->i_ino); affs_fix_checksum(sb, new_bh); - mmb_mark_buffer_dirty(new_bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(new_bh); tmp = be32_to_cpu(AFFS_TAIL(sb, bh)->extension); if (tmp) affs_warning(sb, "alloc_ext", "previous extension set (%x)", tmp); AFFS_TAIL(sb, bh)->extension = cpu_to_be32(blocknr); affs_adjust_checksum(bh, blocknr - tmp); - mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(bh); AFFS_I(inode)->i_extcnt++; mark_inode_dirty(inode); @@ -581,7 +581,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) 
memset(AFFS_DATA(bh) + boff, 0, tmp); be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp); affs_fix_checksum(sb, bh); - mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(bh); size += tmp; bidx++; } else if (bidx) { @@ -603,7 +603,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp); affs_fix_checksum(sb, bh); bh->b_state &= ~(1UL << BH_New); - mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(bh); if (prev_bh) { u32 tmp_next = be32_to_cpu(AFFS_DATA_HEAD(prev_bh)->next); @@ -613,8 +613,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize) bidx, tmp_next); AFFS_DATA_HEAD(prev_bh)->next = cpu_to_be32(bh->b_blocknr); affs_adjust_checksum(prev_bh, bh->b_blocknr - tmp_next); - mmb_mark_buffer_dirty(prev_bh, - &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(prev_bh); affs_brelse(prev_bh); } size += bsize; @@ -733,7 +732,7 @@ static int affs_write_end_ofs(const struct kiocb *iocb, AFFS_DATA_HEAD(bh)->size = cpu_to_be32( max(boff + tmp, be32_to_cpu(AFFS_DATA_HEAD(bh)->size))); affs_fix_checksum(sb, bh); - mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(bh); written += tmp; from += tmp; bidx++; @@ -766,13 +765,12 @@ static int affs_write_end_ofs(const struct kiocb *iocb, bidx, tmp_next); AFFS_DATA_HEAD(prev_bh)->next = cpu_to_be32(bh->b_blocknr); affs_adjust_checksum(prev_bh, bh->b_blocknr - tmp_next); - mmb_mark_buffer_dirty(prev_bh, - &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(prev_bh); } } affs_brelse(prev_bh); affs_fix_checksum(sb, bh); - mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(bh); written += bsize; from += bsize; bidx++; @@ -801,14 +799,13 @@ static int affs_write_end_ofs(const struct kiocb *iocb, bidx, tmp_next); AFFS_DATA_HEAD(prev_bh)->next = cpu_to_be32(bh->b_blocknr); affs_adjust_checksum(prev_bh, bh->b_blocknr - tmp_next); - mmb_mark_buffer_dirty(prev_bh, - 
&AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(prev_bh); } } else if (be32_to_cpu(AFFS_DATA_HEAD(bh)->size) < tmp) AFFS_DATA_HEAD(bh)->size = cpu_to_be32(tmp); affs_brelse(prev_bh); affs_fix_checksum(sb, bh); - mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(bh); written += tmp; from += tmp; bidx++; @@ -945,7 +942,7 @@ affs_truncate(struct inode *inode) } AFFS_TAIL(sb, ext_bh)->extension = 0; affs_fix_checksum(sb, ext_bh); - mmb_mark_buffer_dirty(ext_bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(ext_bh); affs_brelse(ext_bh); if (inode->i_size) { diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 5dd1b016bcb0..d4a3f381c4bc 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -206,7 +206,7 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc) } } affs_fix_checksum(sb, bh); - mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(bh); affs_brelse(bh); affs_free_prealloc(inode); return 0; @@ -266,11 +266,8 @@ affs_evict_inode(struct inode *inode) if (!inode->i_nlink) { inode->i_size = 0; affs_truncate(inode); - } else { - mmb_sync(&AFFS_I(inode)->i_metadata_bhs); } - mmb_invalidate(&AFFS_I(inode)->i_metadata_bhs); clear_inode(inode); affs_free_prealloc(inode); cache_page = (unsigned long)AFFS_I(inode)->i_lc; @@ -305,7 +302,7 @@ affs_new_inode(struct inode *dir) bh = affs_getzeroblk(sb, block); if (!bh) goto err_bh; - mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(bh); affs_brelse(bh); inode->i_uid = current_fsuid(); @@ -393,17 +390,17 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3 AFFS_TAIL(sb, bh)->link_chain = chain; AFFS_TAIL(sb, inode_bh)->link_chain = cpu_to_be32(block); affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain)); - mmb_mark_buffer_dirty(inode_bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(inode_bh); set_nlink(inode, 2); ihold(inode); } affs_fix_checksum(sb, bh); - 
mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(bh); dentry->d_fsdata = (void *)(long)bh->b_blocknr; affs_lock_dir(dir); retval = affs_insert_hash(dir, bh); - mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(bh); affs_unlock_dir(dir); affs_unlock_link(inode); diff --git a/fs/affs/namei.c b/fs/affs/namei.c index c3c6532da4b0..57d8d755aada 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -373,7 +373,7 @@ affs_symlink(struct mnt_idmap *idmap, struct inode *dir, } *p = 0; inode->i_size = i + 1; - mmb_mark_buffer_dirty(bh, &AFFS_I(inode)->i_metadata_bhs); + mark_buffer_dirty(bh); affs_brelse(bh); mark_inode_dirty(inode); @@ -443,8 +443,7 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, /* TODO: move it back to old_dir, if error? */ done: - mmb_mark_buffer_dirty(bh, - &AFFS_I(retval ? old_dir : new_dir)->i_metadata_bhs); + mark_buffer_dirty(bh); affs_brelse(bh); return retval; } @@ -497,8 +496,8 @@ affs_xrename(struct inode *old_dir, struct dentry *old_dentry, retval = affs_insert_hash(old_dir, bh_new); affs_unlock_dir(old_dir); done: - mmb_mark_buffer_dirty(bh_old, &AFFS_I(new_dir)->i_metadata_bhs); - mmb_mark_buffer_dirty(bh_new, &AFFS_I(old_dir)->i_metadata_bhs); + mark_buffer_dirty(bh_old); + mark_buffer_dirty(bh_new); affs_brelse(bh_old); affs_brelse(bh_new); return retval; diff --git a/fs/affs/super.c b/fs/affs/super.c index 079f36e1ddec..8451647f3fea 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -108,7 +108,6 @@ static struct inode *affs_alloc_inode(struct super_block *sb) i->i_lc = NULL; i->i_ext_bh = NULL; i->i_pa_cnt = 0; - mmb_init(&i->i_metadata_bhs, &i->vfs_inode.i_data); return &i->vfs_inode; } -- 2.51.0 Currently every ext4 inode gets mapping_metadata_bhs struct although it is only needed when running without a journal and only for inodes where any metadata was dirtied. 
Allocate mapping_metadata_bhs struct on demand when dirtying the first metadata buffer for the inode. Signed-off-by: Jan Kara --- fs/ext4/ext4.h | 2 +- fs/ext4/ext4_jbd2.c | 24 +++++++++++++++++++++--- fs/ext4/fsync.c | 12 ++++++++---- fs/ext4/inode.c | 9 +++++---- fs/ext4/super.c | 8 +++++--- 5 files changed, 40 insertions(+), 15 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 94283a991e5c..6bb29a20420f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1117,7 +1117,7 @@ struct ext4_inode_info { struct rw_semaphore i_data_sem; struct inode vfs_inode; struct jbd2_inode *jinode; - struct mapping_metadata_bhs i_metadata_bhs; + struct mapping_metadata_bhs *i_metadata_bhs; /* * File creation time. Its function is same as that of diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 9a8c225f2753..74f05bd0cdde 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -350,6 +350,21 @@ int __ext4_journal_get_create_access(const char *where, unsigned int line, return 0; } +static void ext4_inode_attach_mmb(struct inode *inode) +{ + struct mapping_metadata_bhs *mmb; + + /* + * It's difficult to handle failure when marking buffer dirty without + * leaving the filesystem corrupted + */ + mmb = kmalloc_obj(*mmb, GFP_KERNEL | __GFP_NOFAIL); + mmb_init(mmb, inode->i_mapping); + /* Someone swapped another mmb before us? 
*/ + if (cmpxchg(&EXT4_I(inode)->i_metadata_bhs, NULL, mmb)) + kfree(mmb); +} + int __ext4_handle_dirty_metadata(const char *where, unsigned int line, handle_t *handle, struct inode *inode, struct buffer_head *bh) @@ -389,11 +404,14 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, err); } } else { - if (inode) + if (inode) { + if (!EXT4_I(inode)->i_metadata_bhs) + ext4_inode_attach_mmb(inode); mmb_mark_buffer_dirty(bh, - &EXT4_I(inode)->i_metadata_bhs); - else + EXT4_I(inode)->i_metadata_bhs); + } else { mark_buffer_dirty(bh); + } if (inode && inode_needs_sync(inode)) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 924726dcc85f..e25d365e1179 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -46,6 +46,7 @@ static int ext4_sync_parent(struct inode *inode) { struct dentry *dentry, *next; + struct mapping_metadata_bhs *mmb; int ret = 0; if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) @@ -68,9 +69,12 @@ static int ext4_sync_parent(struct inode *inode) * through ext4_evict_inode()) and so we are safe to flush * metadata blocks and the inode. 
*/ - ret = mmb_sync(&EXT4_I(inode)->i_metadata_bhs); - if (ret) - break; + mmb = READ_ONCE(EXT4_I(inode)->i_metadata_bhs); + if (mmb) { + ret = mmb_sync(mmb); + if (ret) + break; + } ret = sync_inode_metadata(inode, 1); if (ret) break; @@ -89,7 +93,7 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end, }; int ret; - ret = mmb_fsync_noflush(file, &EXT4_I(inode)->i_metadata_bhs, + ret = mmb_fsync_noflush(file, EXT4_I(inode)->i_metadata_bhs, start, end, datasync); if (ret) return ret; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c2c2d6ac7f3d..3e66e9510909 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -195,9 +195,8 @@ void ext4_evict_inode(struct inode *inode) ext4_warning_inode(inode, "data will be lost"); truncate_inode_pages_final(&inode->i_data); - /* Avoid mballoc special inode which has no proper iops */ - if (!EXT4_SB(inode->i_sb)->s_journal) - mmb_sync(&EXT4_I(inode)->i_metadata_bhs); + if (EXT4_I(inode)->i_metadata_bhs) + mmb_sync(EXT4_I(inode)->i_metadata_bhs); goto no_delete; } @@ -3451,6 +3450,7 @@ static bool ext4_release_folio(struct folio *folio, gfp_t wait) static bool ext4_inode_datasync_dirty(struct inode *inode) { journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + struct mapping_metadata_bhs *mmb; if (journal) { if (jbd2_transaction_committed(journal, @@ -3461,8 +3461,9 @@ static bool ext4_inode_datasync_dirty(struct inode *inode) return true; } + mmb = READ_ONCE(EXT4_I(inode)->i_metadata_bhs); /* Any metadata buffers to write? 
*/ - if (mmb_has_buffers(&EXT4_I(inode)->i_metadata_bhs)) + if (mmb && mmb_has_buffers(mmb)) return true; return inode_state_read_once(inode) & I_DIRTY_DATASYNC; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6a77db4d3124..92134ea4620c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1430,7 +1430,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); ext4_fc_init_inode(&ei->vfs_inode); spin_lock_init(&ei->i_fc_lock); - mmb_init(&ei->i_metadata_bhs, &ei->vfs_inode.i_data); + ei->i_metadata_bhs = NULL; return &ei->vfs_inode; } @@ -1527,8 +1527,10 @@ static void destroy_inodecache(void) void ext4_clear_inode(struct inode *inode) { ext4_fc_del(inode); - if (!EXT4_SB(inode->i_sb)->s_journal) - mmb_invalidate(&EXT4_I(inode)->i_metadata_bhs); + if (EXT4_I(inode)->i_metadata_bhs) { + mmb_invalidate(EXT4_I(inode)->i_metadata_bhs); + kfree(EXT4_I(inode)->i_metadata_bhs); + } clear_inode(inode); ext4_discard_preallocations(inode); /* -- 2.51.0 Currently metadata bh tracking does not track inode buffers because they are usually shared by several inodes and so our linked list tracking cannot be used. On fsync we instead call sync_inode_metadata() to write the inode, where filesystems' .write_inode methods detect data integrity writeback and take care to submit the inode buffer to disk and wait for it in that case. This is however racy, as for example the flush worker can submit normal (WB_SYNC_NONE) inode writeback first, which makes the inode clean and copies the inode to the buffer but doesn't submit the buffer for IO. Thus the sync_inode_metadata() call does nothing and we fail to persist the inode buffer to disk on fsync(2). Fix the problem by allowing the filesystem to record the number of the block backing the inode in the mmb structure; mmb_sync() then takes care to write out the corresponding buffer and wait for it. 
Signed-off-by: Jan Kara --- fs/buffer.c | 34 +++++++++++++++++++++++----------- include/linux/fs.h | 1 + 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index b0b3792b1496..dba29a45346b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -477,12 +477,14 @@ EXPORT_SYMBOL(mark_buffer_async_write); * using RCU, grab the lock, verify we didn't race with somebody detaching the * bh / moving it to different inode and only then proceeding. */ +#define INVALID_BLK (~0ULL) void mmb_init(struct mapping_metadata_bhs *mmb, struct address_space *mapping) { spin_lock_init(&mmb->lock); INIT_LIST_HEAD(&mmb->list); mmb->mapping = mapping; + mmb->inode_blk = INVALID_BLK; } EXPORT_SYMBOL(mmb_init); @@ -593,8 +595,18 @@ int mmb_sync(struct mapping_metadata_bhs *mmb) } } } - spin_unlock(&mmb->lock); + + /* Writeout inode buffer head */ + if (mmb->inode_blk != INVALID_BLK) { + bh = sb_find_get_block(mmb->mapping->host->i_sb, mmb->inode_blk); + write_dirty_buffer(bh, REQ_SYNC); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + err = -EIO; + brelse(bh); + } + blk_finish_plug(&plug); spin_lock(&mmb->lock); @@ -646,18 +658,18 @@ int mmb_fsync_noflush(struct file *file, struct mapping_metadata_bhs *mmb, if (err) return err; - if (mmb) - ret = mmb_sync(mmb); if (!(inode_state_read_once(inode) & I_DIRTY_ALL)) - goto out; + goto sync_buffers; if (datasync && !(inode_state_read_once(inode) & I_DIRTY_DATASYNC)) - goto out; - - err = sync_inode_metadata(inode, 1); - if (ret == 0) - ret = err; - -out: + goto sync_buffers; + + ret = sync_inode_metadata(inode, 1); +sync_buffers: + if (mmb) { + err = mmb_sync(mmb); + if (ret == 0) + ret = err; + } /* check and advance again to catch errors after syncing out buffers */ err = file_check_and_advance_wb_err(file); if (ret == 0) diff --git a/include/linux/fs.h b/include/linux/fs.h index 11559c513dfb..435a41e4c90f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -446,6 +446,7 @@ extern const struct 
address_space_operations empty_aops; /* Structure for tracking metadata buffer heads associated with the mapping */ struct mapping_metadata_bhs { struct address_space *mapping; /* Mapping bhs are associated with */ + sector_t inode_blk; /* Number of block containing the inode */ spinlock_t lock; /* Lock protecting bh list */ struct list_head list; /* The list of bhs (b_assoc_buffers) */ }; -- 2.51.0 Use mmb inode buffer writeout infrastructure to reliably write out inode's inode table block on fsync(2). Signed-off-by: Jan Kara --- fs/ext2/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 74aca5eb572d..6ce832da944f 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1612,6 +1612,7 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) } else for (n = 0; n < EXT2_N_BLOCKS; n++) raw_inode->i_block[n] = ei->i_data[n]; mark_buffer_dirty(bh); + ei->i_metadata_bhs.inode_blk = bh->b_blocknr; if (do_sync) { sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) { @@ -1627,7 +1628,7 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) int ext2_write_inode(struct inode *inode, struct writeback_control *wbc) { - return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); + return __ext2_write_inode(inode, 0); } int ext2_getattr(struct mnt_idmap *idmap, const struct path *path, -- 2.51.0 Use mmb inode buffer writeout infrastructure to reliably write out inode's block on fsync(2). 
Signed-off-by: Jan Kara --- fs/udf/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 67bcf83758c8..4102d3482319 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1707,7 +1707,7 @@ void udf_update_extra_perms(struct inode *inode, umode_t mode) int udf_write_inode(struct inode *inode, struct writeback_control *wbc) { - return udf_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); + return udf_update_inode(inode, 0); } static int udf_sync_inode(struct inode *inode) @@ -1937,6 +1937,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) /* write the data blocks */ mark_buffer_dirty(bh); + iinfo->i_metadata_bhs.inode_blk = bh->b_blocknr; if (do_sync) { sync_dirty_buffer(bh); if (buffer_write_io_error(bh)) { -- 2.51.0 Use mmb inode buffer writeout infrastructure to reliably write out inode's buffer on fsync(2). Signed-off-by: Jan Kara --- fs/fat/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 28f78df086ef..4ca00b7a618b 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -907,6 +907,7 @@ static int __fat_write_inode(struct inode *inode, int wait) } spin_unlock(&sbi->inode_hash_lock); mark_buffer_dirty(bh); + MSDOS_I(inode)->i_metadata_bhs.inode_blk = bh->b_blocknr; err = 0; if (wait) err = sync_dirty_buffer(bh); @@ -925,7 +926,7 @@ static int fat_write_inode(struct inode *inode, struct writeback_control *wbc) err = fat_clusters_flush(sb); mutex_unlock(&MSDOS_SB(sb)->s_lock); } else - err = __fat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); + err = __fat_write_inode(inode, 0); return err; } -- 2.51.0 Use mmb inode buffer writeout infrastructure to reliably write out inode's buffer on fsync(2). 
Signed-off-by: Jan Kara --- fs/minix/inode.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 9c6bac248907..e3e05c9308bd 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -693,14 +693,7 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc) bh = V2_minix_update_inode(inode); if (!bh) return -EIO; - if (wbc->sync_mode == WB_SYNC_ALL && buffer_dirty(bh)) { - sync_dirty_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) { - printk("IO error syncing minix inode [%s:%08llx]\n", - inode->i_sb->s_id, inode->i_ino); - err = -EIO; - } - } + minix_i(inode)->i_metadata_bhs.inode_blk = bh->b_blocknr; brelse (bh); return err; } -- 2.51.0 Use mmb inode buffer writeout infrastructure to reliably write out inode's buffer on fsync(2). Signed-off-by: Jan Kara --- fs/bfs/inode.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 19e49c8cf750..16d351b2f122 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -165,11 +165,7 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); mark_buffer_dirty(bh); - if (wbc->sync_mode == WB_SYNC_ALL) { - sync_dirty_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) - err = -EIO; - } + BFS_I(inode)->i_metadata_bhs.inode_blk = bh->b_blocknr; brelse(bh); mutex_unlock(&info->bfs_lock); return err; -- 2.51.0 Use mmb inode buffer writeout infrastructure to reliably write out inode's inode table block on fsync(2) in nojournal mode (from ext4_sync_parent() and ext4_fsync_nojournal()). This significantly simplifies the code as we don't have to explicitly handle inode buffer writeback in ext4_write_inode() and thus we can also remove sync_inode_metadata() calls from ext4_sync_parent() and ext4_write_inode() call from ext4_fsync_nojournal(). 
Signed-off-by: Jan Kara --- fs/ext4/ext4_jbd2.c | 2 +- fs/ext4/ext4_jbd2.h | 2 ++ fs/ext4/fsync.c | 12 ------------ fs/ext4/inode.c | 24 +++++------------------- 4 files changed, 8 insertions(+), 32 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 74f05bd0cdde..6bbaf72108fd 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -350,7 +350,7 @@ int __ext4_journal_get_create_access(const char *where, unsigned int line, return 0; } -static void ext4_inode_attach_mmb(struct inode *inode) +void ext4_inode_attach_mmb(struct inode *inode) { struct mapping_metadata_bhs *mmb; diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 63d17c5201b5..2a01b8279c88 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -122,6 +122,8 @@ #define EXT4_HT_EXT_CONVERT 11 #define EXT4_HT_MAX 12 +void ext4_inode_attach_mmb(struct inode *inode); + int ext4_mark_iloc_dirty(handle_t *handle, struct inode *inode, diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index e25d365e1179..af84489e57c6 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -75,9 +75,6 @@ static int ext4_sync_parent(struct inode *inode) if (ret) break; } - ret = sync_inode_metadata(inode, 1); - if (ret) - break; } dput(dentry); return ret; @@ -87,10 +84,6 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end, int datasync, bool *needs_barrier) { struct inode *inode = file->f_inode; - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 0, - }; int ret; ret = mmb_fsync_noflush(file, EXT4_I(inode)->i_metadata_bhs, @@ -98,11 +91,6 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end, if (ret) return ret; - /* Force writeout of inode table buffer to disk */ - ret = ext4_write_inode(inode, &wbc); - if (ret) - return ret; - ret = ext4_sync_parent(inode); if (test_opt(inode->i_sb, BARRIER)) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3e66e9510909..09506b4de1b2 100644 --- a/fs/ext4/inode.c +++ 
b/fs/ext4/inode.c @@ -5786,24 +5786,6 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) err = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal, EXT4_I(inode)->i_sync_tid); - } else { - struct ext4_iloc iloc; - - err = __ext4_get_inode_loc_noinmem(inode, &iloc); - if (err) - return err; - /* - * sync(2) will flush the whole buffer cache. No need to do - * it here separately for each inode. - */ - if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) - sync_dirty_buffer(iloc.bh); - if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { - ext4_error_inode_block(inode, iloc.bh->b_blocknr, EIO, - "IO error syncing inode"); - err = -EIO; - } - brelse(iloc.bh); } return err; } @@ -6348,7 +6330,11 @@ int ext4_mark_iloc_dirty(handle_t *handle, /* the do_update_inode consumes one bh->b_count */ get_bh(iloc->bh); - + if (!ext4_handle_valid(handle)) { + if (!EXT4_I(inode)->i_metadata_bhs) + ext4_inode_attach_mmb(inode); + EXT4_I(inode)->i_metadata_bhs->inode_blk = iloc->bh->b_blocknr; + } /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ err = ext4_do_update_inode(handle, inode, iloc); put_bh(iloc->bh); -- 2.51.0