Currently metadata bh tracking does not track inode buffers because they are usually shared by several inodes and so our linked list tracking cannot be used. On fsync we instead call sync_inode_metadata() to write the inode, and filesystems' .write_inode methods detect data integrity writeback and take care to submit the inode buffer to disk and wait for it in that case. This is however racy: for example, the flush worker can submit normal (WB_SYNC_NONE) inode writeback first, which makes the inode clean and copies the inode to the buffer but doesn't submit the buffer for IO. Thus the sync_inode_metadata() call does nothing and we fail to persist the inode buffer to disk on fsync(2). Fix the problem by allowing the filesystem to set the number of the block backing the inode in the mmb structure; mmb_sync() then takes care to write out the corresponding buffer and wait for it. Signed-off-by: Jan Kara --- fs/buffer.c | 34 +++++++++++++++++++++++----------- include/linux/fs.h | 1 + 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index b0b3792b1496..dba29a45346b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -477,12 +477,14 @@ EXPORT_SYMBOL(mark_buffer_async_write); * using RCU, grab the lock, verify we didn't race with somebody detaching the * bh / moving it to different inode and only then proceeding. 
*/ +#define INVALID_BLK (~0ULL) void mmb_init(struct mapping_metadata_bhs *mmb, struct address_space *mapping) { spin_lock_init(&mmb->lock); INIT_LIST_HEAD(&mmb->list); mmb->mapping = mapping; + mmb->inode_blk = INVALID_BLK; } EXPORT_SYMBOL(mmb_init); @@ -593,8 +595,18 @@ int mmb_sync(struct mapping_metadata_bhs *mmb) } } } - spin_unlock(&mmb->lock); + + /* Writeout inode buffer head */ + if (mmb->inode_blk != INVALID_BLK) { + bh = sb_find_get_block(mmb->mapping->host->i_sb, mmb->inode_blk); + write_dirty_buffer(bh, REQ_SYNC); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + err = -EIO; + brelse(bh); + } + blk_finish_plug(&plug); spin_lock(&mmb->lock); @@ -646,18 +658,18 @@ int mmb_fsync_noflush(struct file *file, struct mapping_metadata_bhs *mmb, if (err) return err; - if (mmb) - ret = mmb_sync(mmb); if (!(inode_state_read_once(inode) & I_DIRTY_ALL)) - goto out; + goto sync_buffers; if (datasync && !(inode_state_read_once(inode) & I_DIRTY_DATASYNC)) - goto out; - - err = sync_inode_metadata(inode, 1); - if (ret == 0) - ret = err; - -out: + goto sync_buffers; + + ret = sync_inode_metadata(inode, 1); +sync_buffers: + if (mmb) { + err = mmb_sync(mmb); + if (ret == 0) + ret = err; + } /* check and advance again to catch errors after syncing out buffers */ err = file_check_and_advance_wb_err(file); if (ret == 0) diff --git a/include/linux/fs.h b/include/linux/fs.h index 11559c513dfb..435a41e4c90f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -446,6 +446,7 @@ extern const struct address_space_operations empty_aops; /* Structure for tracking metadata buffer heads associated with the mapping */ struct mapping_metadata_bhs { struct address_space *mapping; /* Mapping bhs are associated with */ + sector_t inode_blk; /* Number of block containing the inode */ spinlock_t lock; /* Lock protecting bh list */ struct list_head list; /* The list of bhs (b_assoc_buffers) */ }; -- 2.51.0