There's only a single caller of fsync_buffers_list() so untangle the code a bit by folding fsync_buffers_list() into sync_mapping_buffers(). Also merge the comments and update them to reflect the current state of the code. Signed-off-by: Jan Kara --- fs/buffer.c | 180 +++++++++++++++++++++++----------------------- 1 file changed, 80 insertions(+), 100 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 1c0e7c81a38b..18012afb8289 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -54,7 +54,6 @@ #include "internal.h" -static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, enum rw_hint hint, struct writeback_control *wbc); @@ -531,22 +530,96 @@ EXPORT_SYMBOL_GPL(inode_has_buffers); * @mapping: the mapping which wants those buffers written * * Starts I/O against the buffers at mapping->i_private_list, and waits upon - * that I/O. + * that I/O. Basically, this is a convenience function for fsync(). @mapping + * is a file or directory which needs those buffers to be written for a + * successful fsync(). * - * Basically, this is a convenience function for fsync(). - * @mapping is a file or directory which needs those buffers to be written for - * a successful fsync(). + * We have conflicting pressures: we want to make sure that all + * initially dirty buffers get waited on, but that any subsequently + * dirtied buffers don't. After all, we don't want fsync to last + * forever if somebody is actively writing to the file. + * + * Do this in two main stages: first we copy dirty buffers to a + * temporary inode list, queueing the writes as we go. Then we clean + * up, waiting for those writes to complete. mark_buffer_dirty_inode() + * doesn't touch b_assoc_buffers list if b_assoc_map is not NULL so we + * are sure the buffer stays on our list until IO completes (at which point + * it can be reaped). 
*/ int sync_mapping_buffers(struct address_space *mapping) { struct address_space *buffer_mapping = mapping->host->i_sb->s_bdev->bd_mapping; + struct buffer_head *bh; + int err = 0; + struct blk_plug plug; + LIST_HEAD(tmp); if (list_empty(&mapping->i_private_list)) return 0; - return fsync_buffers_list(&buffer_mapping->i_private_lock, - &mapping->i_private_list); + blk_start_plug(&plug); + + spin_lock(&buffer_mapping->i_private_lock); + while (!list_empty(&mapping->i_private_list)) { + bh = BH_ENTRY(mapping->i_private_list.next); + WARN_ON_ONCE(bh->b_assoc_map != mapping); + __remove_assoc_queue(bh); + /* Avoid race with mark_buffer_dirty_inode() which does + * a lockless check and we rely on seeing the dirty bit */ + smp_mb(); + if (buffer_dirty(bh) || buffer_locked(bh)) { + list_add(&bh->b_assoc_buffers, &tmp); + bh->b_assoc_map = mapping; + if (buffer_dirty(bh)) { + get_bh(bh); + spin_unlock(&buffer_mapping->i_private_lock); + /* + * Ensure any pending I/O completes so that + * write_dirty_buffer() actually writes the + * current contents - it is a noop if I/O is + * still in flight on potentially older + * contents. + */ + write_dirty_buffer(bh, REQ_SYNC); + + /* + * Kick off IO for the previous mapping. Note + * that we will not run the very last mapping, + * wait_on_buffer() will do that for us + * through sync_buffer(). 
+ */ + brelse(bh); + spin_lock(&buffer_mapping->i_private_lock); + } + } + } + + spin_unlock(&buffer_mapping->i_private_lock); + blk_finish_plug(&plug); + spin_lock(&buffer_mapping->i_private_lock); + + while (!list_empty(&tmp)) { + bh = BH_ENTRY(tmp.prev); + get_bh(bh); + __remove_assoc_queue(bh); + /* Avoid race with mark_buffer_dirty_inode() which does + * a lockless check and we rely on seeing the dirty bit */ + smp_mb(); + if (buffer_dirty(bh)) { + list_add(&bh->b_assoc_buffers, + &mapping->i_private_list); + bh->b_assoc_map = mapping; + } + spin_unlock(&buffer_mapping->i_private_lock); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + err = -EIO; + brelse(bh); + spin_lock(&buffer_mapping->i_private_lock); + } + spin_unlock(&buffer_mapping->i_private_lock); + return err; } EXPORT_SYMBOL(sync_mapping_buffers); @@ -719,99 +792,6 @@ bool block_dirty_folio(struct address_space *mapping, struct folio *folio) } EXPORT_SYMBOL(block_dirty_folio); -/* - * Write out and wait upon a list of buffers. - * - * We have conflicting pressures: we want to make sure that all - * initially dirty buffers get waited on, but that any subsequently - * dirtied buffers don't. After all, we don't want fsync to last - * forever if somebody is actively writing to the file. - * - * Do this in two main stages: first we copy dirty buffers to a - * temporary inode list, queueing the writes as we go. Then we clean - * up, waiting for those writes to complete. - * - * During this second stage, any subsequent updates to the file may end - * up refiling the buffer on the original inode's dirty list again, so - * there is a chance we will end up with a buffer queued for write but - * not yet completed on that list. So, as a final cleanup we go through - * the osync code to catch these locked, dirty buffers without requeuing - * any newly dirty buffers for write. 
- */ -static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) -{ - struct buffer_head *bh; - struct address_space *mapping; - int err = 0; - struct blk_plug plug; - LIST_HEAD(tmp); - - blk_start_plug(&plug); - - spin_lock(lock); - while (!list_empty(list)) { - bh = BH_ENTRY(list->next); - mapping = bh->b_assoc_map; - __remove_assoc_queue(bh); - /* Avoid race with mark_buffer_dirty_inode() which does - * a lockless check and we rely on seeing the dirty bit */ - smp_mb(); - if (buffer_dirty(bh) || buffer_locked(bh)) { - list_add(&bh->b_assoc_buffers, &tmp); - bh->b_assoc_map = mapping; - if (buffer_dirty(bh)) { - get_bh(bh); - spin_unlock(lock); - /* - * Ensure any pending I/O completes so that - * write_dirty_buffer() actually writes the - * current contents - it is a noop if I/O is - * still in flight on potentially older - * contents. - */ - write_dirty_buffer(bh, REQ_SYNC); - - /* - * Kick off IO for the previous mapping. Note - * that we will not run the very last mapping, - * wait_on_buffer() will do that for us - * through sync_buffer(). - */ - brelse(bh); - spin_lock(lock); - } - } - } - - spin_unlock(lock); - blk_finish_plug(&plug); - spin_lock(lock); - - while (!list_empty(&tmp)) { - bh = BH_ENTRY(tmp.prev); - get_bh(bh); - mapping = bh->b_assoc_map; - __remove_assoc_queue(bh); - /* Avoid race with mark_buffer_dirty_inode() which does - * a lockless check and we rely on seeing the dirty bit */ - smp_mb(); - if (buffer_dirty(bh)) { - list_add(&bh->b_assoc_buffers, - &mapping->i_private_list); - bh->b_assoc_map = mapping; - } - spin_unlock(lock); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - err = -EIO; - brelse(bh); - spin_lock(lock); - } - - spin_unlock(lock); - return err; -} - /* * Invalidate any and all dirty buffers on a given inode. We are * probably unmounting the fs, but that doesn't mean we have already -- 2.51.0