ext4_do_writepages() calls ext4_destroy_inline_data() which acquires
xattr_sem while s_writepages_rwsem is held (read).  This creates a
circular lock dependency:

  CPU0                               CPU1
  ----                               ----
  ext4_writepages()
    ext4_writepages_down_read()
      [holds s_writepages_rwsem]
                                     ext4_evict_inode()
                                       __ext4_mark_inode_dirty()
                                         ext4_expand_extra_isize_ea()
                                           ext4_xattr_block_set()
                                             [holds xattr_sem]
                                             iput(old_bh inode)
                                               write_inode_now()
                                                 ext4_writepages()
                                                   ext4_writepages_down_read()
                                                   [BLOCKED on s_writepages_rwsem]
    ext4_do_writepages()
      ext4_destroy_inline_data()
        down_write(xattr_sem)
        [BLOCKED on xattr_sem]

Fix by temporarily dropping s_writepages_rwsem for the entire inline
data handling block, including the journal handle start/stop.  The
rwsem must be dropped before ext4_journal_start() -- not between
journal_start and journal_stop -- to avoid a secondary deadlock with
ext4_change_inode_journal_flag() which takes rwsem (write) and then
calls jbd2_journal_lock_updates() waiting for active handles to stop.

This is safe because:

 - This code runs before any block mapping or IO submission, so no
   writepages state depends on the rwsem being held at this point.

 - Inline data destruction is a one-way format transition (once cleared,
   EXT4_INODE_INLINE_DATA is never set again).  The rwsem is
   re-acquired after journal_stop, ensuring format stability for the
   remainder of writepages.

 - The can_map flag identifies the ext4_writepages() path (holds rwsem)
   vs ext4_normal_submit_inode_data_buffers() (does not), so the
   drop/reacquire is skipped when the rwsem is not held.

Also check the return value of ext4_destroy_inline_data() to avoid
proceeding with an inconsistent inode format on failure.

Reported-by: syzbot+bb2455d02bda0b5701e3@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=bb2455d02bda0b5701e3
Fixes: c8585c6fcaf2 ("ext4: fix races between changing inode journal mode and ext4_writepages")
Signed-off-by: Yun Zhou <yun.zhou@windriver.com>
---
v3: Drop s_writepages_rwsem before ext4_journal_start() and reacquire
    after ext4_journal_stop(), instead of dropping between journal_start
    and journal_stop as in v2.  This avoids two issues identified in v2
    review:
    - memalloc_nofs_restore() in ext4_writepages_up_read() would clear
      PF_MEMALLOC_NOFS while the jbd2 handle is active.
    - Reacquiring s_writepages_rwsem while holding a handle creates an
      ABBA deadlock with ext4_change_inode_journal_flag() which takes
      the rwsem (write) then calls jbd2_journal_lock_updates().

v2: Instead of moving inline data handling to ext4_writepages(),
    temporarily drop s_writepages_rwsem around ext4_destroy_inline_data()
    in ext4_do_writepages(). The move approach had a race where concurrent
    writes could create dirty pages with inline data after the early check,
    and unconditional destruction without dirty pages would lose data.

v1: Moved inline data cleanup from ext4_do_writepages() to
      ext4_writepages() before acquiring s_writepages_rwsem.

 fs/ext4/inode.c | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c2c2d6ac7f3d..cd7588a3fa45 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1694,6 +1694,9 @@ struct mpage_da_data {
 	struct writeback_control *wbc;
 	unsigned int can_map:1;	/* Can writepages call map blocks? */
 
+	/* Saved memalloc context from ext4_writepages_down_read() */
+	int alloc_ctx;
+
 	/* These are internal state of ext4_do_writepages() */
 	loff_t start_pos;	/* The start pos to write */
 	loff_t next_pos;	/* Current pos to examine */
@@ -2816,16 +2819,35 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
 	 * we'd better clear the inline data here.
 	 */
 	if (ext4_has_inline_data(inode)) {
-		/* Just inode will be modified... */
+		/*
+		 * Temporarily drop s_writepages_rwsem because
+		 * ext4_destroy_inline_data() acquires xattr_sem, which has
+		 * a higher lock ordering rank.  Holding both would create a
+		 * circular dependency with ext4_xattr_block_set() -> iput()
+		 * -> ext4_writepages() -> s_writepages_rwsem.
+		 *
+		 * Drop the rwsem before starting the journal handle to also
+		 * avoid a deadlock with ext4_change_inode_journal_flag(),
+		 * which takes rwsem (write) then jbd2_journal_lock_updates().
+		 */
+		if (mpd->can_map)
+			ext4_writepages_up_read(inode->i_sb, mpd->alloc_ctx);
 		handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
 		if (IS_ERR(handle)) {
+			if (mpd->can_map)
+				mpd->alloc_ctx =
+					ext4_writepages_down_read(inode->i_sb);
 			ret = PTR_ERR(handle);
 			goto out_writepages;
 		}
 		BUG_ON(ext4_test_inode_state(inode,
 				EXT4_STATE_MAY_INLINE_DATA));
-		ext4_destroy_inline_data(handle, inode);
+		ret = ext4_destroy_inline_data(handle, inode);
 		ext4_journal_stop(handle);
+		if (mpd->can_map)
+			mpd->alloc_ctx = ext4_writepages_down_read(inode->i_sb);
+		if (ret)
+			goto out_writepages;
 	}
 
 	/*
@@ -3032,13 +3054,12 @@ static int ext4_writepages(struct address_space *mapping,
 		.can_map = 1,
 	};
 	int ret;
-	int alloc_ctx;
 
 	ret = ext4_emergency_state(sb);
 	if (unlikely(ret))
 		return ret;
 
-	alloc_ctx = ext4_writepages_down_read(sb);
+	mpd.alloc_ctx = ext4_writepages_down_read(sb);
 	ret = ext4_do_writepages(&mpd);
 	/*
 	 * For data=journal writeback we could have come across pages marked
@@ -3047,7 +3068,7 @@ static int ext4_writepages(struct address_space *mapping,
 	 */
 	if (!ret && mpd.journalled_more_data)
 		ret = ext4_do_writepages(&mpd);
-	ext4_writepages_up_read(sb, alloc_ctx);
+	ext4_writepages_up_read(sb, mpd.alloc_ctx);
 
 	return ret;
 }
-- 
2.43.0