Merge the bitmap modification and group descriptor update into a single group lock acquisition in __ext4_new_inode(). Previously the bitmap bit was set under one lock/unlock pair, and the GDP fields (UNINIT, itable_unused, free_inodes, dirs, csum) were updated under a separate lock/unlock pair with a gap in between. Another thread could modify the bitmap and update the checksum during that gap, making incremental CRC incorrect. Now the full sequence -- set bit, update free inodes, clear UNINIT, update itable_unused, and compute checksum -- happens atomically under the same ext4_lock_group(). The alloc_sem is acquired before the group lock to maintain correct locking order with itable lazyinit. Use ext4_inode_bitmap_csum_set_fast() for the normal path where the stored checksum is valid. When EXT4_BG_INODE_UNINIT is set, fall back to ext4_inode_bitmap_csum_set() for a full recalculation to establish a correct baseline (mkfs leaves the checksum as zero for UNINIT groups). Signed-off-by: Baokun Li --- fs/ext4/ialloc.c | 129 +++++++++++++++++++++++------------------------ 1 file changed, 63 insertions(+), 66 deletions(-) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 8b75b331b26e..9dd1cdb367ba 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1135,7 +1135,25 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap, ext4_std_error(sb, err); goto out; } + + BUFFER_TRACE(group_desc_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sb, group_desc_bh, + EXT4_JTR_NONE); + if (err) { + ext4_std_error(sb, err); + goto out; + } + + /* We may have to initialize the block bitmap if it isn't already */ + err = ext4_might_init_block_bitmap(handle, sb, group, gdp); + if (err) + goto out; + + if (ext4_has_group_desc_csum(sb) && + !(sbi->s_mount_state & EXT4_FC_REPLAY)) + down_read(&grp->alloc_sem); ext4_lock_group(sb, group); + ret2 = ext4_test_and_set_bit(bit, inode_bitmap_bh->b_data); if (ret2) { /* Someone already took the bit. Repeat the search @@ -1147,9 +1165,54 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap, ret2 = 0; } else { ret2 = 1; /* we didn't grab the inode */ + goto unlock_group; + } + } + + /* Update the relevant bg descriptor fields */ + ext4_free_inodes_set(sb, gdp, + ext4_free_inodes_count(sb, gdp) - 1); + if (S_ISDIR(mode)) { + ext4_used_dirs_set(sb, gdp, + ext4_used_dirs_count(sb, gdp) + 1); + if (sbi->s_log_groups_per_flex) { + ext4_group_t f = ext4_flex_group(sbi, group); + atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups, + f)->used_dirs); + } + } + + if (ext4_has_group_desc_csum(sb)) { + bool fast_crc = true; + int free = EXT4_INODES_PER_GROUP(sb) - + ext4_itable_unused_count(sb, gdp); + + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); + free = 0; + /* Incremental CRC needs a valid csum baseline */ + fast_crc = false; } + /* + * Check the relative inode number against the + * last used relative inode number in this group. + * If it is greater we need to update the + * bg_itable_unused count. + */ + if (bit >= free) + ext4_itable_unused_set(sb, gdp, + EXT4_INODES_PER_GROUP(sb) - bit - 1); + if (fast_crc) + ext4_inode_bitmap_csum_set_fast(sb, gdp, bit); + else + ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh); + ext4_group_desc_csum_set(sb, group, gdp); } +unlock_group: ext4_unlock_group(sb, group); + if (ext4_has_group_desc_csum(sb) && + !(sbi->s_mount_state & EXT4_FC_REPLAY)) + up_read(&grp->alloc_sem); if (!ret2) goto got; /* we grabbed the inode! */ @@ -1168,72 +1231,6 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap, goto out; } - BUFFER_TRACE(group_desc_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, sb, group_desc_bh, - EXT4_JTR_NONE); - if (err) { - ext4_std_error(sb, err); - goto out; - } - - /* We may have to initialize the block bitmap if it isn't already */ - err = ext4_might_init_block_bitmap(handle, sb, group, gdp); - if (err) - goto out; - - /* Update the relevant bg descriptor fields */ - if (ext4_has_group_desc_csum(sb)) { - int free; - struct ext4_group_info *grp = NULL; - - if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) { - grp = ext4_get_group_info(sb, group); - if (!grp) { - err = -EFSCORRUPTED; - goto out; - } - down_read(&grp->alloc_sem); /* - * protect vs itable - * lazyinit - */ - } - ext4_lock_group(sb, group); /* while we modify the bg desc */ - free = EXT4_INODES_PER_GROUP(sb) - - ext4_itable_unused_count(sb, gdp); - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); - free = 0; - } - /* - * Check the relative inode number against the last used - * relative inode number in this group. if it is greater - * we need to update the bg_itable_unused count - */ - if (bit >= free) - ext4_itable_unused_set(sb, gdp, - (EXT4_INODES_PER_GROUP(sb) - bit - 1)); - if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) - up_read(&grp->alloc_sem); - } else { - ext4_lock_group(sb, group); - } - - ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); - if (S_ISDIR(mode)) { - ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); - if (sbi->s_log_groups_per_flex) { - ext4_group_t f = ext4_flex_group(sbi, group); - - atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups, - f)->used_dirs); - } - } - if (ext4_has_group_desc_csum(sb)) { - ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh); - ext4_group_desc_csum_set(sb, group, gdp); - } - ext4_unlock_group(sb, group); - BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); if (err) { -- 2.43.7