Move the bitmap modification, GDP update, and checksum update into a single group lock acquisition in ext4_mark_inode_used(), eliminating the race window in which another thread could interleave a full recomputation between the bitmap modification and the checksum update. As a side effect of the reordering, ext4_might_init_block_bitmap() now runs before the inode bit is set, and the inode bitmap buffer is dirtied and synced only after the group lock is dropped. Add a fast_crc flag to select between the incremental and the full CRC update. When EXT4_BG_INODE_UNINIT is set, the stored checksum in the group descriptor is not a valid CRC of the bitmap -- mkfs leaves it as zero for UNINIT groups, and ext4_read_inode_bitmap() memsets the buffer to zero without updating the gdp checksum. In that case fast_crc is forced to false, falling back to ext4_inode_bitmap_csum_set() for a full recalculation that establishes a correct baseline. For non-UNINIT groups, ext4_inode_bitmap_csum_set_fast() computes the CRC delta for the single flipped bit in O(log N) time. Signed-off-by: Baokun Li --- fs/ext4/ialloc.c | 69 ++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 90896b7f8c73..e209e27f827f 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -838,35 +838,37 @@ int ext4_mark_inode_used(struct super_block *sb, int ino, umode_t mode) goto out; } - ext4_set_bit(bit, inode_bitmap_bh->b_data); - - BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(NULL, NULL, inode_bitmap_bh); - if (err) { - ext4_std_error(sb, err); - goto out; - } - err = sync_dirty_buffer(inode_bitmap_bh); - if (err) { - ext4_std_error(sb, err); - goto out; - } - /* We may have to initialize the block bitmap if it isn't already */ err = ext4_might_init_block_bitmap(NULL, sb, group, gdp); if (err) goto out; + ext4_lock_group(sb, group); + /* Fast commit replay is single-threaded, no need for test_and_set */ + ext4_set_bit(bit, inode_bitmap_bh->b_data); + /* Update the relevant bg descriptor fields */ + ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); + if 
(S_ISDIR(mode)) { + ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); + if (sbi->s_log_groups_per_flex) { + ext4_group_t f = ext4_flex_group(sbi, group); + + atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups, + f)->used_dirs); + } + } + if (ext4_has_group_desc_csum(sb)) { - int free; + bool fast_crc = true; + int free = EXT4_INODES_PER_GROUP(sb) - + ext4_itable_unused_count(sb, gdp); - ext4_lock_group(sb, group); /* while we modify the bg desc */ - free = EXT4_INODES_PER_GROUP(sb) - - ext4_itable_unused_count(sb, gdp); if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); free = 0; + /* Incremental CRC needs a valid checksum baseline */ + fast_crc = false; } /* @@ -877,27 +879,26 @@ int ext4_mark_inode_used(struct super_block *sb, int ino, umode_t mode) if (bit >= free) ext4_itable_unused_set(sb, gdp, (EXT4_INODES_PER_GROUP(sb) - bit - 1)); - } else { - ext4_lock_group(sb, group); + if (fast_crc) + ext4_inode_bitmap_csum_set_fast(sb, gdp, bit); + else + ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh); + ext4_group_desc_csum_set(sb, group, gdp); } - ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); - if (S_ISDIR(mode)) { - ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1); - if (sbi->s_log_groups_per_flex) { - ext4_group_t f = ext4_flex_group(sbi, group); + ext4_unlock_group(sb, group); - atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups, - f)->used_dirs); - } + BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(NULL, NULL, inode_bitmap_bh); + if (err) { + ext4_std_error(sb, err); + goto out; } - - if (ext4_has_group_desc_csum(sb)) { - ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh); - ext4_group_desc_csum_set(sb, group, gdp); + err = sync_dirty_buffer(inode_bitmap_bh); + if (err) { + ext4_std_error(sb, err); + goto out; } - - ext4_unlock_group(sb, group); err = ext4_handle_dirty_metadata(NULL, NULL, 
group_desc_bh); sync_dirty_buffer(group_desc_bh); out: -- 2.43.7