btrfs_init_new_device() opens and claims the new device on a live superblock without holding the write count, so a bdev_freeze() racing the window between the claim being published and the device becoming a member could freeze the filesystem through a claim the add may still abort and tear down. Add btrfs_open_device_deny_freeze(): it opens the device once non-exclusively to take the freeze deny, then claims it by the same dev_t, so the holder is only ever published while the device is already unfreezable. Keep it denied until the add either stands or is unwound: bdev_allow_freeze() on every direct return that leaves the device in place (the success returns, where the device is now a committed member, and the transaction-commit failure return, which does not unwind the device), btrfs_release_device_allow_freeze() on the error unwind. The deny spans the whole add, including the seeding tail whose late failures still release the device. A device already frozen when the add starts is refused with -EBUSY. Signed-off-by: Christian Brauner (Amutable) --- fs/btrfs/volumes.c | 45 ++++++++++++++++++++++++++++++++++++++++----- fs/btrfs/volumes.h | 2 ++ 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 36f9835f65e3..4558e018b53b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2822,6 +2822,36 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans) return 0; } +/* + * Open @path for @sb with freezing denied before the holder claim is published, + * so a racing bdev_freeze() can never reach a claim a device add or replace may + * still abort. The deny is taken on a throwaway non-holder probe open, then the + * holder is opened by the probe's dev_t. Balanced by the caller. 
+ */ +struct file *btrfs_open_device_deny_freeze(const char *path, + struct super_block *sb) +{ + struct file *probe_file, *bdev_file; + int ret; + + probe_file = bdev_file_open_by_path(path, BLK_OPEN_READ, NULL, NULL); + if (IS_ERR(probe_file)) + return probe_file; + + ret = bdev_deny_freeze(file_bdev(probe_file)); + if (ret) { + bdev_fput(probe_file); + return ERR_PTR(ret); + } + + bdev_file = bdev_file_open_by_dev(file_bdev(probe_file)->bd_dev, + BLK_OPEN_WRITE, sb, &fs_holder_ops); + if (IS_ERR(bdev_file)) + bdev_allow_freeze(file_bdev(probe_file)); + bdev_fput(probe_file); + return bdev_file; +} + int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path) { struct btrfs_root *root = fs_info->dev_root; @@ -2840,8 +2870,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path if (sb_rdonly(sb) && !fs_devices->seeding) return -EROFS; - bdev_file = bdev_file_open_by_path(device_path, BLK_OPEN_WRITE, - fs_info->sb, &fs_holder_ops); + /* Forbid freezing until the device is a committed member (or unwound). */ + bdev_file = btrfs_open_device_deny_freeze(device_path, fs_info->sb); if (IS_ERR(bdev_file)) return PTR_ERR(bdev_file); @@ -3006,8 +3036,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path up_write(&sb->s_umount); locked = false; - if (ret) /* transaction commit */ + if (ret) { /* transaction commit */ + bdev_allow_freeze(file_bdev(bdev_file)); return ret; + } ret = btrfs_relocate_sys_chunks(fs_info); if (ret < 0) @@ -3015,8 +3047,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path "Failed to relocate sys chunks after device initialization. 
This can be fixed using the \"btrfs balance\" command."); trans = btrfs_attach_transaction(root); if (IS_ERR(trans)) { - if (PTR_ERR(trans) == -ENOENT) + if (PTR_ERR(trans) == -ENOENT) { + bdev_allow_freeze(file_bdev(bdev_file)); return 0; + } ret = PTR_ERR(trans); trans = NULL; goto error_sysfs; @@ -3036,6 +3070,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path /* Update ctime/mtime for blkid or udev */ update_dev_time(device_path); + bdev_allow_freeze(file_bdev(bdev_file)); return ret; error_sysfs: @@ -3065,7 +3100,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path error_free_device: btrfs_free_device(device); error: - bdev_fput(bdev_file); + btrfs_release_device_allow_freeze(bdev_file); if (locked) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 60e82c15881a..75c7963f5d4c 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -769,6 +769,8 @@ struct btrfs_device *btrfs_find_device(const struct btrfs_fs_devices *fs_devices const struct btrfs_dev_lookup_args *args); int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path); +struct file *btrfs_open_device_deny_freeze(const char *path, + struct super_block *sb); int btrfs_balance(struct btrfs_fs_info *fs_info, struct btrfs_balance_control *bctl, struct btrfs_ioctl_balance_args *bargs); -- 2.47.3