User can change the number of writeback contexts with values 1 to num cpus using the new sysfs attribute echo > /sys/class/bdi/:/nwritebacks The sequence of operations when number of writebacks is changed : - fetch the superblock for a bdi - freezes the filesystem - iterate through inodes of the superblock and flush the pages - shutdown and free the writeback threads - allocate and register the wb threads - thaw the filesystem Suggested-by: Christoph Hellwig Signed-off-by: Kundan Kumar Signed-off-by: Anuj Gupta --- fs/super.c | 23 +++++++++ include/linux/backing-dev.h | 1 + include/linux/fs.h | 1 + mm/backing-dev.c | 93 +++++++++++++++++++++++++++++++++++++ mm/page-writeback.c | 8 ++++ 5 files changed, 126 insertions(+) diff --git a/fs/super.c b/fs/super.c index 7f876f32343a..19ae05880888 100644 --- a/fs/super.c +++ b/fs/super.c @@ -2072,6 +2072,29 @@ static inline bool may_unfreeze(struct super_block *sb, enum freeze_holder who, return false; } +struct super_block *freeze_bdi_super(struct backing_dev_info *bdi) +{ + struct super_block *sb_iter; + struct super_block *sb = NULL; + + spin_lock(&sb_lock); + list_for_each_entry(sb_iter, &super_blocks, s_list) { + if (sb_iter->s_bdi == bdi) { + sb = sb_iter; + break; + } + } + spin_unlock(&sb_lock); + + if (sb) { + atomic_inc(&sb->s_active); + freeze_super(sb, FREEZE_HOLDER_KERNEL, NULL); + } + + return sb; +} +EXPORT_SYMBOL(freeze_bdi_super); + /** * freeze_super - lock the filesystem and force it into a consistent state * @sb: the super to lock diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index fb042e593c16..14f53183b8d1 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -144,6 +144,7 @@ int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ra int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); +int bdi_set_nwritebacks(struct backing_dev_info *bdi, unsigned int nwritebacks); /* * Flags in backing_dev_info::capability diff --git a/include/linux/fs.h b/include/linux/fs.h index 5199b0d49fa5..c7ed1c0b79f9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2770,6 +2770,7 @@ extern int unregister_filesystem(struct file_system_type *); extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); +struct super_block *freeze_bdi_super(struct backing_dev_info *bdi); int freeze_super(struct super_block *super, enum freeze_holder who, const void *freeze_owner); int thaw_super(struct super_block *super, enum freeze_holder who, diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 2a8f3b683b2d..5bfb9bf3ce52 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -35,6 +35,17 @@ LIST_HEAD(bdi_list); /* bdi_wq serves all asynchronous writeback tasks */ struct workqueue_struct *bdi_wq; +static int cgwb_bdi_init(struct backing_dev_info *bdi); +static void cgwb_bdi_register(struct backing_dev_info *bdi, + struct bdi_writeback_ctx *bdi_wb_ctx); +static void cgwb_bdi_unregister(struct backing_dev_info *bdi, + struct bdi_writeback_ctx *bdi_wb_ctx); +static void wb_shutdown(struct bdi_writeback *wb); +static void wb_exit(struct bdi_writeback *wb); +static struct bdi_writeback_ctx **wb_ctx_alloc(struct backing_dev_info *bdi, + int num_ctxs); +static void wb_ctx_free(struct backing_dev_info *bdi); + #ifdef CONFIG_DEBUG_FS #include #include @@ -469,6 +480,87 @@ static ssize_t strict_limit_show(struct device *dev, } static DEVICE_ATTR_RW(strict_limit); +static ssize_t nwritebacks_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int nwritebacks; + ssize_t ret; + struct super_block *sb = NULL; + struct bdi_writeback_ctx **wb_ctx; + struct bdi_writeback_ctx *bdi_wb_ctx; + struct inode *inode; + + ret = kstrtouint(buf, 10, &nwritebacks); + if (ret < 0) + return ret; + + if (nwritebacks < 1 || nwritebacks > num_online_cpus()) + return -EINVAL; + + if (nwritebacks == bdi->nr_wb_ctx) + return count; + + wb_ctx = wb_ctx_alloc(bdi, nwritebacks); + if (!wb_ctx) + return -ENOMEM; + + sb = freeze_bdi_super(bdi); + if (!sb) + return -EBUSY; + + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + filemap_write_and_wait(inode->i_mapping); + truncate_inode_pages_final(inode->i_mapping); +#ifdef CONFIG_CGROUP_WRITEBACK + if (inode->i_wb) { + WARN_ON_ONCE(!(inode->i_state & I_CLEAR)); + wb_put(inode->i_wb); + inode->i_wb = NULL; + } +#endif + } + spin_unlock(&sb->s_inode_list_lock); + + for_each_bdi_wb_ctx(bdi, bdi_wb_ctx) { + wb_shutdown(&bdi_wb_ctx->wb); + cgwb_bdi_unregister(bdi, bdi_wb_ctx); + } + + for_each_bdi_wb_ctx(bdi, bdi_wb_ctx) { + WARN_ON_ONCE(test_bit(WB_registered, &bdi_wb_ctx->wb.state)); + wb_exit(&bdi_wb_ctx->wb); + kfree(bdi_wb_ctx); + } + kfree(bdi->wb_ctx); + + ret = bdi_set_nwritebacks(bdi, nwritebacks); + + bdi->wb_ctx = wb_ctx; + + cgwb_bdi_init(bdi); + for_each_bdi_wb_ctx(bdi, bdi_wb_ctx) { + cgwb_bdi_register(bdi, bdi_wb_ctx); + set_bit(WB_registered, &bdi_wb_ctx->wb.state); + } + + thaw_super(sb, FREEZE_HOLDER_KERNEL, NULL); + deactivate_super(sb); + + return ret; +} + +static ssize_t nwritebacks_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", bdi->nr_wb_ctx); +} +static DEVICE_ATTR_RW(nwritebacks); + static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, @@ -479,6 +571,7 @@ static struct attribute *bdi_dev_attrs[] = { &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, &dev_attr_strict_limit.attr, + &dev_attr_nwritebacks.attr, NULL, }; ATTRIBUTE_GROUPS(bdi_dev); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6f283a777da6..1a43022affdd 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -818,6 +818,14 @@ int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit return 0; } +int bdi_set_nwritebacks(struct backing_dev_info *bdi, unsigned int nwritebacks) +{ + spin_lock_bh(&bdi_lock); + bdi->nr_wb_ctx = nwritebacks; + spin_unlock_bh(&bdi_lock); + return 0; +} + static unsigned long dirty_freerun_ceiling(unsigned long thresh, unsigned long bg_thresh) { -- 2.25.1