Add multiple KUnit tests to test various permutations of extent splitting and conversion. We test the following cases: 1. Split of unwritten extent into 2 parts and convert 1 part to written 2. Split of unwritten extent into 3 parts and convert 1 part to written 3. Split of unwritten extent into 2 unwritten extents 4. Split of unwritten extent into 3 unwritten extents 5. Split of written extent into 2 parts and convert 1 part to unwritten 6. Split of written extent into 3 parts and convert 1 part to unwritten 7. Zeroout fallback for all the above cases except 5-6 because zeroout is not supported for written to unwritten splits The main function we test here is ext4_split_convert_extents(). Currently some of the tests are failing due to issues in implementation. All failures are mitigated at other layers in ext4 [1] but still point out the mismatch in expectation of what the caller wants vs what the function does. The aim is to eventually fix all the failures we see here. More detailed implementation notes can be found in the topmost commit in the test file. [1] for example, EXT4_GET_BLOCKS_CONVERT doesn't really convert the split extent to written, but rather the callers end up doing the conversion. Signed-off-by: Ojaswin Mujoo --- fs/ext4/extents-test.c | 565 +++++++++++++++++++++++++++++++++++++++ fs/ext4/extents.c | 22 +- fs/ext4/extents_status.c | 3 + fs/ext4/inode.c | 4 + 4 files changed, 592 insertions(+), 2 deletions(-) create mode 100644 fs/ext4/extents-test.c diff --git a/fs/ext4/extents-test.c b/fs/ext4/extents-test.c new file mode 100644 index 000000000000..937810a0f264 --- /dev/null +++ b/fs/ext4/extents-test.c @@ -0,0 +1,565 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Written by Ojaswin Mujoo (IBM) + * + * These Kunit tests are designed to test the functionality of + * extent split and conversion in ext4. + * + * Currently, ext4 can split extents in 2 ways: + * 1. By splitting the extents in the extent tree and optionally converting them + * to written or unwritten based on flags passed. + * 2. In case 1 encounters an error, ext4 instead zerooes out the unwritten + * areas of the extent and marks the complete extent written. + * + * The primary function that handles this is ext4_split_convert_extents(). + * + * We test both of the methods of split. The behavior we try to enforce is: + * 1. When passing EXT4_GET_BLOCKS_CONVERT flag to ext4_split_convert_extents(), + * the split extent should be converted to initialized. + * 2. When passing EXT4_GET_BLOCKS_CONVERT_UNWRITTEN flag to + * ext4_split_convert_extents(), the split extent should be converted to + * uninitialized. + * 3. In case we use the zeroout method, then we should correctly write zeroes + * to the unwritten areas of the extent and we should not corrupt/leak any + * data. + * + * Enforcing 1 and 2 is straight forward, we just setup a minimal inode with + * extent tree, call ext4_split_convert_extents() and check the final state of + * the extent tree. + * + * For zeroout testing, we maintain a separate buffer which represents the disk + * data corresponding to the extents. We then override ext4's zeroout functions + * to instead write zeroes to our buffer. Then, we override + * ext4_ext_insert_extent() to return -ENOSPC, which triggers the zeroout. + * Finally, we check the state of the extent tree and zeroout buffer to confirm + * everything went well. + */ + +#include +#include +#include +#include + +#include "ext4.h" +#include "ext4_extents.h" + +#define EX_DATA_PBLK 100 +#define EX_DATA_LBLK 10 +#define EX_DATA_LEN 3 + +struct kunit_ctx { + /* + * Ext4 inode which has only 1 unwrit extent + */ + struct ext4_inode_info *k_ei; + /* + * Represents the underlying data area (used for zeroout testing) + */ + char *k_data; +} k_ctx; + +/* + * describes the state of an expected extent in extent tree. + */ +struct kunit_ext_state { + ext4_lblk_t ex_lblk; + ext4_lblk_t ex_len; + bool is_unwrit; +}; + +/* + * describes the state of the data area of a writ extent. Used for testing + * correctness of zeroout. + */ +struct kunit_ext_data_state { + char exp_char; + ext4_lblk_t off_blk; + ext4_lblk_t len_blk; +}; + +struct kunit_ext_test_param { + /* description of test */ + char *desc; + + /* is extent unwrit at beginning of test */ + bool is_unwrit_at_start; + + /* flags to pass while splitting */ + int split_flags; + + /* map describing range to split */ + struct ext4_map_blocks split_map; + + /* no of extents expected after split */ + int nr_exp_ext; + + /* + * expected state of extents after split. We will never split into more + * than 3 extents + */ + struct kunit_ext_state exp_ext_state[3]; + + /* Below fields used for zeroout tests */ + + bool is_zeroout_test; + /* + * no of expected data segments (zeroout tests). Example, if we expect + * data to be 4kb 0s, followed by 8kb non-zero, then nr_exp_data_segs==2 + */ + int nr_exp_data_segs; + + /* + * expected state of data area after zeroout. + */ + struct kunit_ext_data_state exp_data_state[3]; +}; + +static void ext_kill_sb(struct super_block *sb) +{ + generic_shutdown_super(sb); +} + +static int ext_set(struct super_block *sb, void *data) +{ + return 0; +} + +static struct file_system_type ext_fs_type = { + .name = "extents test", + .kill_sb = ext_kill_sb, +}; + +static void extents_kunit_exit(struct kunit *test) +{ + kfree(k_ctx.k_ei); + kfree(k_ctx.k_data); +} + +static void ext4_cache_extents_stub(struct inode *inode, + struct ext4_extent_header *eh) +{ + return; +} + +static int __ext4_ext_dirty_stub(const char *where, unsigned int line, + handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) +{ + return 0; +} + +static struct ext4_ext_path * +ext4_ext_insert_extent_stub(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *newext, int gb_flags) +{ + return ERR_PTR(-ENOSPC); +} + +static void ext4_es_remove_extent_stub(struct inode *inode, ext4_lblk_t lblk, + ext4_lblk_t len) +{ + return; +} + +static void ext4_zeroout_es_stub(struct inode *inode, struct ext4_extent *ex) +{ + return; +} + +/* + * We will zeroout the equivalent range in the data area + */ +static int ext4_ext_zeroout_stub(struct inode *inode, struct ext4_extent *ex) +{ + ext4_lblk_t ee_block, off_blk; + loff_t ee_len; + loff_t off_bytes; + struct kunit *test = kunit_get_current_test(); + + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); + + KUNIT_EXPECT_EQ_MSG(test, 1, ee_block >= EX_DATA_LBLK, "ee_block=%d", + ee_block); + KUNIT_EXPECT_EQ(test, 1, + ee_block + ee_len <= EX_DATA_LBLK + EX_DATA_LEN); + + off_blk = ee_block - EX_DATA_LBLK; + off_bytes = off_blk << inode->i_sb->s_blocksize_bits; + memset(k_ctx.k_data + off_bytes, 0, + ee_len << inode->i_sb->s_blocksize_bits); + + return 0; +} + +static int ext4_issue_zeroout_stub(struct inode *inode, ext4_lblk_t lblk, + ext4_fsblk_t pblk, ext4_lblk_t len) +{ + ext4_lblk_t off_blk; + loff_t off_bytes; + struct kunit *test = kunit_get_current_test(); + + kunit_log(KERN_ALERT, test, + "%s: lblk=%u pblk=%llu len=%u", __func__, lblk, pblk, len); + KUNIT_EXPECT_EQ(test, 1, lblk >= EX_DATA_LBLK); + KUNIT_EXPECT_EQ(test, 1, lblk + len <= EX_DATA_LBLK + EX_DATA_LEN); + KUNIT_EXPECT_EQ(test, 1, lblk - EX_DATA_LBLK == pblk - EX_DATA_PBLK); + + off_blk = lblk - EX_DATA_LBLK; + off_bytes = off_blk << inode->i_sb->s_blocksize_bits; + memset(k_ctx.k_data + off_bytes, 0, + len << inode->i_sb->s_blocksize_bits); + + return 0; +} + +static int extents_kunit_init(struct kunit *test) +{ + struct ext4_extent_header *eh = NULL; + struct ext4_inode_info *ei; + struct inode *inode; + struct super_block *sb; + struct kunit_ext_test_param *param = + (struct kunit_ext_test_param *)(test->param_value); + + /* setup the mock inode */ + k_ctx.k_ei = kzalloc(sizeof(struct ext4_inode_info), GFP_KERNEL); + if (k_ctx.k_ei == NULL) + return -ENOMEM; + ei = k_ctx.k_ei; + inode = &ei->vfs_inode; + + sb = sget(&ext_fs_type, NULL, ext_set, 0, NULL); + if (IS_ERR(sb)) + return PTR_ERR(sb); + + sb->s_blocksize = 4096; + sb->s_blocksize_bits = 12; + + ei->i_disksize = (EX_DATA_LBLK + EX_DATA_LEN + 10) << sb->s_blocksize_bits; + inode->i_sb = sb; + + k_ctx.k_data = kzalloc(EX_DATA_LEN * 4096, GFP_KERNEL); + if (k_ctx.k_data == NULL) + return -ENOMEM; + + /* + * set the data area to a junk value + */ + memset(k_ctx.k_data, 'X', EX_DATA_LEN * 4096); + + /* create a tree with depth 0 */ + eh = (struct ext4_extent_header *)k_ctx.k_ei->i_data; + + /* Fill extent header */ + eh = ext_inode_hdr(&k_ctx.k_ei->vfs_inode); + eh->eh_depth = 0; + eh->eh_entries = cpu_to_le16(1); + eh->eh_magic = EXT4_EXT_MAGIC; + eh->eh_max = + cpu_to_le16(ext4_ext_space_root_idx(&k_ctx.k_ei->vfs_inode, 0)); + eh->eh_generation = 0; + + /* + * add 1 extent in leaf node covering lblks [10,13) and pblk [100,103) + */ + EXT_FIRST_EXTENT(eh)->ee_block = cpu_to_le32(EX_DATA_LBLK); + EXT_FIRST_EXTENT(eh)->ee_len = cpu_to_le16(EX_DATA_LEN); + ext4_ext_store_pblock(EXT_FIRST_EXTENT(eh), EX_DATA_PBLK); + if (!param || param->is_unwrit_at_start) + ext4_ext_mark_unwritten(EXT_FIRST_EXTENT(eh)); + + /* Add stubs */ + kunit_activate_static_stub(test, ext4_cache_extents, + ext4_cache_extents_stub); + kunit_activate_static_stub(test, __ext4_ext_dirty, + __ext4_ext_dirty_stub); + kunit_activate_static_stub(test, ext4_es_remove_extent, + ext4_es_remove_extent_stub); + kunit_activate_static_stub(test, ext4_zeroout_es, ext4_zeroout_es_stub); + kunit_activate_static_stub(test, ext4_ext_zeroout, ext4_ext_zeroout_stub); + kunit_activate_static_stub(test, ext4_issue_zeroout, + ext4_issue_zeroout_stub); + return 0; +} + +/* + * Return 1 if all bytes in the buf equal to c, else return the offset of first mismatch + */ +static int check_buffer(char *buf, int c, int size) +{ + void *ret = NULL; + + ret = memchr_inv(buf, c, size); + if (ret == NULL) + return 0; + + kunit_log(KERN_ALERT, kunit_get_current_test(), + "# %s: wrong char found at offset %ld (expected:%d got:%d)", __func__, + ((char *)ret - buf), c, *((char *)ret)); + return 1; +} + +static void test_split_convert(struct kunit *test) +{ + struct ext4_ext_path *path; + struct inode *inode = &k_ctx.k_ei->vfs_inode; + struct ext4_extent *ex; + struct ext4_map_blocks map; + const struct kunit_ext_test_param *param = + (const struct kunit_ext_test_param *)(test->param_value); + int blkbits = inode->i_sb->s_blocksize_bits; + + if (param->is_zeroout_test) + /* + * Force zeroout by making ext4_ext_insert_extent return ENOSPC + */ + kunit_activate_static_stub(test, ext4_ext_insert_extent, + ext4_ext_insert_extent_stub); + + path = ext4_find_extent(inode, EX_DATA_LBLK, NULL, 0); + ex = path->p_ext; + KUNIT_EXPECT_EQ(test, 10, ex->ee_block); + KUNIT_EXPECT_EQ(test, 3, ext4_ext_get_actual_len(ex)); + KUNIT_EXPECT_EQ(test, param->is_unwrit_at_start, ext4_ext_is_unwritten(ex)); + if (param->is_zeroout_test) + KUNIT_EXPECT_EQ(test, 0, + check_buffer(k_ctx.k_data, 'X', + EX_DATA_LEN << blkbits)); + + map.m_lblk = param->split_map.m_lblk; + map.m_len = param->split_map.m_len; + ext4_split_convert_extents(NULL, inode, &map, path, + param->split_flags, NULL); + + path = ext4_find_extent(inode, EX_DATA_LBLK, NULL, 0); + ex = path->p_ext; + + for (int i = 0; i < param->nr_exp_ext; i++) { + struct kunit_ext_state exp_ext = param->exp_ext_state[i]; + + KUNIT_EXPECT_EQ(test, exp_ext.ex_lblk, ex->ee_block); + KUNIT_EXPECT_EQ(test, exp_ext.ex_len, ext4_ext_get_actual_len(ex)); + KUNIT_EXPECT_EQ_MSG( + test, exp_ext.is_unwrit, ext4_ext_is_unwritten(ex), + "# exp: lblk:%d len:%d unwrit:%d, got: lblk:%d len:%d unwrit:%d\n", + exp_ext.ex_lblk, exp_ext.ex_len, exp_ext.is_unwrit, + ex->ee_block, ext4_ext_get_actual_len(ex), ext4_ext_is_unwritten(ex)); + + ex = ex + 1; + } + + if (!param->is_zeroout_test) + return; + + /* + * Check that then data area has been zeroed out correctly + */ + for (int i = 0; i < param->nr_exp_data_segs; i++) { + loff_t off, len; + struct kunit_ext_data_state exp_data_seg = param->exp_data_state[i]; + + off = exp_data_seg.off_blk << blkbits; + len = exp_data_seg.len_blk << blkbits; + KUNIT_EXPECT_EQ_MSG(test, 0, + check_buffer(k_ctx.k_data + off, + exp_data_seg.exp_char, len), + "# corruption in byte range [%lld, %lld)", + off, len); + } + + return; +} + +static const struct kunit_ext_test_param test_split_convert_params[] = { + /* unwrit to writ splits */ + { .desc = "split unwrit extent to 2 extents and convert 1st half writ", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_CONVERT, + .split_map = { .m_lblk = 10, .m_len = 1 }, + .nr_exp_ext = 2, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 0 }, + { .ex_lblk = 11, .ex_len = 2, .is_unwrit = 1 } }, + .is_zeroout_test = 0 }, + { .desc = "split unwrit extent to 2 extents and convert 2nd half writ", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_CONVERT, + .split_map = { .m_lblk = 11, .m_len = 2 }, + .nr_exp_ext = 2, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 1 }, + { .ex_lblk = 11, .ex_len = 2, .is_unwrit = 0 } }, + .is_zeroout_test = 0 }, + { .desc = "split unwrit extent to 3 extents and convert 2nd half to writ", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_CONVERT, + .split_map = { .m_lblk = 11, .m_len = 1 }, + .nr_exp_ext = 3, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 1 }, + { .ex_lblk = 11, .ex_len = 1, .is_unwrit = 0 }, + { .ex_lblk = 12, .ex_len = 1, .is_unwrit = 1 } }, + .is_zeroout_test = 0 }, + + /* unwrit to unwrit splits */ + { .desc = "split unwrit extent to 2 unwrit extents", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_UNWRIT_EXT, + .split_map = { .m_lblk = 10, .m_len = 1 }, + .nr_exp_ext = 2, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 1 }, + { .ex_lblk = 11, .ex_len = 2, .is_unwrit = 1 } }, + .is_zeroout_test = 0 }, + { .desc = "split unwrit extent to 2 extents (2)", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_UNWRIT_EXT, + .split_map = { .m_lblk = 11, .m_len = 2 }, + .nr_exp_ext = 2, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 1 }, + { .ex_lblk = 11, .ex_len = 2, .is_unwrit = 1 } }, + .is_zeroout_test = 0 }, + { .desc = "split unwrit extent to 3 unwrit extents", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_UNWRIT_EXT, + .split_map = { .m_lblk = 11, .m_len = 1 }, + .nr_exp_ext = 3, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 1 }, + { .ex_lblk = 11, .ex_len = 1, .is_unwrit = 1 }, + { .ex_lblk = 12, .ex_len = 1, .is_unwrit = 1 } }, + .is_zeroout_test = 0 }, + + /* writ to unwrit splits */ + { .desc = "split writ extent to 2 extents and convert 1st half unwrit", + .is_unwrit_at_start = 0, + .split_flags = EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, + .split_map = { .m_lblk = 10, .m_len = 1 }, + .nr_exp_ext = 2, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 1 }, + { .ex_lblk = 11, .ex_len = 2, .is_unwrit = 0 } }, + .is_zeroout_test = 0 }, + { .desc = "split writ extent to 2 extents and convert 2nd half unwrit", + .is_unwrit_at_start = 0, + .split_flags = EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, + .split_map = { .m_lblk = 11, .m_len = 2 }, + .nr_exp_ext = 2, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 0 }, + { .ex_lblk = 11, .ex_len = 2, .is_unwrit = 1 } }, + .is_zeroout_test = 0 }, + { .desc = "split writ extent to 3 extents and convert 2nd half to unwrit", + .is_unwrit_at_start = 0, + .split_flags = EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, + .split_map = { .m_lblk = 11, .m_len = 1 }, + .nr_exp_ext = 3, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 0 }, + { .ex_lblk = 11, .ex_len = 1, .is_unwrit = 1 }, + { .ex_lblk = 12, .ex_len = 1, .is_unwrit = 0 } }, + .is_zeroout_test = 0 }, + + /* + * ***** zeroout tests ***** + */ + /* unwrit to writ splits */ + { .desc = "split unwrit extent to 2 extents and convert 1st half writ (zeroout)", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_CONVERT, + .split_map = { .m_lblk = 10, .m_len = 1 }, + .nr_exp_ext = 1, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 3, .is_unwrit = 0 } }, + .is_zeroout_test = 1, + .nr_exp_data_segs = 2, + /* 1 block of data followed by 2 blocks of zeroes */ + .exp_data_state = { { .exp_char = 'X', .off_blk = 0, .len_blk = 1 }, + { .exp_char = 0, .off_blk = 1, .len_blk = 2 } } }, + { .desc = "split unwrit extent to 2 extents and convert 2nd half writ (zeroout)", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_CONVERT, + .split_map = { .m_lblk = 11, .m_len = 2 }, + .nr_exp_ext = 1, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 3, .is_unwrit = 0 } }, + .is_zeroout_test = 1, + .nr_exp_data_segs = 2, + /* 1 block of zeroes followed by 2 blocks of data */ + .exp_data_state = { { .exp_char = 0, .off_blk = 0, .len_blk = 1 }, + { .exp_char = 'X', .off_blk = 1, .len_blk = 2 } } }, + { .desc = "split unwrit extent to 3 extents and convert 2nd half writ (zeroout)", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_CONVERT, + .split_map = { .m_lblk = 11, .m_len = 1 }, + .nr_exp_ext = 1, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 3, .is_unwrit = 0 } }, + .is_zeroout_test = 1, + .nr_exp_data_segs = 3, + /* [zeroes] [data] [zeroes] */ + .exp_data_state = { { .exp_char = 0, .off_blk = 0, .len_blk = 1 }, + { .exp_char = 'X', .off_blk = 1, .len_blk = 1 }, + { .exp_char = 0, .off_blk = 2, .len_blk = 1 } } }, + + /* unwrit to unwrit splits */ + { .desc = "split unwrit extent to 2 unwrit extents (zeroout)", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_UNWRIT_EXT, + .split_map = { .m_lblk = 10, .m_len = 1 }, + .nr_exp_ext = 1, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 3, .is_unwrit = 0 } }, + .is_zeroout_test = 1, + .nr_exp_data_segs = 1, + .exp_data_state = { { .exp_char = 0, .off_blk = 0, .len_blk = 3 } } }, + { .desc = "split unwrit extent to 2 unwrit extents (2) (zeroout)", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_UNWRIT_EXT, + .split_map = { .m_lblk = 11, .m_len = 2 }, + .nr_exp_ext = 1, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 3, .is_unwrit = 0 } }, + .is_zeroout_test = 1, + .nr_exp_data_segs = 1, + .exp_data_state = { { .exp_char = 0, .off_blk = 0, .len_blk = 3 } } }, + { .desc = "split unwrit extent to 3 unwrit extents (zeroout)", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_UNWRIT_EXT, + .split_map = { .m_lblk = 11, .m_len = 1 }, + .nr_exp_ext = 1, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 3, .is_unwrit = 0 } }, + .is_zeroout_test = 1, + .nr_exp_data_segs = 1, + .exp_data_state = { { .exp_char = 0, .off_blk = 0, .len_blk = 3 } } }, +}; + +static void ext_get_desc(struct kunit *test, const void *p, char *desc) + +{ + struct kunit_ext_test_param *param = (struct kunit_ext_test_param *)p; + + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s\n", param->desc); +} + +static int test_split_convert_param_init(struct kunit *test) +{ + size_t arr_size = ARRAY_SIZE(test_split_convert_params); + + kunit_register_params_array(test, test_split_convert_params, arr_size, + ext_get_desc); + return 0; +} + +/* + * Note that we use KUNIT_CASE_PARAM_WITH_INIT() instead of the more compact + * KUNIT_ARRAY_PARAM() because the later currently has a limitation causing the + * output parsing to be prone to error. For more context: + * + * https://lore.kernel.org/linux-kselftest/aULJpTvJDw9ctUDe@li-dc0c254c-257c-11b2-a85c-98b6c1322444.ibm.com/ + */ +static struct kunit_case extents_test_cases[] = { + KUNIT_CASE_PARAM_WITH_INIT(test_split_convert, kunit_array_gen_params, + test_split_convert_param_init, NULL), + {} +}; + +static struct kunit_suite extents_test_suite = { + .name = "ext4_extents_test", + .init = extents_kunit_init, + .exit = extents_kunit_exit, + .test_cases = extents_test_cases, +}; + +kunit_test_suites(&extents_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c7c66ab825e7..0ad0a9f2e3d4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -32,6 +32,7 @@ #include "ext4_jbd2.h" #include "ext4_extents.h" #include "xattr.h" +#include #include @@ -197,6 +198,9 @@ static int __ext4_ext_dirty(const char *where, unsigned int line, { int err; + KUNIT_STATIC_STUB_REDIRECT(__ext4_ext_dirty, where, line, handle, inode, + path); + WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem)); if (path->p_bh) { ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh)); @@ -535,6 +539,8 @@ static void ext4_cache_extents(struct inode *inode, ext4_lblk_t prev = 0; int i; + KUNIT_STATIC_STUB_REDIRECT(ext4_cache_extents, inode, eh); + for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) { unsigned int status = EXTENT_STATUS_WRITTEN; ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); @@ -898,6 +904,8 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, int ret; gfp_t gfp_flags = GFP_NOFS; + KUNIT_STATIC_STUB_REDIRECT(ext4_find_extent, inode, block, path, flags); + if (flags & EXT4_EX_NOFAIL) gfp_flags |= __GFP_NOFAIL; @@ -1990,6 +1998,8 @@ ext4_ext_insert_extent(handle_t *handle, struct inode *inode, ext4_lblk_t next; int mb_flags = 0, unwritten; + KUNIT_STATIC_STUB_REDIRECT(ext4_ext_insert_extent, handle, inode, path, newext, gb_flags); + if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) mb_flags |= EXT4_MB_DELALLOC_RESERVED; if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { @@ -3138,8 +3148,10 @@ static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex) ext4_fsblk_t ee_pblock; unsigned int ee_len; - ee_block = le32_to_cpu(ex->ee_block); - ee_len = ext4_ext_get_actual_len(ex); + KUNIT_STATIC_STUB_REDIRECT(ext4_zeroout_es, inode, ex); + + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); ee_pblock = ext4_ext_pblock(ex); if (ee_len == 0) @@ -3155,6 +3167,8 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) ext4_fsblk_t ee_pblock; unsigned int ee_len; + KUNIT_STATIC_STUB_REDIRECT(ext4_ext_zeroout, inode, ex); + ee_len = ext4_ext_get_actual_len(ex); ee_pblock = ext4_ext_pblock(ex); return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock, @@ -6180,3 +6194,7 @@ int ext4_ext_clear_bb(struct inode *inode) ext4_free_ext_path(path); return 0; } + +#ifdef CONFIG_EXT4_KUNIT_TESTS +#include "extents-test.c" +#endif diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index fc83e7e2ca9e..6c1faf7c9f2a 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -16,6 +16,7 @@ #include "ext4.h" #include +#include /* * According to previous discussion in Ext4 Developer Workshop, we @@ -1627,6 +1628,8 @@ void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, int reserved = 0; struct extent_status *es = NULL; + KUNIT_STATIC_STUB_REDIRECT(ext4_es_remove_extent, inode, lblk, len); + if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) return; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2e79b09fe2f0..c60813260f9a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -48,6 +48,8 @@ #include "acl.h" #include "truncate.h" +#include + #include static void ext4_journalled_zero_new_buffers(handle_t *handle, @@ -401,6 +403,8 @@ int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, { int ret; + KUNIT_STATIC_STUB_REDIRECT(ext4_issue_zeroout, inode, lblk, pblk, len); + if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) return fscrypt_zeroout_range(inode, lblk, pblk, len); -- 2.51.0 Add more kunit tests to cover all high level callers of ext4_split_convert_extents(). The main functions we cover are: 1. ext4_ext_handle_unwritten_extents() 1.1 - Split/Convert unwritten extent to written in endio convtext. 1.2 - Split/Convert unwritten extent to written in non endio context. 2. convert_initialized_extent() - Convert written extent to unwritten during zero range Signed-off-by: Ojaswin Mujoo --- fs/ext4/extents-test.c | 275 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 274 insertions(+), 1 deletion(-) diff --git a/fs/ext4/extents-test.c b/fs/ext4/extents-test.c index 937810a0f264..4fb94d3c8a1e 100644 --- a/fs/ext4/extents-test.c +++ b/fs/ext4/extents-test.c @@ -90,6 +90,9 @@ struct kunit_ext_test_param { /* map describing range to split */ struct ext4_map_blocks split_map; + /* disable zeroout */ + bool disable_zeroout; + /* no of extents expected after split */ int nr_exp_ext; @@ -131,6 +134,9 @@ static struct file_system_type ext_fs_type = { static void extents_kunit_exit(struct kunit *test) { + struct ext4_sb_info *sbi = k_ctx.k_ei->vfs_inode.i_sb->s_fs_info; + + kfree(sbi); kfree(k_ctx.k_ei); kfree(k_ctx.k_data); } @@ -220,6 +226,7 @@ static int extents_kunit_init(struct kunit *test) struct ext4_inode_info *ei; struct inode *inode; struct super_block *sb; + struct ext4_sb_info *sbi = NULL; struct kunit_ext_test_param *param = (struct kunit_ext_test_param *)(test->param_value); @@ -237,7 +244,18 @@ static int extents_kunit_init(struct kunit *test) sb->s_blocksize = 4096; sb->s_blocksize_bits = 12; - ei->i_disksize = (EX_DATA_LBLK + EX_DATA_LEN + 10) << sb->s_blocksize_bits; + sbi = kzalloc(sizeof(struct ext4_sb_info), GFP_KERNEL); + if (sbi == NULL) + return -ENOMEM; + + sbi->s_sb = sb; + sb->s_fs_info = sbi; + + if (!param || !param->disable_zeroout) + sbi->s_extent_max_zeroout_kb = 32; + + ei->i_disksize = (EX_DATA_LBLK + EX_DATA_LEN + 10) + << sb->s_blocksize_bits; inode->i_sb = sb; k_ctx.k_data = kzalloc(EX_DATA_LEN * 4096, GFP_KERNEL); @@ -279,6 +297,8 @@ static int extents_kunit_init(struct kunit *test) ext4_es_remove_extent_stub); kunit_activate_static_stub(test, ext4_zeroout_es, ext4_zeroout_es_stub); kunit_activate_static_stub(test, ext4_ext_zeroout, ext4_ext_zeroout_stub); + kunit_activate_static_stub(test, ext4_issue_zeroout, + ext4_issue_zeroout_stub); kunit_activate_static_stub(test, ext4_issue_zeroout, ext4_issue_zeroout_stub); return 0; @@ -372,6 +392,150 @@ static void test_split_convert(struct kunit *test) return; } +static void test_convert_initialized(struct kunit *test) +{ + struct ext4_ext_path *path; + struct inode *inode = &k_ctx.k_ei->vfs_inode; + struct ext4_extent *ex; + struct ext4_map_blocks map; + const struct kunit_ext_test_param *param = + (const struct kunit_ext_test_param *)(test->param_value); + int blkbits = inode->i_sb->s_blocksize_bits; + int allocated = 0; + + if (param->is_zeroout_test) + /* + * Force zeroout by making ext4_ext_insert_extent return ENOSPC + */ + kunit_activate_static_stub(test, ext4_ext_insert_extent, + ext4_ext_insert_extent_stub); + + path = ext4_find_extent(inode, EX_DATA_LBLK, NULL, 0); + ex = path->p_ext; + KUNIT_EXPECT_EQ(test, 10, ex->ee_block); + KUNIT_EXPECT_EQ(test, 3, ext4_ext_get_actual_len(ex)); + KUNIT_EXPECT_EQ(test, param->is_unwrit_at_start, ext4_ext_is_unwritten(ex)); + if (param->is_zeroout_test) + KUNIT_EXPECT_EQ(test, 0, + check_buffer(k_ctx.k_data, 'X', + EX_DATA_LEN << blkbits)); + + map.m_lblk = param->split_map.m_lblk; + map.m_len = param->split_map.m_len; + convert_initialized_extent(NULL, inode, &map, path, &allocated); + + path = ext4_find_extent(inode, EX_DATA_LBLK, NULL, 0); + ex = path->p_ext; + + for (int i = 0; i < param->nr_exp_ext; i++) { + struct kunit_ext_state exp_ext = param->exp_ext_state[i]; + + KUNIT_EXPECT_EQ(test, exp_ext.ex_lblk, ex->ee_block); + KUNIT_EXPECT_EQ(test, exp_ext.ex_len, ext4_ext_get_actual_len(ex)); + KUNIT_EXPECT_EQ_MSG( + test, exp_ext.is_unwrit, ext4_ext_is_unwritten(ex), + "# exp: lblk:%d len:%d unwrit:%d, got: lblk:%d len:%d unwrit:%d\n", + exp_ext.ex_lblk, exp_ext.ex_len, exp_ext.is_unwrit, + ex->ee_block, ext4_ext_get_actual_len(ex), ext4_ext_is_unwritten(ex)); + + ex = ex + 1; + } + + if (!param->is_zeroout_test) + return; + + /* + * Check that then data area has been zeroed out correctly + */ + for (int i = 0; i < param->nr_exp_data_segs; i++) { + loff_t off, len; + struct kunit_ext_data_state exp_data_seg = param->exp_data_state[i]; + + off = exp_data_seg.off_blk << blkbits; + len = exp_data_seg.len_blk << blkbits; + KUNIT_EXPECT_EQ_MSG(test, 0, + check_buffer(k_ctx.k_data + off, + exp_data_seg.exp_char, len), + "# corruption in byte range [%lld, %lld)", + off, len); + } + + return; +} + +static void test_handle_unwritten(struct kunit *test) +{ + struct ext4_ext_path *path; + struct inode *inode = &k_ctx.k_ei->vfs_inode; + struct ext4_extent *ex; + struct ext4_map_blocks map; + const struct kunit_ext_test_param *param = + (const struct kunit_ext_test_param *)(test->param_value); + int blkbits = inode->i_sb->s_blocksize_bits; + int allocated = 0; + ext4_fsblk_t dummy_pblk = 999; + + if (param->is_zeroout_test) + /* + * Force zeroout by making ext4_ext_insert_extent return ENOSPC + */ + kunit_activate_static_stub(test, ext4_ext_insert_extent, + ext4_ext_insert_extent_stub); + + path = ext4_find_extent(inode, EX_DATA_LBLK, NULL, 0); + ex = path->p_ext; + KUNIT_EXPECT_EQ(test, 10, ex->ee_block); + KUNIT_EXPECT_EQ(test, 3, ext4_ext_get_actual_len(ex)); + KUNIT_EXPECT_EQ(test, param->is_unwrit_at_start, ext4_ext_is_unwritten(ex)); + if (param->is_zeroout_test) + KUNIT_EXPECT_EQ(test, 0, + check_buffer(k_ctx.k_data, 'X', + EX_DATA_LEN << blkbits)); + + map.m_lblk = param->split_map.m_lblk; + map.m_len = param->split_map.m_len; + ext4_ext_handle_unwritten_extents(NULL, inode, &map, path, param->split_flags, + &allocated, dummy_pblk); + + path = ext4_find_extent(inode, EX_DATA_LBLK, NULL, 0); + ex = path->p_ext; + + for (int i = 0; i < param->nr_exp_ext; i++) { + struct kunit_ext_state exp_ext = param->exp_ext_state[i]; + + KUNIT_EXPECT_EQ(test, exp_ext.ex_lblk, ex->ee_block); + KUNIT_EXPECT_EQ(test, exp_ext.ex_len, ext4_ext_get_actual_len(ex)); + KUNIT_EXPECT_EQ_MSG( + test, exp_ext.is_unwrit, ext4_ext_is_unwritten(ex), + "# exp: lblk:%d len:%d unwrit:%d, got: lblk:%d len:%d unwrit:%d\n", + exp_ext.ex_lblk, exp_ext.ex_len, exp_ext.is_unwrit, + ex->ee_block, ext4_ext_get_actual_len(ex), ext4_ext_is_unwritten(ex)); + + ex = ex + 1; + } + + if (!param->is_zeroout_test) + return; + + /* + * Check that then data area has been zeroed out correctly + */ + for (int i = 0; i < param->nr_exp_data_segs; i++) { + loff_t off, len; + struct kunit_ext_data_state exp_data_seg = param->exp_data_state[i]; + + off = exp_data_seg.off_blk << blkbits; + len = exp_data_seg.len_blk << blkbits; + KUNIT_EXPECT_EQ_MSG(test, 0, + check_buffer(k_ctx.k_data + off, + exp_data_seg.exp_char, len), + "# corruption in byte range [%lld, %lld)", + off, len); + } + + return; +} + static const struct kunit_ext_test_param test_split_convert_params[] = { /* unwrit to writ splits */ { .desc = "split unwrit extent to 2 extents and convert 1st half writ", @@ -523,6 +687,93 @@ static const struct kunit_ext_test_param test_split_convert_params[] = { .exp_data_state = { { .exp_char = 0, .off_blk = 0, .len_blk = 3 } } }, }; +static const struct kunit_ext_test_param +test_convert_initialized_params[] = { + /* writ to unwrit splits */ + { .desc = "split writ extent to 2 extents and convert 1st half unwrit", + .is_unwrit_at_start = 0, + .split_map = { .m_lblk = 10, .m_len = 1 }, + .nr_exp_ext = 2, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 1 }, + { .ex_lblk = 11, .ex_len = 2, .is_unwrit = 0 } }, + .is_zeroout_test = 0 }, + { .desc = "split writ extent to 2 extents and convert 2nd half unwrit", + .is_unwrit_at_start = 0, + .split_map = { .m_lblk = 11, .m_len = 2 }, + .nr_exp_ext = 2, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 0 }, + { .ex_lblk = 11, .ex_len = 2, .is_unwrit = 1 } }, + .is_zeroout_test = 0 }, + { .desc = "split writ extent to 3 extents and convert 2nd half to unwrit", + .is_unwrit_at_start = 0, + .split_map = { .m_lblk = 11, .m_len = 1 }, + .nr_exp_ext = 3, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 0 }, + { .ex_lblk = 11, .ex_len = 1, .is_unwrit = 1 }, + { .ex_lblk = 12, .ex_len = 1, .is_unwrit = 0 } }, + .is_zeroout_test = 0 }, +}; + +static const struct kunit_ext_test_param test_handle_unwritten_params[] = { + /* unwrit to writ splits via endio path */ + { .desc = "split unwrit extent to 2 extents and convert 1st half writ (endio)", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_CONVERT, + .split_map = { .m_lblk = 10, .m_len = 1 }, + .nr_exp_ext = 2, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 0 }, + { .ex_lblk = 11, .ex_len = 2, .is_unwrit = 1 } }, + .is_zeroout_test = 0 }, + { .desc = "split unwrit extent to 2 extents and convert 2nd half writ (endio)", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_CONVERT, + .split_map = { .m_lblk = 11, .m_len = 2 }, + .nr_exp_ext = 2, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 1 }, + { .ex_lblk = 11, .ex_len = 2, .is_unwrit = 0 } }, + .is_zeroout_test = 0 }, + { .desc = "split unwrit extent to 3 extents and convert 2nd half to writ (endio)", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_CONVERT, + .split_map = { .m_lblk = 11, .m_len = 1 }, + .nr_exp_ext = 3, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 1 }, + { .ex_lblk = 11, .ex_len = 1, .is_unwrit = 0 }, + { .ex_lblk = 12, .ex_len = 1, .is_unwrit = 1 } }, + .is_zeroout_test = 0 }, + + /* unwrit to writ splits via non-endio path */ + { .desc = "split unwrit extent to 2 extents and convert 1st half writ (non endio)", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_CREATE, + .split_map = { .m_lblk = 10, .m_len = 1 }, + .nr_exp_ext = 2, + .disable_zeroout = true, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 0 }, + { .ex_lblk = 11, .ex_len = 2, .is_unwrit = 1 } }, + .is_zeroout_test = 0 }, + { .desc = "split unwrit extent to 2 extents and convert 2nd half writ (non endio)", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_CREATE, + .split_map = { .m_lblk = 11, .m_len = 2 }, + .nr_exp_ext = 2, + .disable_zeroout = true, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 1 }, + { .ex_lblk = 11, .ex_len = 2, .is_unwrit = 0 } }, + .is_zeroout_test = 0 }, + { .desc = "split unwrit extent to 3 extents and convert 2nd half to writ (non endio)", + .is_unwrit_at_start = 1, + .split_flags = EXT4_GET_BLOCKS_CREATE, + .split_map = { .m_lblk = 11, .m_len = 1 }, + .nr_exp_ext = 3, + .disable_zeroout = true, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 1 }, + { .ex_lblk = 11, .ex_len = 1, .is_unwrit = 0 }, + { .ex_lblk = 12, .ex_len = 1, .is_unwrit = 1 } }, + .is_zeroout_test = 0 }, + +}; + static void ext_get_desc(struct kunit *test, const void *p, char *desc) { @@ -540,6 +791,24 @@ static int test_split_convert_param_init(struct kunit *test) return 0; } +static int test_convert_initialized_param_init(struct kunit *test) +{ + size_t arr_size = ARRAY_SIZE(test_convert_initialized_params); + + kunit_register_params_array(test, test_convert_initialized_params, + arr_size, ext_get_desc); + return 0; +} + +static int test_handle_unwritten_init(struct kunit *test) +{ + size_t arr_size = ARRAY_SIZE(test_handle_unwritten_params); + + kunit_register_params_array(test, test_handle_unwritten_params, + arr_size, ext_get_desc); + return 0; +} + /* * Note that we use KUNIT_CASE_PARAM_WITH_INIT() instead of the more compact * KUNIT_ARRAY_PARAM() because the later currently has a limitation causing the @@ -550,6 +819,10 @@ static int test_split_convert_param_init(struct kunit *test) static struct kunit_case extents_test_cases[] = { KUNIT_CASE_PARAM_WITH_INIT(test_split_convert, kunit_array_gen_params, test_split_convert_param_init, NULL), + KUNIT_CASE_PARAM_WITH_INIT(test_convert_initialized, kunit_array_gen_params, + test_convert_initialized_param_init, NULL), + KUNIT_CASE_PARAM_WITH_INIT(test_handle_unwritten, kunit_array_gen_params, + test_handle_unwritten_init, NULL), {} }; -- 2.51.0 Currently, ext4_zero_range passes EXT4_EX_NOCACHE flag to avoid caching extents however this is not respected by convert_initialized_extent(). Hence, modify it to accept flags from the caller and to pass the flags on to other extent manipulation functions it calls. This makes sure the NOCACHE flag is respected throughout the code path. Signed-off-by: Ojaswin Mujoo --- fs/ext4/extents-test.c | 2 +- fs/ext4/extents.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents-test.c b/fs/ext4/extents-test.c index 4fb94d3c8a1e..54aed3eabfe2 100644 --- a/fs/ext4/extents-test.c +++ b/fs/ext4/extents-test.c @@ -422,7 +422,7 @@ static void test_convert_initialized(struct kunit *test) map.m_lblk = param->split_map.m_lblk; map.m_len = param->split_map.m_len; - convert_initialized_extent(NULL, inode, &map, path, &allocated); + convert_initialized_extent(NULL, inode, &map, path, 0, &allocated); path = ext4_find_extent(inode, EX_DATA_LBLK, NULL, 0); ex = path->p_ext; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0ad0a9f2e3d4..5228196f5ad4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3845,6 +3845,7 @@ static struct ext4_ext_path * convert_initialized_extent(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, struct ext4_ext_path *path, + int flags, unsigned int *allocated) { struct ext4_extent *ex; @@ -3870,7 +3871,7 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, if (ee_block != map->m_lblk || ee_len > map->m_len) { path = ext4_split_convert_extents(handle, inode, map, path, - EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, NULL); + flags | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, NULL); if (IS_ERR(path)) return path; @@ -4264,7 +4265,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, if ((!ext4_ext_is_unwritten(ex)) && (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { path = convert_initialized_extent(handle, - inode, map, path, &allocated); + inode, map, path, flags, &allocated); if (IS_ERR(path)) err = PTR_ERR(path); goto out; -- 2.51.0 Currently, callers like ext4_convert_unwritten_extents() pass EXT4_EX_NOCACHE flag to avoid caching extents however this is not respected by ext4_convert_unwritten_extents_endio(). Hence, modify it to accept flags from the caller and to pass the flags on to other extent manipulation functions it calls. This makes sure the NOCACHE flag is respected throughout the code path. Also, since the caller already passes METADATA_NOFAIL and CONVERT flags we don't need to explicitly pass it anymore. Signed-off-by: Ojaswin Mujoo --- fs/ext4/extents.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 5228196f5ad4..460a70e6dae0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3785,7 +3785,7 @@ static struct ext4_ext_path *ext4_split_convert_extents(handle_t *handle, static struct ext4_ext_path * ext4_convert_unwritten_extents_endio(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, - struct ext4_ext_path *path) + struct ext4_ext_path *path, int flags) { struct ext4_extent *ex; ext4_lblk_t ee_block; @@ -3802,9 +3802,6 @@ ext4_convert_unwritten_extents_endio(handle_t *handle, struct inode *inode, (unsigned long long)ee_block, ee_len); if (ee_block != map->m_lblk || ee_len > map->m_len) { - int flags = EXT4_GET_BLOCKS_CONVERT | - EXT4_GET_BLOCKS_METADATA_NOFAIL; - path = ext4_split_convert_extents(handle, inode, map, path, flags, NULL); if (IS_ERR(path)) @@ -3943,7 +3940,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, /* IO end_io complete, convert the filled extent to written */ if (flags & EXT4_GET_BLOCKS_CONVERT) { path = ext4_convert_unwritten_extents_endio(handle, inode, - map, path); + map, path, flags); if (IS_ERR(path)) return path; ext4_update_inode_fsync_trans(handle, inode, 1); -- 2.51.0 Currently, zeroout is used as a fallback in case we fail to split/convert extents in the "traditional" modify-the-extent-tree way. This is essential to mitigate failures in critical paths like extent splitting during endio. However, the logic is very messy and not easy to follow. Further, the fragile use of various flags has made it prone to errors. Refactor zeroout out logic by moving it up to ext4_split_extents(). Further, zeroout correctly based on the type of conversion we want, ie: - unwritten to written: Zeroout everything around the mapped range. - unwritten to unwritten: Zeroout everything - written to unwritten: Zeroout only the mapped range. Signed-off-by: Ojaswin Mujoo --- fs/ext4/extents.c | 287 +++++++++++++++++++++++++++++++--------------- 1 file changed, 195 insertions(+), 92 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 460a70e6dae0..8082e1d93bbf 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -44,14 +44,6 @@ #define EXT4_EXT_MARK_UNWRIT1 0x2 /* mark first half unwritten */ #define EXT4_EXT_MARK_UNWRIT2 0x4 /* mark second half unwritten */ -/* first half contains valid data */ -#define EXT4_EXT_DATA_ENTIRE_VALID1 0x8 /* has entirely valid data */ -#define EXT4_EXT_DATA_PARTIAL_VALID1 0x10 /* has partially valid data */ -#define EXT4_EXT_DATA_VALID1 (EXT4_EXT_DATA_ENTIRE_VALID1 | \ - EXT4_EXT_DATA_PARTIAL_VALID1) - -#define EXT4_EXT_DATA_VALID2 0x20 /* second half contains valid data */ - static __le32 ext4_extent_block_csum(struct inode *inode, struct ext4_extent_header *eh) { @@ -3194,7 +3186,8 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) * a> the extent are splitted into two extent. * b> split is not needed, and just mark the extent. * - * Return an extent path pointer on success, or an error pointer on failure. + * Return an extent path pointer on success, or an error pointer on failure. On + * failure, the extent is restored to original state. */ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, struct inode *inode, @@ -3204,14 +3197,10 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, { ext4_fsblk_t newblock; ext4_lblk_t ee_block; - struct ext4_extent *ex, newex, orig_ex, zero_ex; + struct ext4_extent *ex, newex, orig_ex; struct ext4_extent *ex2 = NULL; unsigned int ee_len, depth; - int err = 0; - - BUG_ON((split_flag & EXT4_EXT_DATA_VALID1) == EXT4_EXT_DATA_VALID1); - BUG_ON((split_flag & EXT4_EXT_DATA_VALID1) && - (split_flag & EXT4_EXT_DATA_VALID2)); + int err = 0, insert_err = 0; /* Do not cache extents that are in the process of being modified. */ flags |= EXT4_EX_NOCACHE; @@ -3277,11 +3266,10 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, path = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (!IS_ERR(path)) - goto out; + return path; - err = PTR_ERR(path); - if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM) - goto out_path; + insert_err = PTR_ERR(path); + err = 0; /* * Get a new path to try to zeroout or fix the extent length. @@ -3297,53 +3285,13 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, split, PTR_ERR(path)); goto out_path; } - depth = ext_depth(inode); - ex = path[depth].p_ext; - - if (EXT4_EXT_MAY_ZEROOUT & split_flag) { - if (split_flag & EXT4_EXT_DATA_VALID1) - memcpy(&zero_ex, ex2, sizeof(zero_ex)); - else if (split_flag & EXT4_EXT_DATA_VALID2) - memcpy(&zero_ex, ex, sizeof(zero_ex)); - else - memcpy(&zero_ex, &orig_ex, sizeof(zero_ex)); - ext4_ext_mark_initialized(&zero_ex); - err = ext4_ext_zeroout(inode, &zero_ex); - if (err) - goto fix_extent_len; - - /* - * The first half contains partially valid data, the splitting - * of this extent has not been completed, fix extent length - * and ext4_split_extent() split will the first half again. - */ - if (split_flag & EXT4_EXT_DATA_PARTIAL_VALID1) { - /* - * Drop extent cache to prevent stale unwritten - * extents remaining after zeroing out. - */ - ext4_es_remove_extent(inode, - le32_to_cpu(zero_ex.ee_block), - ext4_ext_get_actual_len(&zero_ex)); - goto fix_extent_len; - } - - /* update the extent length and mark as initialized */ - ex->ee_len = cpu_to_le16(ee_len); - ext4_ext_try_to_merge(handle, inode, path, ex); - err = ext4_ext_dirty(handle, inode, path + path->p_depth); - if (!err) - /* update extent status tree */ - ext4_zeroout_es(inode, &zero_ex); - /* - * If we failed at this point, we don't know in which - * state the extent tree exactly is so don't try to fix - * length of the original extent as it may do even more - * damage. - */ + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) goto out; - } + + depth = ext_depth(inode); + ex = path[depth].p_ext; fix_extent_len: ex->ee_len = orig_ex.ee_len; @@ -3353,9 +3301,9 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, */ ext4_ext_dirty(handle, inode, path + path->p_depth); out: - if (err) { + if (err || insert_err) { ext4_free_ext_path(path); - path = ERR_PTR(err); + path = err ? ERR_PTR(err) : ERR_PTR(insert_err); } out_path: if (IS_ERR(path)) @@ -3365,6 +3313,115 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, return path; } +static struct ext4_ext_path * +ext4_split_extent_zeroout(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_map_blocks *map, int flags) +{ + struct ext4_extent *ex; + unsigned int ee_len, depth; + ext4_lblk_t ee_block; + uint64_t lblk, pblk, len; + int is_unwrit; + int err = 0; + + depth = ext_depth(inode); + ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); + is_unwrit = ext4_ext_is_unwritten(ex); + + if (flags & EXT4_GET_BLOCKS_CONVERT) { + /* + * EXT4_GET_BLOCKS_CONVERT: Caller wants the range specified by + * map to be initialized. Zeroout everything except the map + * range. + */ + + loff_t map_end = (loff_t) map->m_lblk + map->m_len; + loff_t ex_end = (loff_t) ee_block + ee_len; + + if (!is_unwrit) + /* Shouldn't happen. Just exit */ + return ERR_PTR(-EINVAL); + + /* zeroout left */ + if (map->m_lblk > ee_block) { + lblk = ee_block; + len = map->m_lblk - ee_block; + pblk = ext4_ext_pblock(ex); + err = ext4_issue_zeroout(inode, lblk, pblk, len); + if (err) + /* ZEROOUT failed, just return original error */ + return ERR_PTR(err); + } + + /* zeroout right */ + if (map->m_lblk + map->m_len < ee_block + ee_len) { + lblk = map_end; + len = ex_end - map_end; + pblk = ext4_ext_pblock(ex) + (map_end - ee_block); + err = ext4_issue_zeroout(inode, lblk, pblk, len); + if (err) + /* ZEROOUT failed, just return original error */ + return ERR_PTR(err); + } + } else if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) { + /* + * EXT4_GET_BLOCKS_CONVERT_UNWRITTEN: Caller wants the + * range specified by map to be marked unwritten. + * Zeroout the map range leaving rest as it is. + */ + + if (is_unwrit) + /* Shouldn't happen. Just exit */ + return ERR_PTR(-EINVAL); + + lblk = map->m_lblk; + len = map->m_len; + pblk = ext4_ext_pblock(ex) + (map->m_lblk - ee_block); + err = ext4_issue_zeroout(inode, lblk, pblk, len); + if (err) + /* ZEROOUT failed, just return original error */ + return ERR_PTR(err); + } else if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) { + /* + * EXT4_GET_BLOCKS_UNWRIT_EXT: Today, this flag + * implicitly implies that callers when wanting an + * unwritten to unwritten split. So zeroout the whole + * extent. + * + * TODO: The implicit meaning of the flag is not ideal + * and eventually we should aim for a more well defined + * behavior + */ + + if (!is_unwrit) + /* Shouldn't happen. Just exit */ + return ERR_PTR(-EINVAL); + + lblk = ee_block; + len = ee_len; + pblk = ext4_ext_pblock(ex); + err = ext4_issue_zeroout(inode, lblk, pblk, len); + if (err) + /* ZEROOUT failed, just return original error */ + return ERR_PTR(err); + } + + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + return ERR_PTR(err); + + ext4_ext_mark_initialized(ex); + + ext4_ext_dirty(handle, inode, path + path->p_depth); + if (err) + return ERR_PTR(err); + + return 0; +} + /* * ext4_split_extent() splits an extent and mark extent which is covered * by @map as split_flags indicates @@ -3383,11 +3440,12 @@ static struct ext4_ext_path *ext4_split_extent(handle_t *handle, int split_flag, int flags, unsigned int *allocated) { - ext4_lblk_t ee_block; + ext4_lblk_t ee_block, orig_ee_block; struct ext4_extent *ex; - unsigned int ee_len, depth; - int unwritten; - int split_flag1, flags1; + unsigned int ee_len, orig_ee_len, depth; + int unwritten, orig_unwritten; + int split_flag1 = 0, flags1 = 0; + int err = 0, orig_err; depth = ext_depth(inode); ex = path[depth].p_ext; @@ -3395,23 +3453,29 @@ static struct ext4_ext_path *ext4_split_extent(handle_t *handle, ee_len = ext4_ext_get_actual_len(ex); unwritten = ext4_ext_is_unwritten(ex); + orig_ee_block = ee_block; + orig_ee_len = ee_len; + orig_unwritten = unwritten; + /* Do not cache extents that are in the process of being modified. */ flags |= EXT4_EX_NOCACHE; if (map->m_lblk + map->m_len < ee_block + ee_len) { - split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; flags1 = flags | EXT4_GET_BLOCKS_SPLIT_NOMERGE; if (unwritten) split_flag1 |= EXT4_EXT_MARK_UNWRIT1 | EXT4_EXT_MARK_UNWRIT2; - if (split_flag & EXT4_EXT_DATA_VALID2) - split_flag1 |= map->m_lblk > ee_block ? - EXT4_EXT_DATA_PARTIAL_VALID1 : - EXT4_EXT_DATA_ENTIRE_VALID1; path = ext4_split_extent_at(handle, inode, path, map->m_lblk + map->m_len, split_flag1, flags1); - if (IS_ERR(path)) - return path; + + if (IS_ERR(path)) { + orig_err = PTR_ERR(path); + if (orig_err != -ENOSPC && orig_err != -EDQUOT && + orig_err != -ENOMEM) + return path; + + goto try_zeroout; + } /* * Update path is required because previous ext4_split_extent_at * may result in split of original leaf or extent zeroout. @@ -3427,22 +3491,68 @@ static struct ext4_ext_path *ext4_split_extent(handle_t *handle, ext4_free_ext_path(path); return ERR_PTR(-EFSCORRUPTED); } - unwritten = ext4_ext_is_unwritten(ex); } if (map->m_lblk >= ee_block) { - split_flag1 = split_flag & EXT4_EXT_DATA_VALID2; + split_flag1 = 0; if (unwritten) { split_flag1 |= EXT4_EXT_MARK_UNWRIT1; - split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | - EXT4_EXT_MARK_UNWRIT2); + split_flag1 |= split_flag & EXT4_EXT_MARK_UNWRIT2; } - path = ext4_split_extent_at(handle, inode, path, - map->m_lblk, split_flag1, flags); + path = ext4_split_extent_at(handle, inode, path, map->m_lblk, + split_flag1, flags); + + if (IS_ERR(path)) { + orig_err = PTR_ERR(path); + if (orig_err != -ENOSPC && orig_err != -EDQUOT && + orig_err != -ENOMEM) + return path; + + goto try_zeroout; + } + } + + if (!err) + goto out; + +try_zeroout: + /* + * There was an error in splitting the extent, just zeroout and convert + * to initialize as a last resort + */ + if (split_flag & EXT4_EXT_MAY_ZEROOUT) { + path = ext4_find_extent(inode, map->m_lblk, NULL, flags); if (IS_ERR(path)) return path; + + depth = ext_depth(inode); + ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); + unwritten = ext4_ext_is_unwritten(ex); + + /* + * The extent to zeroout should have been unchanged + * but its not, just return error to caller + */ + if (WARN_ON(ee_block != orig_ee_block || + ee_len != orig_ee_len || + unwritten != orig_unwritten)) + return ERR_PTR(orig_err); + + /* + * Something went wrong in zeroout, just return the + * original error + */ + if (ext4_split_extent_zeroout(handle, inode, path, map, flags)) + return ERR_PTR(orig_err); } + /* There's an error and we can't zeroout, just return the err */ + return ERR_PTR(orig_err); + +out: + if (allocated) { if (map->m_lblk + map->m_len > ee_block + ee_len) *allocated = ee_len - (map->m_lblk - ee_block); @@ -3486,7 +3596,7 @@ ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, ext4_lblk_t ee_block, eof_block; unsigned int ee_len, depth, map_len = map->m_len; int err = 0; - int split_flag = EXT4_EXT_DATA_VALID2; + int split_flag = 0; unsigned int max_zeroout = 0; ext_debug(inode, "logical block %llu, max_blocks %u\n", @@ -3760,11 +3870,7 @@ static struct ext4_ext_path *ext4_split_convert_extents(handle_t *handle, ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); - /* Convert to unwritten */ - if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) { - split_flag |= EXT4_EXT_DATA_ENTIRE_VALID1; - /* Split the existing unwritten extent */ - } else if (flags & (EXT4_GET_BLOCKS_UNWRIT_EXT | + if (flags & (EXT4_GET_BLOCKS_UNWRIT_EXT | EXT4_GET_BLOCKS_CONVERT)) { /* * It is safe to convert extent to initialized via explicit @@ -3773,9 +3879,6 @@ static struct ext4_ext_path *ext4_split_convert_extents(handle_t *handle, split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; split_flag |= EXT4_EXT_MARK_UNWRIT2; - /* Convert to initialized */ - if (flags & EXT4_GET_BLOCKS_CONVERT) - split_flag |= EXT4_EXT_DATA_VALID2; } flags |= EXT4_GET_BLOCKS_SPLIT_NOMERGE; return ext4_split_extent(handle, inode, path, map, split_flag, flags, -- 2.51.0 ext4_split_convert_extents() has been historically prone to subtle bugs and inconsistent behavior due to the way all the various flags interact with the extent split and conversion process. For example, callers like ext4_convert_unwritten_extents_endio() and convert_initialized_extents() needed to open code extent conversion despite passing CONVERT or CONVERT_UNWRITTEN flags because ext4_split_convert_extents() wasn't performing the conversion. Hence, refactor ext4_split_convert_extents() to clearly enforce the semantics of each flag. The major changes here are: * Clearly separate the split and convert process: * ext4_split_extent() and ext4_split_extent_at() are now only responsible to perform the split. * ext4_split_convert_extents() is now responsible to perform extent conversion after calling ext4_split_extent() for splitting. * This helps get rid of all the MARK_UNWRIT* flags. * Clearly enforce the semantics of flags passed to ext4_split_convert_extents(): * EXT4_GET_BLOCKS_CONVERT: Will convert the split extent to written * EXT4_GET_BLOCKS_CONVERT_UNWRITTEN: Will convert the split extent to unwritten * Passing neither of the above means we only want a split. * Modify all callers to enforce the above semantics. * Use ext4_split_convert_extents() instead of ext4_split_extents() * in ext4_ext_convert_to_initialized() for uniformity. * Cleanup all callers open coding the conversion logic. * Further, modify kuniy tests to pass flags based on the new semantics. From an end user point of view, we should not see any changes in behavior of ext4. Signed-off-by: Ojaswin Mujoo --- fs/ext4/extents-test.c | 12 +- fs/ext4/extents.c | 299 +++++++++++++++++++---------------------- 2 files changed, 145 insertions(+), 166 deletions(-) diff --git a/fs/ext4/extents-test.c b/fs/ext4/extents-test.c index 54aed3eabfe2..725d5e79be96 100644 --- a/fs/ext4/extents-test.c +++ b/fs/ext4/extents-test.c @@ -567,7 +567,7 @@ static const struct kunit_ext_test_param test_split_convert_params[] = { /* unwrit to unwrit splits */ { .desc = "split unwrit extent to 2 unwrit extents", .is_unwrit_at_start = 1, - .split_flags = EXT4_GET_BLOCKS_UNWRIT_EXT, + .split_flags = 0, .split_map = { .m_lblk = 10, .m_len = 1 }, .nr_exp_ext = 2, .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 1 }, @@ -575,7 +575,7 @@ static const struct kunit_ext_test_param test_split_convert_params[] = { .is_zeroout_test = 0 }, { .desc = "split unwrit extent to 2 extents (2)", .is_unwrit_at_start = 1, - .split_flags = EXT4_GET_BLOCKS_UNWRIT_EXT, + .split_flags = 0, .split_map = { .m_lblk = 11, .m_len = 2 }, .nr_exp_ext = 2, .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 1 }, @@ -583,7 +583,7 @@ static const struct kunit_ext_test_param test_split_convert_params[] = { .is_zeroout_test = 0 }, { .desc = "split unwrit extent to 3 unwrit extents", .is_unwrit_at_start = 1, - .split_flags = EXT4_GET_BLOCKS_UNWRIT_EXT, + .split_flags = 0, .split_map = { .m_lblk = 11, .m_len = 1 }, .nr_exp_ext = 3, .exp_ext_state = { { .ex_lblk = 10, .ex_len = 1, .is_unwrit = 1 }, @@ -660,7 +660,7 @@ static const struct kunit_ext_test_param test_split_convert_params[] = { /* unwrit to unwrit splits */ { .desc = "split unwrit extent to 2 unwrit extents (zeroout)", .is_unwrit_at_start = 1, - .split_flags = EXT4_GET_BLOCKS_UNWRIT_EXT, + .split_flags = 0, .split_map = { .m_lblk = 10, .m_len = 1 }, .nr_exp_ext = 1, .exp_ext_state = { { .ex_lblk = 10, .ex_len = 3, .is_unwrit = 0 } }, @@ -669,7 +669,7 @@ static const struct kunit_ext_test_param test_split_convert_params[] = { .exp_data_state = { { .exp_char = 0, .off_blk = 0, .len_blk = 3 } } }, { .desc = "split unwrit extent to 2 unwrit extents (2) (zeroout)", .is_unwrit_at_start = 1, - .split_flags = EXT4_GET_BLOCKS_UNWRIT_EXT, + .split_flags = 0, .split_map = { .m_lblk = 11, .m_len = 2 }, .nr_exp_ext = 1, .exp_ext_state = { { .ex_lblk = 10, .ex_len = 3, .is_unwrit = 0 } }, @@ -678,7 +678,7 @@ static const struct kunit_ext_test_param test_split_convert_params[] = { .exp_data_state = { { .exp_char = 0, .off_blk = 0, .len_blk = 3 } } }, { .desc = "split unwrit extent to 3 unwrit extents (zeroout)", .is_unwrit_at_start = 1, - .split_flags = EXT4_GET_BLOCKS_UNWRIT_EXT, + .split_flags = 0, .split_map = { .m_lblk = 11, .m_len = 1 }, .nr_exp_ext = 1, .exp_ext_state = { { .ex_lblk = 10, .ex_len = 3, .is_unwrit = 0 } }, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 8082e1d93bbf..9fb8a3220ae2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -41,8 +41,9 @@ */ #define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \ due to ENOSPC */ -#define EXT4_EXT_MARK_UNWRIT1 0x2 /* mark first half unwritten */ -#define EXT4_EXT_MARK_UNWRIT2 0x4 /* mark second half unwritten */ +static struct ext4_ext_path *ext4_split_convert_extents( + handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, + struct ext4_ext_path *path, int flags, unsigned int *allocated); static __le32 ext4_extent_block_csum(struct inode *inode, struct ext4_extent_header *eh) @@ -84,8 +85,7 @@ static void ext4_extent_block_csum_set(struct inode *inode, static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, - ext4_lblk_t split, - int split_flag, int flags); + ext4_lblk_t split, int flags); static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) { @@ -333,15 +333,12 @@ ext4_force_split_extent_at(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t lblk, int nofail) { - int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext); int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_SPLIT_NOMERGE; if (nofail) flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL; - return ext4_split_extent_at(handle, inode, path, lblk, unwritten ? - EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0, - flags); + return ext4_split_extent_at(handle, inode, path, lblk, flags); } static int @@ -3174,17 +3171,11 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) * @inode: the file inode * @path: the path to the extent * @split: the logical block where the extent is splitted. - * @split_flags: indicates if the extent could be zeroout if split fails, and - * the states(init or unwritten) of new extents. * @flags: flags used to insert new extent to extent tree. * * * Splits extent [a, b] into two extents [a, @split) and [@split, b], states - * of which are determined by split_flag. - * - * There are two cases: - * a> the extent are splitted into two extent. - * b> split is not needed, and just mark the extent. + * of which are same as the original extent. No conversion is performed. * * Return an extent path pointer on success, or an error pointer on failure. On * failure, the extent is restored to original state. @@ -3193,14 +3184,14 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t split, - int split_flag, int flags) + int flags) { ext4_fsblk_t newblock; ext4_lblk_t ee_block; struct ext4_extent *ex, newex, orig_ex; struct ext4_extent *ex2 = NULL; unsigned int ee_len, depth; - int err = 0, insert_err = 0; + int err = 0, insert_err = 0, is_unwrit = 0; /* Do not cache extents that are in the process of being modified. */ flags |= EXT4_EX_NOCACHE; @@ -3214,39 +3205,24 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); newblock = split - ee_block + ext4_ext_pblock(ex); + is_unwrit = ext4_ext_is_unwritten(ex); BUG_ON(split < ee_block || split >= (ee_block + ee_len)); - BUG_ON(!ext4_ext_is_unwritten(ex) && - split_flag & (EXT4_EXT_MAY_ZEROOUT | - EXT4_EXT_MARK_UNWRIT1 | - EXT4_EXT_MARK_UNWRIT2)); - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) + /* + * No split needed + */ + if (split == ee_block) goto out; - if (split == ee_block) { - /* - * case b: block @split is the block that the extent begins with - * then we just change the state of the extent, and splitting - * is not needed. - */ - if (split_flag & EXT4_EXT_MARK_UNWRIT2) - ext4_ext_mark_unwritten(ex); - else - ext4_ext_mark_initialized(ex); - - if (!(flags & EXT4_GET_BLOCKS_SPLIT_NOMERGE)) - ext4_ext_try_to_merge(handle, inode, path, ex); - - err = ext4_ext_dirty(handle, inode, path + path->p_depth); + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) goto out; - } /* case a */ memcpy(&orig_ex, ex, sizeof(orig_ex)); ex->ee_len = cpu_to_le16(split - ee_block); - if (split_flag & EXT4_EXT_MARK_UNWRIT1) + if (is_unwrit) ext4_ext_mark_unwritten(ex); /* @@ -3261,7 +3237,7 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle, ex2->ee_block = cpu_to_le32(split); ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block)); ext4_ext_store_pblock(ex2, newblock); - if (split_flag & EXT4_EXT_MARK_UNWRIT2) + if (is_unwrit) ext4_ext_mark_unwritten(ex2); path = ext4_ext_insert_extent(handle, inode, path, &newex, flags); @@ -3384,16 +3360,11 @@ ext4_split_extent_zeroout(handle_t *handle, struct inode *inode, if (err) /* ZEROOUT failed, just return original error */ return ERR_PTR(err); - } else if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) { + } else { /* - * EXT4_GET_BLOCKS_UNWRIT_EXT: Today, this flag - * implicitly implies that callers when wanting an - * unwritten to unwritten split. So zeroout the whole - * extent. - * - * TODO: The implicit meaning of the flag is not ideal - * and eventually we should aim for a more well defined - * behavior + * None of the convert flags imply we just want a split. + * In this case we can only zeroout if an unwritten split + * was needed. */ if (!is_unwrit) @@ -3415,7 +3386,7 @@ ext4_split_extent_zeroout(handle_t *handle, struct inode *inode, ext4_ext_mark_initialized(ex); - ext4_ext_dirty(handle, inode, path + path->p_depth); + ext4_ext_dirty(handle, inode, path + depth); if (err) return ERR_PTR(err); @@ -3438,13 +3409,13 @@ static struct ext4_ext_path *ext4_split_extent(handle_t *handle, struct ext4_ext_path *path, struct ext4_map_blocks *map, int split_flag, int flags, - unsigned int *allocated) + unsigned int *allocated, bool *did_zeroout) { ext4_lblk_t ee_block, orig_ee_block; struct ext4_extent *ex; unsigned int ee_len, orig_ee_len, depth; int unwritten, orig_unwritten; - int split_flag1 = 0, flags1 = 0; + int flags1 = 0; int err = 0, orig_err; depth = ext_depth(inode); @@ -3462,11 +3433,9 @@ static struct ext4_ext_path *ext4_split_extent(handle_t *handle, if (map->m_lblk + map->m_len < ee_block + ee_len) { flags1 = flags | EXT4_GET_BLOCKS_SPLIT_NOMERGE; - if (unwritten) - split_flag1 |= EXT4_EXT_MARK_UNWRIT1 | - EXT4_EXT_MARK_UNWRIT2; + path = ext4_split_extent_at(handle, inode, path, - map->m_lblk + map->m_len, split_flag1, flags1); + map->m_lblk + map->m_len, flags1); if (IS_ERR(path)) { orig_err = PTR_ERR(path); @@ -3494,13 +3463,8 @@ static struct ext4_ext_path *ext4_split_extent(handle_t *handle, } if (map->m_lblk >= ee_block) { - split_flag1 = 0; - if (unwritten) { - split_flag1 |= EXT4_EXT_MARK_UNWRIT1; - split_flag1 |= split_flag & EXT4_EXT_MARK_UNWRIT2; - } path = ext4_split_extent_at(handle, inode, path, map->m_lblk, - split_flag1, flags); + flags); if (IS_ERR(path)) { orig_err = PTR_ERR(path); @@ -3546,6 +3510,11 @@ static struct ext4_ext_path *ext4_split_extent(handle_t *handle, */ if (ext4_split_extent_zeroout(handle, inode, path, map, flags)) return ERR_PTR(orig_err); + + /* zeroout succeeded */ + if (did_zeroout) + *did_zeroout = true; + goto out; } /* There's an error and we can't zeroout, just return the err */ @@ -3596,7 +3565,6 @@ ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, ext4_lblk_t ee_block, eof_block; unsigned int ee_len, depth, map_len = map->m_len; int err = 0; - int split_flag = 0; unsigned int max_zeroout = 0; ext_debug(inode, "logical block %llu, max_blocks %u\n", @@ -3748,9 +3716,7 @@ ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, * It is safe to convert extent to initialized via explicit * zeroout only if extent is fully inside i_size or new_size. */ - split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; - - if (EXT4_EXT_MAY_ZEROOUT & split_flag) + if (ee_block + ee_len <= eof_block) max_zeroout = sbi->s_extent_max_zeroout_kb >> (inode->i_sb->s_blocksize_bits - 10); @@ -3805,8 +3771,8 @@ ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, } fallback: - path = ext4_split_extent(handle, inode, path, &split_map, split_flag, - flags, NULL); + path = ext4_split_convert_extents(handle, inode, &split_map, path, + flags | EXT4_GET_BLOCKS_CONVERT, NULL); if (IS_ERR(path)) return path; out: @@ -3820,6 +3786,26 @@ ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, return ERR_PTR(err); } +static bool ext4_ext_needs_conv(struct inode *inode, struct ext4_ext_path *path, + int flags) +{ + struct ext4_extent *ex; + bool is_unwrit; + int depth; + + depth = ext_depth(inode); + ex = path[depth].p_ext; + is_unwrit = ext4_ext_is_unwritten(ex); + + if (is_unwrit && (flags & EXT4_GET_BLOCKS_CONVERT)) + return true; + + if (!is_unwrit && (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) + return true; + + return false; +} + /* * This function is called by ext4_ext_map_blocks() from * ext4_get_blocks_dio_write() when DIO to write @@ -3856,7 +3842,9 @@ static struct ext4_ext_path *ext4_split_convert_extents(handle_t *handle, ext4_lblk_t ee_block; struct ext4_extent *ex; unsigned int ee_len; - int split_flag = 0, depth; + int split_flag = 0, depth, err = 0; + bool did_zeroout = false; + bool needs_conv = ext4_ext_needs_conv(inode, path, flags); ext_debug(inode, "logical block %llu, max_blocks %u\n", (unsigned long long)map->m_lblk, map->m_len); @@ -3870,19 +3858,81 @@ static struct ext4_ext_path *ext4_split_convert_extents(handle_t *handle, ee_block = le32_to_cpu(ex->ee_block); ee_len = ext4_ext_get_actual_len(ex); - if (flags & (EXT4_GET_BLOCKS_UNWRIT_EXT | - EXT4_GET_BLOCKS_CONVERT)) { + /* No split needed */ + if (ee_block == map->m_lblk && ee_len == map->m_len) + goto convert; + + /* + * We don't use zeroout fallback for written to unwritten conversion as + * it is not as critical as endio and it might take unusually long. + * Also, it is only safe to convert extent to initialized via explicit + * zeroout only if extent is fully inside i_size or new_size. + */ + if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) + split_flag |= ee_block + ee_len <= eof_block ? + EXT4_EXT_MAY_ZEROOUT : + 0; + + /* + * pass SPLIT_NOMERGE explicitly so we don't end up merging extents we + * just split. + */ + path = ext4_split_extent(handle, inode, path, map, split_flag, + flags | EXT4_GET_BLOCKS_SPLIT_NOMERGE, + allocated, &did_zeroout); + +convert: + /* + * We don't need a conversion if: + * 1. There was an error in split. + * 2. We split via zeroout. + * 3. None of the convert flags were passed. + */ + if (IS_ERR(path) || did_zeroout || !needs_conv) + return path; + + path = ext4_find_extent(inode, map->m_lblk, path, flags); + if (IS_ERR(path)) + return path; + + depth = ext_depth(inode); + ex = path[depth].p_ext; + + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto err; + + if (flags & EXT4_GET_BLOCKS_CONVERT) + ext4_ext_mark_initialized(ex); + else if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) + ext4_ext_mark_unwritten(ex); + + if (!(flags & EXT4_GET_BLOCKS_SPLIT_NOMERGE)) /* - * It is safe to convert extent to initialized via explicit - * zeroout only if extent is fully inside i_size or new_size. + * note: ext4_ext_correct_indexes() isn't needed here because + * borders are not changed */ - split_flag |= ee_block + ee_len <= eof_block ? - EXT4_EXT_MAY_ZEROOUT : 0; - split_flag |= EXT4_EXT_MARK_UNWRIT2; + ext4_ext_try_to_merge(handle, inode, path, ex); + + err = ext4_ext_dirty(handle, inode, path + depth); + if (err) + goto err; + + /* Lets update the extent status tree after conversion */ + ext4_es_insert_extent(inode, le32_to_cpu(ex->ee_block), + ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex), + ext4_ext_is_unwritten(ex) ? + EXTENT_STATUS_UNWRITTEN : + EXTENT_STATUS_WRITTEN, + false); + +err: + if (err) { + ext4_free_ext_path(path); + return ERR_PTR(err); } - flags |= EXT4_GET_BLOCKS_SPLIT_NOMERGE; - return ext4_split_extent(handle, inode, path, map, split_flag, flags, - allocated); + + return path; } static struct ext4_ext_path * @@ -3894,7 +3944,6 @@ ext4_convert_unwritten_extents_endio(handle_t *handle, struct inode *inode, ext4_lblk_t ee_block; unsigned int ee_len; int depth; - int err = 0; depth = ext_depth(inode); ex = path[depth].p_ext; @@ -3904,41 +3953,8 @@ ext4_convert_unwritten_extents_endio(handle_t *handle, struct inode *inode, ext_debug(inode, "logical block %llu, max_blocks %u\n", (unsigned long long)ee_block, ee_len); - if (ee_block != map->m_lblk || ee_len > map->m_len) { - path = ext4_split_convert_extents(handle, inode, map, path, - flags, NULL); - if (IS_ERR(path)) - return path; - - path = ext4_find_extent(inode, map->m_lblk, path, 0); - if (IS_ERR(path)) - return path; - depth = ext_depth(inode); - ex = path[depth].p_ext; - } - - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto errout; - /* first mark the extent as initialized */ - ext4_ext_mark_initialized(ex); - - /* note: ext4_ext_correct_indexes() isn't needed here because - * borders are not changed - */ - ext4_ext_try_to_merge(handle, inode, path, ex); - - /* Mark modified extent as dirty */ - err = ext4_ext_dirty(handle, inode, path + path->p_depth); - if (err) - goto errout; - - ext4_ext_show_leaf(inode, path); - return path; - -errout: - ext4_free_ext_path(path); - return ERR_PTR(err); + return ext4_split_convert_extents(handle, inode, map, path, flags, + NULL); } static struct ext4_ext_path * @@ -3952,7 +3968,6 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, ext4_lblk_t ee_block; unsigned int ee_len; int depth; - int err = 0; /* * Make sure that the extent is no bigger than we support with @@ -3969,40 +3984,12 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, ext_debug(inode, "logical block %llu, max_blocks %u\n", (unsigned long long)ee_block, ee_len); - if (ee_block != map->m_lblk || ee_len > map->m_len) { - path = ext4_split_convert_extents(handle, inode, map, path, - flags | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, NULL); - if (IS_ERR(path)) - return path; - - path = ext4_find_extent(inode, map->m_lblk, path, 0); - if (IS_ERR(path)) - return path; - depth = ext_depth(inode); - ex = path[depth].p_ext; - if (!ex) { - EXT4_ERROR_INODE(inode, "unexpected hole at %lu", - (unsigned long) map->m_lblk); - err = -EFSCORRUPTED; - goto errout; - } - } - - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto errout; - /* first mark the extent as unwritten */ - ext4_ext_mark_unwritten(ex); - - /* note: ext4_ext_correct_indexes() isn't needed here because - * borders are not changed - */ - ext4_ext_try_to_merge(handle, inode, path, ex); + path = ext4_split_convert_extents( + handle, inode, map, path, + flags | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, NULL); + if (IS_ERR(path)) + return path; - /* Mark modified extent as dirty */ - err = ext4_ext_dirty(handle, inode, path + path->p_depth); - if (err) - goto errout; ext4_ext_show_leaf(inode, path); ext4_update_inode_fsync_trans(handle, inode, 1); @@ -4012,10 +3999,6 @@ convert_initialized_extent(handle_t *handle, struct inode *inode, *allocated = map->m_len; map->m_len = *allocated; return path; - -errout: - ext4_free_ext_path(path); - return ERR_PTR(err); } static struct ext4_ext_path * @@ -5649,7 +5632,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) struct ext4_extent *extent; ext4_lblk_t start_lblk, len_lblk, ee_start_lblk = 0; unsigned int credits, ee_len; - int ret, depth, split_flag = 0; + int ret, depth; loff_t start; trace_ext4_insert_range(inode, offset, len); @@ -5720,12 +5703,8 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) */ if ((start_lblk > ee_start_lblk) && (start_lblk < (ee_start_lblk + ee_len))) { - if (ext4_ext_is_unwritten(extent)) - split_flag = EXT4_EXT_MARK_UNWRIT1 | - EXT4_EXT_MARK_UNWRIT2; path = ext4_split_extent_at(handle, inode, path, - start_lblk, split_flag, - EXT4_EX_NOCACHE | + start_lblk, EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_SPLIT_NOMERGE | EXT4_GET_BLOCKS_METADATA_NOFAIL); } -- 2.51.0 Currently, when we are doing an extent split and convert operation of written to unwritten extent (example, as done by ZERO_RANGE), we don't allow the zeroout fallback in case the extent tree manipulation fails. This is mostly because zeroout might take unsually long and the fact that this code path is more tolerant to failures than endio. Since we have zeroout machinery in place, we might as well use it hence lift this restriction. To mitigate zeroout taking too long respect the max zeroout limit here so that the operation finishes relatively fast. Also, add kunit tests for this case. Signed-off-by: Ojaswin Mujoo --- fs/ext4/extents-test.c | 33 +++++++++++++++++++++++++++++++++ fs/ext4/extents.c | 23 +++++++++++++++-------- 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/fs/ext4/extents-test.c b/fs/ext4/extents-test.c index 725d5e79be96..3b5274297fe9 100644 --- a/fs/ext4/extents-test.c +++ b/fs/ext4/extents-test.c @@ -685,6 +685,39 @@ static const struct kunit_ext_test_param test_split_convert_params[] = { .is_zeroout_test = 1, .nr_exp_data_segs = 1, .exp_data_state = { { .exp_char = 0, .off_blk = 0, .len_blk = 3 } } }, + + /* writ to unwrit splits */ + { .desc = "split writ extent to 2 extents and convert 1st half unwrit (zeroout)", + .is_unwrit_at_start = 0, + .split_flags = EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, + .split_map = { .m_lblk = 10, .m_len = 1 }, + .nr_exp_ext = 1, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 3, .is_unwrit = 0 } }, + .is_zeroout_test = 1, + .nr_exp_data_segs = 2, + .exp_data_state = { { .exp_char = 0, .off_blk = 0, .len_blk = 1 }, + { .exp_char = 'X', .off_blk = 1, .len_blk = 2 }}}, + { .desc = "split writ extent to 2 extents and convert 2nd half unwrit (zeroout)", + .is_unwrit_at_start = 0, + .split_flags = EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, + .split_map = { .m_lblk = 11, .m_len = 2 }, + .nr_exp_ext = 1, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 3, .is_unwrit = 0 } }, + .is_zeroout_test = 1, + .nr_exp_data_segs = 2, + .exp_data_state = { { .exp_char = 'X', .off_blk = 0, .len_blk = 1 }, + { .exp_char = 0, .off_blk = 1, .len_blk = 2 } } }, + { .desc = "split writ extent to 3 extents and convert 2nd half unwrit (zeroout)", + .is_unwrit_at_start = 0, + .split_flags = EXT4_GET_BLOCKS_CONVERT_UNWRITTEN, + .split_map = { .m_lblk = 11, .m_len = 1 }, + .nr_exp_ext = 1, + .exp_ext_state = { { .ex_lblk = 10, .ex_len = 3, .is_unwrit = 0 } }, + .is_zeroout_test = 1, + .nr_exp_data_segs = 3, + .exp_data_state = { { .exp_char = 'X', .off_blk = 0, .len_blk = 1 }, + { .exp_char = 0, .off_blk = 1, .len_blk = 1 }, + { .exp_char = 'X', .off_blk = 2, .len_blk = 1 }}}, }; static const struct kunit_ext_test_param diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 9fb8a3220ae2..95dd88df8fe4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3485,7 +3485,19 @@ static struct ext4_ext_path *ext4_split_extent(handle_t *handle, * to initialize as a last resort */ if (split_flag & EXT4_EXT_MAY_ZEROOUT) { - path = ext4_find_extent(inode, map->m_lblk, NULL, flags); + int max_zeroout_blks = + EXT4_SB(inode->i_sb)->s_extent_max_zeroout_kb >> + (inode->i_sb->s_blocksize_bits - 10); + if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN && + map->m_len > max_zeroout_blks) + /* + * Written to unwritten extent is not a critical path so + * lets respect the max zeroout + */ + return ERR_PTR(orig_err); + + path = ext4_find_extent(inode, map->m_lblk, NULL, + flags); if (IS_ERR(path)) return path; @@ -3863,15 +3875,10 @@ static struct ext4_ext_path *ext4_split_convert_extents(handle_t *handle, goto convert; /* - * We don't use zeroout fallback for written to unwritten conversion as - * it is not as critical as endio and it might take unusually long. - * Also, it is only safe to convert extent to initialized via explicit + * It is only safe to convert extent to initialized via explicit * zeroout only if extent is fully inside i_size or new_size. */ - if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) - split_flag |= ee_block + ee_len <= eof_block ? - EXT4_EXT_MAY_ZEROOUT : - 0; + split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; /* * pass SPLIT_NOMERGE explicitly so we don't end up merging extents we -- 2.51.0