From: Zhang Yi Currently, ext4_es_cache_extent() is used to load extents into the extent status tree when reading on-disk extent blocks. Since it may be called while moving or modifying the extent tree, it does not overwrite existing extents in the extent status tree and is only used for the initial loading. There are many other places in ext4 where on-disk extents are inserted into the extent status tree, such as in ext4_map_query_blocks(). Currently, they call ext4_es_insert_extent() to perform the insertion, but they don't modify the extents, so ext4_es_cache_extent() would be a more appropriate choice. However, when ext4_map_query_blocks() inserts an extent, it may overwrite a short existing extent of the same type. Therefore, to prepare for the replacements, we need to extend ext4_es_cache_extent() to allow it to overwrite existing extents of the same type. In addition, since caching extents can be more lenient than modifying them and does not involve modifying reserved blocks, it is not necessary to ensure that the insertion operation succeeds as strictly as in the ext4_es_insert_extent() function. 
Signed-off-by: Zhang Yi --- fs/ext4/extents.c | 4 ++-- fs/ext4/extents_status.c | 28 +++++++++++++++++++++------- fs/ext4/extents_status.h | 2 +- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ca5499e9412b..c42ceb5aae37 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -537,12 +537,12 @@ static void ext4_cache_extents(struct inode *inode, if (prev && (prev != lblk)) ext4_es_cache_extent(inode, prev, lblk - prev, ~0, - EXTENT_STATUS_HOLE); + EXTENT_STATUS_HOLE, false); if (ext4_ext_is_unwritten(ex)) status = EXTENT_STATUS_UNWRITTEN; ext4_es_cache_extent(inode, lblk, len, - ext4_ext_pblock(ex), status); + ext4_ext_pblock(ex), status, false); prev = lblk + len; } } diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 31dc0496f8d0..f9546ecf7340 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -986,13 +986,19 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, } /* - * ext4_es_cache_extent() inserts information into the extent status - * tree if and only if there isn't information about the range in - * question already. + * ext4_es_cache_extent() inserts extent information into the extent status + * tree. If 'overwrite' is not set, it inserts extent only if there isn't + * information about the specified range. Otherwise, it overwrites the + * current information. + * + * Note that this interface is only used for caching on-disk extent + * information and cannot be used to convert existing extents in the extent + * status tree. To convert existing extents, use ext4_es_insert_extent() + * instead. 
*/ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk, - unsigned int status) + unsigned int status, bool overwrite) { struct extent_status *es; struct extent_status newes; @@ -1012,10 +1018,18 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, BUG_ON(end < lblk); write_lock(&EXT4_I(inode)->i_es_lock); - es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk); - if (!es || es->es_lblk > end) - __es_insert_extent(inode, &newes, NULL); + if (es && es->es_lblk <= end) { + if (!overwrite) + goto unlock; + + /* Only extents of the same type can be overwritten. */ + WARN_ON_ONCE(ext4_es_type(es) != status); + if (__es_remove_extent(inode, lblk, end, NULL, NULL)) + goto unlock; + } + __es_insert_extent(inode, &newes, NULL); +unlock: write_unlock(&EXT4_I(inode)->i_es_lock); } diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index 8f9c008d11e8..415f7c223a46 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h @@ -139,7 +139,7 @@ extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, bool delalloc_reserve_used); extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk, - unsigned int status); + unsigned int status, bool overwrite); extern void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len); extern void ext4_es_find_extent_range(struct inode *inode, -- 2.46.1 From: Zhang Yi Since ext4_es_cache_extent() can only be used to load on-disk extents and does not permit modifying extents, it is not possible to overwrite an extent of a different type. To prevent misuse of the interface, the current implementation checks only the first existing extent but does not verify all extents within the range to be inserted, as doing so would be time-consuming in highly fragmented scenarios. Furthermore, adding such checks to __es_remove_extent() would complicate its logic. 
Therefore, a full check can be performed in debug mode to ensure that the function does not overwrite any valuable extents. Signed-off-by: Zhang Yi --- fs/ext4/extents_status.c | 50 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index f9546ecf7340..55103c331b6b 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -985,6 +985,48 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, return; } +#ifdef CONFIG_EXT4_DEBUG +/* + * If we find an extent that already exists during caching extents, its + * status must match the one to be cached. Otherwise, the extent status + * tree may have been corrupted. + */ +static void ext4_es_cache_extent_check(struct inode *inode, + struct extent_status *es, struct extent_status *newes) +{ + unsigned int status = ext4_es_type(newes); + struct rb_node *node; + + if (ext4_es_type(es) != status) + goto conflict; + + while ((node = rb_next(&es->rb_node)) != NULL) { + es = rb_entry(node, struct extent_status, rb_node); + + if (es->es_lblk >= newes->es_lblk + newes->es_len) + break; + if (ext4_es_type(es) != status) + goto conflict; + } + return; + +conflict: + ext4_warning_inode(inode, + "ES cache extent failed: add [%d,%d,%llu,0x%x] conflict with existing [%d,%d,%llu,0x%x]\n", + newes->es_lblk, newes->es_len, ext4_es_pblock(newes), + ext4_es_status(newes), es->es_lblk, es->es_len, + ext4_es_pblock(es), ext4_es_status(es)); + + WARN_ON_ONCE(1); +} +#else +static void ext4_es_cache_extent_check(struct inode __maybe_unused *inode, + struct extent_status *es, struct extent_status *newes) +{ + WARN_ON_ONCE(ext4_es_type(es) != ext4_es_type(newes)); +} +#endif + /* * ext4_es_cache_extent() inserts extent information into the extent status * tree. 
If 'overwrite' is not set, it inserts extent only if there isn't @@ -1022,9 +1064,11 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, if (es && es->es_lblk <= end) { if (!overwrite) goto unlock; - - /* Only extents of the same type can be overwritten. */ - WARN_ON_ONCE(ext4_es_type(es) != status); + /* + * Check whether the overwrites are safe. Only extents + * of the same type can be overwritten. + */ + ext4_es_cache_extent_check(inode, es, &newes); if (__es_remove_extent(inode, lblk, end, NULL, NULL)) goto unlock; } -- 2.46.1 From: Zhang Yi Print a trace point after successfully inserting an extent in the ext4_es_cache_extent() function. Additionally, similar to other extent cache operation functions, call ext4_es_print_tree() to display the extent debug information of the inode when in ES_DEBUG mode. Signed-off-by: Zhang Yi --- fs/ext4/extents_status.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 55103c331b6b..ae25a3888de4 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -1052,7 +1052,6 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, newes.es_lblk = lblk; newes.es_len = len; ext4_es_store_pblock_status(&newes, pblk, status); - trace_ext4_es_cache_extent(inode, &newes); if (!len) return; @@ -1073,6 +1072,8 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, goto unlock; } __es_insert_extent(inode, &newes, NULL); + trace_ext4_es_cache_extent(inode, &newes); + ext4_es_print_tree(inode); unlock: write_unlock(&EXT4_I(inode)->i_es_lock); } -- 2.46.1 From: Zhang Yi In ext4, the remaining places for inserting extents into the extent status tree within ext4_ext_determine_insert_hole() and ext4_map_query_blocks() directly cache on-disk extents. We can use ext4_es_cache_extent() instead of ext4_es_insert_extent() in these cases. 
This will help reduce unnecessary increases in extent sequence numbers and cache invalidations after supporting IOMAP in the future. Signed-off-by: Zhang Yi --- fs/ext4/extents.c | 4 ++-- fs/ext4/inode.c | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c42ceb5aae37..7dc80141350d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4160,8 +4160,8 @@ static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode, insert_hole: /* Put just found gap into cache to speed up subsequent requests */ ext_debug(inode, " -> %u:%u\n", hole_start, len); - ext4_es_insert_extent(inode, hole_start, len, ~0, - EXTENT_STATUS_HOLE, false); + ext4_es_cache_extent(inode, hole_start, len, ~0, + EXTENT_STATUS_HOLE, true); /* Update hole_len to reflect hole size after lblk */ if (hole_start != lblk) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e99306a8f47c..a3c37de552e9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -504,8 +504,8 @@ static int ext4_map_query_blocks_next_in_leaf(handle_t *handle, retval = ext4_ext_map_blocks(handle, inode, &map2, 0); if (retval <= 0) { - ext4_es_insert_extent(inode, map->m_lblk, map->m_len, - map->m_pblk, status, false); + ext4_es_cache_extent(inode, map->m_lblk, map->m_len, + map->m_pblk, status, true); return map->m_len; } @@ -526,13 +526,13 @@ static int ext4_map_query_blocks_next_in_leaf(handle_t *handle, */ if (map->m_pblk + map->m_len == map2.m_pblk && status == status2) { - ext4_es_insert_extent(inode, map->m_lblk, - map->m_len + map2.m_len, map->m_pblk, - status, false); + ext4_es_cache_extent(inode, map->m_lblk, + map->m_len + map2.m_len, map->m_pblk, + status, true); map->m_len += map2.m_len; } else { - ext4_es_insert_extent(inode, map->m_lblk, map->m_len, - map->m_pblk, status, false); + ext4_es_cache_extent(inode, map->m_lblk, map->m_len, + map->m_pblk, status, true); } return map->m_len; @@ -571,8 +571,8 @@ static int 
ext4_map_query_blocks(handle_t *handle, struct inode *inode, map->m_len == orig_mlen) { status = map->m_flags & EXT4_MAP_UNWRITTEN ? EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; - ext4_es_insert_extent(inode, map->m_lblk, map->m_len, - map->m_pblk, status, false); + ext4_es_cache_extent(inode, map->m_lblk, map->m_len, + map->m_pblk, status, true); return retval; } -- 2.46.1