Move the iomap_readpage_iter() async bio read logic into a separate helper function. This is needed to make iomap read/readahead more generically usable, especially for filesystems that do not require CONFIG_BLOCK. Signed-off-by: Joanne Koong --- fs/iomap/buffered-io.c | 68 ++++++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index fd827398afd2..13854fb6ad86 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -357,36 +357,21 @@ struct iomap_readpage_ctx { struct readahead_control *rac; }; -static int iomap_readpage_iter(struct iomap_iter *iter, - struct iomap_readpage_ctx *ctx) +/** + * Read in a folio range asynchronously through bios. + * + * This should only be used for read/readahead, not for buffered writes. + * Buffered writes must read in the folio synchronously. + */ +static void iomap_read_folio_range_bio_async(const struct iomap_iter *iter, + struct iomap_readpage_ctx *ctx, loff_t pos, size_t plen) { + struct folio *folio = ctx->cur_folio; const struct iomap *iomap = &iter->iomap; - loff_t pos = iter->pos; + struct iomap_folio_state *ifs = folio->private; + size_t poff = offset_in_folio(folio, pos); loff_t length = iomap_length(iter); - struct folio *folio = ctx->cur_folio; - struct iomap_folio_state *ifs; - size_t poff, plen; sector_t sector; - int ret; - - if (iomap->type == IOMAP_INLINE) { - ret = iomap_read_inline_data(iter, folio); - if (ret) - return ret; - return iomap_iter_advance(iter, &length); - } - - /* zero post-eof blocks as the page may be mapped */ - ifs = ifs_alloc(iter->inode, folio, iter->flags); - iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); - if (plen == 0) - goto done; - - if (iomap_block_needs_zeroing(iter, pos)) { - folio_zero_range(folio, poff, plen); - iomap_set_range_uptodate(folio, poff, plen); - goto done; - } ctx->cur_folio_in_bio = true; if (ifs) { @@ -425,6 +410,37 @@ static int iomap_readpage_iter(struct iomap_iter *iter, ctx->bio->bi_end_io = iomap_read_end_io; bio_add_folio_nofail(ctx->bio, folio, plen, poff); } +} + +static int iomap_readpage_iter(struct iomap_iter *iter, + struct iomap_readpage_ctx *ctx) +{ + const struct iomap *iomap = &iter->iomap; + loff_t pos = iter->pos; + loff_t length = iomap_length(iter); + struct folio *folio = ctx->cur_folio; + size_t poff, plen; + int ret; + + if (iomap->type == IOMAP_INLINE) { + ret = iomap_read_inline_data(iter, folio); + if (ret) + return ret; + return iomap_iter_advance(iter, &length); + } + + /* zero post-eof blocks as the page may be mapped */ + ifs_alloc(iter->inode, folio, iter->flags); + iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); + if (plen == 0) + goto done; + + if (iomap_block_needs_zeroing(iter, pos)) { + folio_zero_range(folio, poff, plen); + iomap_set_range_uptodate(folio, poff, plen); + } else { + iomap_read_folio_range_bio_async(iter, ctx, pos, plen); + } done: /* -- 2.47.3 Move the read/readahead bio submission logic into a separate helper This is needed to make iomap read/readahead more generically usable, especially for filesystems that do not require CONFIG_BLOCK. 
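For readers following the refactor, the per-range dispatch that falls out of these two patches reduces to the following shape (a condensed sketch of the code in the diffs, not a verbatim copy):

	/* inside iomap_readpage_iter(), for the current non-uptodate range: */
	if (iomap_block_needs_zeroing(iter, pos)) {
		folio_zero_range(folio, poff, plen);
		iomap_set_range_uptodate(folio, poff, plen);
	} else {
		iomap_read_folio_range_bio_async(iter, ctx, pos, plen);
	}

	/* and once iteration is done, in iomap_read_folio()/iomap_readahead(): */
	iomap_submit_read_bio(&ctx);

Keeping bio construction and bio submission behind these two helpers is what later lets the read path be driven by caller-provided callbacks.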
Signed-off-by: Joanne Koong --- fs/iomap/buffered-io.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 13854fb6ad86..a3b02ed5328f 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -357,6 +357,14 @@ struct iomap_readpage_ctx { struct readahead_control *rac; }; +static void iomap_submit_read_bio(struct iomap_readpage_ctx *ctx) +{ + struct bio *bio = ctx->bio; + + if (bio) + submit_bio(bio); +} + /** * Read in a folio range asynchronously through bios. * @@ -388,8 +396,7 @@ static void iomap_read_folio_range_bio_async(const struct iomap_iter *iter, gfp_t orig_gfp = gfp; unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); - if (ctx->bio) - submit_bio(ctx->bio); + iomap_submit_read_bio(ctx); if (ctx->rac) /* same as readahead_gfp_mask */ gfp |= __GFP_NORETRY | __GFP_NOWARN; @@ -484,13 +491,10 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) while ((ret = iomap_iter(&iter, ops)) > 0) iter.status = iomap_read_folio_iter(&iter, &ctx); - if (ctx.bio) { - submit_bio(ctx.bio); - WARN_ON_ONCE(!ctx.cur_folio_in_bio); - } else { - WARN_ON_ONCE(ctx.cur_folio_in_bio); + iomap_submit_read_bio(&ctx); + + if (!ctx.cur_folio_in_bio) folio_unlock(folio); - } /* * Just like mpage_readahead and block_read_full_folio, we always @@ -556,12 +560,10 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) while (iomap_iter(&iter, ops) > 0) iter.status = iomap_readahead_iter(&iter, &ctx); - if (ctx.bio) - submit_bio(ctx.bio); - if (ctx.cur_folio) { - if (!ctx.cur_folio_in_bio) - folio_unlock(ctx.cur_folio); - } + iomap_submit_read_bio(&ctx); + + if (ctx.cur_folio && !ctx.cur_folio_in_bio) + folio_unlock(ctx.cur_folio); } EXPORT_SYMBOL_GPL(iomap_readahead); -- 2.47.3 The purpose of struct iomap_readpage_ctx's cur_folio_in_bio is to track whether the folio is owned by the bio (where thus the bio is responsible for unlocking the folio) or if it needs to be unlocked by iomap. Rename this to folio_owned to make the purpose more clear and so that when iomap read/readahead logic is made generic, the name also makes sense for filesystems that don't use bios. Signed-off-by: Joanne Koong --- fs/iomap/buffered-io.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index a3b02ed5328f..598998269107 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -352,7 +352,12 @@ static void iomap_read_end_io(struct bio *bio) struct iomap_readpage_ctx { struct folio *cur_folio; - bool cur_folio_in_bio; + /* + * Is the folio owned by this readpage context, or by some + * external IO helper? Either way, the owner of the folio is + * responsible for unlocking it when the read completes. 
+ */ + bool folio_owned; struct bio *bio; struct readahead_control *rac; }; @@ -381,7 +386,7 @@ static void iomap_read_folio_range_bio_async(const struct iomap_iter *iter, loff_t length = iomap_length(iter); sector_t sector; - ctx->cur_folio_in_bio = true; + ctx->folio_owned = true; if (ifs) { spin_lock_irq(&ifs->state_lock); ifs->read_bytes_pending += plen; @@ -493,7 +498,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) iomap_submit_read_bio(&ctx); - if (!ctx.cur_folio_in_bio) + if (!ctx.folio_owned) folio_unlock(folio); /* @@ -513,13 +518,13 @@ static int iomap_readahead_iter(struct iomap_iter *iter, while (iomap_length(iter)) { if (ctx->cur_folio && offset_in_folio(ctx->cur_folio, iter->pos) == 0) { - if (!ctx->cur_folio_in_bio) + if (!ctx->folio_owned) folio_unlock(ctx->cur_folio); ctx->cur_folio = NULL; } if (!ctx->cur_folio) { ctx->cur_folio = readahead_folio(ctx->rac); - ctx->cur_folio_in_bio = false; + ctx->folio_owned = false; } ret = iomap_readpage_iter(iter, ctx); if (ret) @@ -562,7 +567,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) iomap_submit_read_bio(&ctx); - if (ctx.cur_folio && !ctx.cur_folio_in_bio) + if (ctx.cur_folio && !ctx.folio_owned) folio_unlock(ctx.cur_folio); } EXPORT_SYMBOL_GPL(iomap_readahead); -- 2.47.3 Store the iomap_readpage_ctx bio generically as a "void *private". This makes the read/readahead interface more generic, which allows it to be used by filesystems that may not be block-based and may not have CONFIG_BLOCK set. Signed-off-by: Joanne Koong --- fs/iomap/buffered-io.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 598998269107..a83a94bc0be9 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -358,13 +358,13 @@ struct iomap_readpage_ctx { * responsible for unlocking it when the read completes. */ bool folio_owned; - struct bio *bio; + void *private; struct readahead_control *rac; }; static void iomap_submit_read_bio(struct iomap_readpage_ctx *ctx) { - struct bio *bio = ctx->bio; + struct bio *bio = ctx->private; if (bio) submit_bio(bio); @@ -385,6 +385,7 @@ static void iomap_read_folio_range_bio_async(const struct iomap_iter *iter, size_t poff = offset_in_folio(folio, pos); loff_t length = iomap_length(iter); sector_t sector; + struct bio *bio = ctx->private; ctx->folio_owned = true; if (ifs) { @@ -394,9 +395,8 @@ static void iomap_read_folio_range_bio_async(const struct iomap_iter *iter, } sector = iomap_sector(iomap, pos); - if (!ctx->bio || - bio_end_sector(ctx->bio) != sector || - !bio_add_folio(ctx->bio, folio, plen, poff)) { + if (!bio || bio_end_sector(bio) != sector || + !bio_add_folio(bio, folio, plen, poff)) { gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL); gfp_t orig_gfp = gfp; unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); @@ -405,22 +405,21 @@ static void iomap_read_folio_range_bio_async(const struct iomap_iter *iter, if (ctx->rac) /* same as readahead_gfp_mask */ gfp |= __GFP_NORETRY | __GFP_NOWARN; - ctx->bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), + bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), REQ_OP_READ, gfp); /* * If the bio_alloc fails, try it again for a single page to * avoid having to deal with partial page reads. This emulates * what do_mpage_read_folio does. 
*/ - if (!ctx->bio) { - ctx->bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, - orig_gfp); - } + if (!bio) + bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, orig_gfp); if (ctx->rac) - ctx->bio->bi_opf |= REQ_RAHEAD; - ctx->bio->bi_iter.bi_sector = sector; - ctx->bio->bi_end_io = iomap_read_end_io; - bio_add_folio_nofail(ctx->bio, folio, plen, poff); + bio->bi_opf |= REQ_RAHEAD; + bio->bi_iter.bi_sector = sector; + bio->bi_end_io = iomap_read_end_io; + bio_add_folio_nofail(bio, folio, plen, poff); + ctx->private = bio; } } -- 2.47.3 Propagate any error encountered in iomap_read_folio() back up to its caller (otherwise a default -EIO will be passed up by filemap_read_folio() to callers). This is standard behavior for how other filesystems handle their ->read_folio() errors as well. Remove the out of date comment about setting the folio error flag. Folio error flags were removed in commit 1f56eedf7ff7 ("iomap: Remove calls to set and clear folio error flag"). Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index a83a94bc0be9..51d204f0e077 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -500,12 +500,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) if (!ctx.folio_owned) folio_unlock(folio); - /* - * Just like mpage_readahead and block_read_full_folio, we always - * return 0 and just set the folio error flag on errors. This - * should be cleaned up throughout the stack eventually. - */ - return 0; + return ret; } EXPORT_SYMBOL_GPL(iomap_read_folio); -- 2.47.3 Iterate over all non-uptodate ranges in a single call to iomap_readpage_iter() instead of leaving the partial folio iteration to the caller. This will be useful for supporting caller-provided async folio read callbacks (added in later commit) because that will require tracking when the first and last async read request for a folio is sent, in order to prevent premature read completion of the folio. This additionally makes the iomap_readahead_iter() logic a bit simpler. Signed-off-by: Joanne Koong --- fs/iomap/buffered-io.c | 67 ++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 51d204f0e077..fc8fa24ae7db 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -431,6 +431,7 @@ static int iomap_readpage_iter(struct iomap_iter *iter, loff_t length = iomap_length(iter); struct folio *folio = ctx->cur_folio; size_t poff, plen; + loff_t count; int ret; if (iomap->type == IOMAP_INLINE) { @@ -442,39 +443,29 @@ static int iomap_readpage_iter(struct iomap_iter *iter, /* zero post-eof blocks as the page may be mapped */ ifs_alloc(iter->inode, folio, iter->flags); - iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); - if (plen == 0) - goto done; - if (iomap_block_needs_zeroing(iter, pos)) { - folio_zero_range(folio, poff, plen); - iomap_set_range_uptodate(folio, poff, plen); - } else { - iomap_read_folio_range_bio_async(iter, ctx, pos, plen); - } - -done: - /* - * Move the caller beyond our range so that it keeps making progress. - * For that, we have to include any leading non-uptodate ranges, but - * we can skip trailing ones as they will be handled in the next - * iteration. 
- */ - length = pos - iter->pos + plen; - return iomap_iter_advance(iter, &length); -} + length = min_t(loff_t, length, + folio_size(folio) - offset_in_folio(folio, pos)); + while (length) { + iomap_adjust_read_range(iter->inode, folio, &pos, + length, &poff, &plen); + count = pos - iter->pos + plen; + if (plen == 0) + return iomap_iter_advance(iter, &count); -static int iomap_read_folio_iter(struct iomap_iter *iter, - struct iomap_readpage_ctx *ctx) -{ - int ret; + if (iomap_block_needs_zeroing(iter, pos)) { + folio_zero_range(folio, poff, plen); + iomap_set_range_uptodate(folio, poff, plen); + } else { + iomap_read_folio_range_bio_async(iter, ctx, pos, plen); + } - while (iomap_length(iter)) { - ret = iomap_readpage_iter(iter, ctx); + length -= count; + ret = iomap_iter_advance(iter, &count); if (ret) return ret; + pos = iter->pos; } - return 0; } @@ -493,7 +484,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) trace_iomap_readpage(iter.inode, 1); while ((ret = iomap_iter(&iter, ops)) > 0) - iter.status = iomap_read_folio_iter(&iter, &ctx); + iter.status = iomap_readpage_iter(&iter, &ctx); iomap_submit_read_bio(&ctx); @@ -510,16 +501,16 @@ static int iomap_readahead_iter(struct iomap_iter *iter, int ret; while (iomap_length(iter)) { - if (ctx->cur_folio && - offset_in_folio(ctx->cur_folio, iter->pos) == 0) { - if (!ctx->folio_owned) - folio_unlock(ctx->cur_folio); - ctx->cur_folio = NULL; - } - if (!ctx->cur_folio) { - ctx->cur_folio = readahead_folio(ctx->rac); - ctx->folio_owned = false; - } + if (ctx->cur_folio && !ctx->folio_owned) + folio_unlock(ctx->cur_folio); + ctx->cur_folio = readahead_folio(ctx->rac); + /* + * We should never in practice hit this case since + * the iter length matches the readahead length. + */ + if (WARN_ON_ONCE(!ctx->cur_folio)) + return -EINVAL; + ctx->folio_owned = false; ret = iomap_readpage_iter(iter, ctx); if (ret) return ret; -- 2.47.3 ->readpage was deprecated and reads are now on folios. Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. Wong" --- fs/iomap/buffered-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index fc8fa24ae7db..c376a793e4c5 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -423,7 +423,7 @@ static void iomap_read_folio_range_bio_async(const struct iomap_iter *iter, } } -static int iomap_readpage_iter(struct iomap_iter *iter, +static int iomap_read_folio_iter(struct iomap_iter *iter, struct iomap_readpage_ctx *ctx) { const struct iomap *iomap = &iter->iomap; @@ -484,7 +484,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) trace_iomap_readpage(iter.inode, 1); while ((ret = iomap_iter(&iter, ops)) > 0) - iter.status = iomap_readpage_iter(&iter, &ctx); + iter.status = iomap_read_folio_iter(&iter, &ctx); iomap_submit_read_bio(&ctx); @@ -511,7 +511,7 @@ static int iomap_readahead_iter(struct iomap_iter *iter, if (WARN_ON_ONCE(!ctx->cur_folio)) return -EINVAL; ctx->folio_owned = false; - ret = iomap_readpage_iter(iter, ctx); + ret = iomap_read_folio_iter(iter, ctx); if (ret) return ret; } -- 2.47.3 ->readpage was deprecated and reads are now on folios. Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. 
Wong" --- fs/iomap/buffered-io.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index c376a793e4c5..008042108c68 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -350,7 +350,7 @@ static void iomap_read_end_io(struct bio *bio) bio_put(bio); } -struct iomap_readpage_ctx { +struct iomap_read_folio_ctx { struct folio *cur_folio; /* * Is the folio owned by this readpage context, or by some @@ -362,7 +362,7 @@ struct iomap_readpage_ctx { struct readahead_control *rac; }; -static void iomap_submit_read_bio(struct iomap_readpage_ctx *ctx) +static void iomap_submit_read_bio(struct iomap_read_folio_ctx *ctx) { struct bio *bio = ctx->private; @@ -377,7 +377,7 @@ static void iomap_submit_read_bio(struct iomap_readpage_ctx *ctx) * Buffered writes must read in the folio synchronously. */ static void iomap_read_folio_range_bio_async(const struct iomap_iter *iter, - struct iomap_readpage_ctx *ctx, loff_t pos, size_t plen) + struct iomap_read_folio_ctx *ctx, loff_t pos, size_t plen) { struct folio *folio = ctx->cur_folio; const struct iomap *iomap = &iter->iomap; @@ -424,7 +424,7 @@ static void iomap_read_folio_range_bio_async(const struct iomap_iter *iter, } static int iomap_read_folio_iter(struct iomap_iter *iter, - struct iomap_readpage_ctx *ctx) + struct iomap_read_folio_ctx *ctx) { const struct iomap *iomap = &iter->iomap; loff_t pos = iter->pos; @@ -476,7 +476,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) .pos = folio_pos(folio), .len = folio_size(folio), }; - struct iomap_readpage_ctx ctx = { + struct iomap_read_folio_ctx ctx = { .cur_folio = folio, }; int ret; @@ -496,7 +496,7 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) EXPORT_SYMBOL_GPL(iomap_read_folio); static int iomap_readahead_iter(struct iomap_iter *iter, - struct iomap_readpage_ctx *ctx) + struct iomap_read_folio_ctx *ctx) { int ret; @@ -541,7 +541,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) .pos = readahead_pos(rac), .len = readahead_length(rac), }; - struct iomap_readpage_ctx ctx = { + struct iomap_read_folio_ctx ctx = { .rac = rac, }; -- 2.47.3 Move ifs read_bytes_pending increment logic into a separate helper, iomap_start_folio_read(), which will be needed later on by caller-provided read callbacks (added in a later commit) for read/readahead. This is the counterpart to the currently existing iomap_finish_folio_read(). Make iomap_start_folio_read() and iomap_finish_folio_read() publicly accessible. These need to be accessible in order for caller-provided read callbacks to use. Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. 
Wong" --- fs/iomap/buffered-io.c | 26 +++++++++++++++++--------- include/linux/iomap.h | 3 +++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 008042108c68..50de09426c96 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -317,9 +317,20 @@ static int iomap_read_inline_data(const struct iomap_iter *iter, return 0; } -#ifdef CONFIG_BLOCK -static void iomap_finish_folio_read(struct folio *folio, size_t off, - size_t len, int error) +void iomap_start_folio_read(struct folio *folio, size_t len) +{ + struct iomap_folio_state *ifs = folio->private; + + if (ifs) { + spin_lock_irq(&ifs->state_lock); + ifs->read_bytes_pending += len; + spin_unlock_irq(&ifs->state_lock); + } +} +EXPORT_SYMBOL_GPL(iomap_start_folio_read); + +void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, + int error) { struct iomap_folio_state *ifs = folio->private; bool uptodate = !error; @@ -339,7 +350,9 @@ static void iomap_finish_folio_read(struct folio *folio, size_t off, if (finished) folio_end_read(folio, uptodate); } +EXPORT_SYMBOL_GPL(iomap_finish_folio_read); +#ifdef CONFIG_BLOCK static void iomap_read_end_io(struct bio *bio) { int error = blk_status_to_errno(bio->bi_status); @@ -381,18 +394,13 @@ static void iomap_read_folio_range_bio_async(const struct iomap_iter *iter, { struct folio *folio = ctx->cur_folio; const struct iomap *iomap = &iter->iomap; - struct iomap_folio_state *ifs = folio->private; size_t poff = offset_in_folio(folio, pos); loff_t length = iomap_length(iter); sector_t sector; struct bio *bio = ctx->private; ctx->folio_owned = true; - if (ifs) { - spin_lock_irq(&ifs->state_lock); - ifs->read_bytes_pending += plen; - spin_unlock_irq(&ifs->state_lock); - } + iomap_start_folio_read(folio, plen); sector = iomap_sector(iomap, pos); if (!bio || bio_end_sector(bio) != sector || diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 73dceabc21c8..0938c4a57f4c 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -467,6 +467,9 @@ ssize_t iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct folio *folio, loff_t pos, loff_t end_pos, unsigned int dirty_len); int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error); +void iomap_start_folio_read(struct folio *folio, size_t len); +void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, + int error); void iomap_start_folio_write(struct inode *inode, struct folio *folio, size_t len); void iomap_finish_folio_write(struct inode *inode, struct folio *folio, -- 2.47.3 struct iomap_read_folio_ctx will be made a public interface when read/readahead takes in caller-provided callbacks. To make the interface simpler for end users, keep track of the folio ownership state internally instead of exposing it in struct iomap_read_folio_ctx. Signed-off-by: Joanne Koong --- fs/iomap/buffered-io.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 50de09426c96..d38459740180 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -365,12 +365,6 @@ static void iomap_read_end_io(struct bio *bio) struct iomap_read_folio_ctx { struct folio *cur_folio; - /* - * Is the folio owned by this readpage context, or by some - * external IO helper? Either way, the owner of the folio is - * responsible for unlocking it when the read completes. 
- */ - bool folio_owned; void *private; struct readahead_control *rac; }; @@ -399,7 +393,6 @@ static void iomap_read_folio_range_bio_async(const struct iomap_iter *iter, sector_t sector; struct bio *bio = ctx->private; - ctx->folio_owned = true; iomap_start_folio_read(folio, plen); sector = iomap_sector(iomap, pos); @@ -432,7 +425,7 @@ static void iomap_read_folio_range_bio_async(const struct iomap_iter *iter, } static int iomap_read_folio_iter(struct iomap_iter *iter, - struct iomap_read_folio_ctx *ctx) + struct iomap_read_folio_ctx *ctx, bool *cur_folio_owned) { const struct iomap *iomap = &iter->iomap; loff_t pos = iter->pos; @@ -465,6 +458,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, folio_zero_range(folio, poff, plen); iomap_set_range_uptodate(folio, poff, plen); } else { + *cur_folio_owned = true; iomap_read_folio_range_bio_async(iter, ctx, pos, plen); } @@ -487,16 +481,22 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) struct iomap_read_folio_ctx ctx = { .cur_folio = folio, }; + /* + * If an external IO helper takes ownership of the folio, + * it is responsible for unlocking it when the read completes. + */ + bool cur_folio_owned = false; int ret; trace_iomap_readpage(iter.inode, 1); while ((ret = iomap_iter(&iter, ops)) > 0) - iter.status = iomap_read_folio_iter(&iter, &ctx); + iter.status = iomap_read_folio_iter(&iter, &ctx, + &cur_folio_owned); iomap_submit_read_bio(&ctx); - if (!ctx.folio_owned) + if (!cur_folio_owned) folio_unlock(folio); return ret; @@ -504,12 +504,13 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) EXPORT_SYMBOL_GPL(iomap_read_folio); static int iomap_readahead_iter(struct iomap_iter *iter, - struct iomap_read_folio_ctx *ctx) + struct iomap_read_folio_ctx *ctx, + bool *cur_folio_owned) { int ret; while (iomap_length(iter)) { - if (ctx->cur_folio && !ctx->folio_owned) + if (ctx->cur_folio && !*cur_folio_owned) folio_unlock(ctx->cur_folio); ctx->cur_folio = readahead_folio(ctx->rac); /* @@ -518,8 +519,8 @@ static int iomap_readahead_iter(struct iomap_iter *iter, */ if (WARN_ON_ONCE(!ctx->cur_folio)) return -EINVAL; - ctx->folio_owned = false; - ret = iomap_read_folio_iter(iter, ctx); + *cur_folio_owned = false; + ret = iomap_read_folio_iter(iter, ctx, cur_folio_owned); if (ret) return ret; } @@ -552,15 +553,21 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) struct iomap_read_folio_ctx ctx = { .rac = rac, }; + /* + * If an external IO helper takes ownership of the folio, + * it is responsible for unlocking it when the read completes. + */ + bool cur_folio_owned = false; trace_iomap_readahead(rac->mapping->host, readahead_count(rac)); while (iomap_iter(&iter, ops) > 0) - iter.status = iomap_readahead_iter(&iter, &ctx); + iter.status = iomap_readahead_iter(&iter, &ctx, + &cur_folio_owned); iomap_submit_read_bio(&ctx); - if (ctx.cur_folio && !ctx.folio_owned) + if (ctx.cur_folio && !cur_folio_owned) folio_unlock(ctx.cur_folio); } EXPORT_SYMBOL_GPL(iomap_readahead); -- 2.47.3 Add caller-provided callbacks for read and readahead so that it can be used generically, especially by filesystems that are not block-based. 
In particular, this: * Modifies the read and readahead interface to take in a struct iomap_read_folio_ctx that is publicly defined as: struct iomap_read_folio_ctx { const struct iomap_read_ops *ops; struct folio *cur_folio; struct readahead_control *rac; void *private; }; where struct iomap_read_ops is defined as: struct iomap_read_ops { int (*read_folio_range)(const struct iomap_iter *iter, struct iomap_read_folio_ctx *ctx, loff_t pos, size_t len); int (*read_submit)(struct iomap_read_folio_ctx *ctx); }; read_folio_range() reads in the folio range and is required by the caller to provide. read_submit() is optional and is used for submitting any pending read requests. iomap_read_folio() must set ops->read_folio_range() and cur_folio, and iomap_readahead() must set ops->read_folio_range() and rac. * Modifies existing filesystems that use iomap for read and readahead to use the new API. There is no change in functionality for these filesystems. Signed-off-by: Joanne Koong --- .../filesystems/iomap/operations.rst | 42 ++++++++++++++ block/fops.c | 14 ++++- fs/erofs/data.c | 14 ++++- fs/gfs2/aops.c | 21 +++++-- fs/iomap/buffered-io.c | 58 ++++++++++--------- fs/xfs/xfs_aops.c | 14 ++++- fs/zonefs/file.c | 14 ++++- include/linux/iomap.h | 42 +++++++++++++- 8 files changed, 178 insertions(+), 41 deletions(-) diff --git a/Documentation/filesystems/iomap/operations.rst b/Documentation/filesystems/iomap/operations.rst index 067ed8e14ef3..be890192287c 100644 --- a/Documentation/filesystems/iomap/operations.rst +++ b/Documentation/filesystems/iomap/operations.rst @@ -135,6 +135,30 @@ These ``struct kiocb`` flags are significant for buffered I/O with iomap: * ``IOCB_DONTCACHE``: Turns on ``IOMAP_DONTCACHE``. +``struct iomap_read_ops`` +-------------------------- + +.. code-block:: c + + struct iomap_read_ops { + int (*read_folio_range)(const struct iomap_iter *iter, + struct iomap_read_folio_ctx *ctx, loff_t pos, + size_t len); + int (*read_submit)(struct iomap_read_folio_ctx *ctx); + }; + +iomap calls these functions: + + - ``read_folio_range``: Called to read in the range (read can be done + synchronously or asynchronously). This must be provided by the caller. + The caller is responsible for calling iomap_start_folio_read() and + iomap_finish_folio_read() before and after reading the folio range. This + should be done even if an error is encountered during the read. This + returns 0 on success or a negative error on failure. + + - ``read_submit``: Submit any pending read requests. This function is + optional. This returns 0 on success or a negative error on failure. + Internal per-Folio State ------------------------ @@ -182,6 +206,24 @@ The ``flags`` argument to ``->iomap_begin`` will be set to zero. The pagecache takes whatever locks it needs before calling the filesystem. +Both ``iomap_readahead`` and ``iomap_read_folio`` pass in a ``struct +iomap_read_folio_ctx``: + +.. code-block:: c + + struct iomap_read_folio_ctx { + const struct iomap_read_ops *ops; + struct folio *cur_folio; + struct readahead_control *rac; + void *private; + }; + +``iomap_readahead`` must set ``ops->read_folio_range()`` and ``rac``. +``iomap_read_folio`` must set ``ops->read_folio_range()`` and ``cur_folio``. +Both can optionally set ``ops->read_submit()`` and/or ``private``. ``private`` +is used to pass in any custom data the caller needs accessible in the ops +callbacks. 
+ Buffered Writes --------------- diff --git a/block/fops.c b/block/fops.c index ddbc69c0922b..00d9728a9b08 100644 --- a/block/fops.c +++ b/block/fops.c @@ -533,12 +533,22 @@ const struct address_space_operations def_blk_aops = { #else /* CONFIG_BUFFER_HEAD */ static int blkdev_read_folio(struct file *file, struct folio *folio) { - return iomap_read_folio(folio, &blkdev_iomap_ops); + struct iomap_read_folio_ctx ctx = { + .ops = &iomap_read_bios_ops, + .cur_folio = folio, + }; + + return iomap_read_folio(&blkdev_iomap_ops, &ctx); } static void blkdev_readahead(struct readahead_control *rac) { - iomap_readahead(rac, &blkdev_iomap_ops); + struct iomap_read_folio_ctx ctx = { + .ops = &iomap_read_bios_ops, + .rac = rac, + }; + + iomap_readahead(&blkdev_iomap_ops, &ctx); } static ssize_t blkdev_writeback_range(struct iomap_writepage_ctx *wpc, diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 3b1ba571c728..3f27db03310d 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -369,17 +369,27 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, */ static int erofs_read_folio(struct file *file, struct folio *folio) { + struct iomap_read_folio_ctx ctx = { + .ops = &iomap_read_bios_ops, + .cur_folio = folio, + }; + trace_erofs_read_folio(folio, true); - return iomap_read_folio(folio, &erofs_iomap_ops); + return iomap_read_folio(&erofs_iomap_ops, &ctx); } static void erofs_readahead(struct readahead_control *rac) { + struct iomap_read_folio_ctx ctx = { + .ops = &iomap_read_bios_ops, + .rac = rac, + }; + trace_erofs_readahead(rac->mapping->host, readahead_index(rac), readahead_count(rac), true); - return iomap_readahead(rac, &erofs_iomap_ops); + return iomap_readahead(&erofs_iomap_ops, &ctx); } static sector_t erofs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 47d74afd63ac..1a8567a41f03 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -428,7 +428,12 @@ static int gfs2_read_folio(struct file *file, struct folio *folio) if (!gfs2_is_jdata(ip) || (i_blocksize(inode) == PAGE_SIZE && !folio_buffers(folio))) { - error = iomap_read_folio(folio, &gfs2_iomap_ops); + struct iomap_read_folio_ctx ctx = { + .ops = &iomap_read_bios_ops, + .cur_folio = folio, + }; + + error = iomap_read_folio(&gfs2_iomap_ops, &ctx); } else if (gfs2_is_stuffed(ip)) { error = stuffed_read_folio(ip, folio); } else { @@ -498,12 +503,18 @@ static void gfs2_readahead(struct readahead_control *rac) struct inode *inode = rac->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); - if (gfs2_is_stuffed(ip)) + if (gfs2_is_stuffed(ip)) { ; - else if (gfs2_is_jdata(ip)) + } else if (gfs2_is_jdata(ip)) { mpage_readahead(rac, gfs2_block_map); - else - iomap_readahead(rac, &gfs2_iomap_ops); + } else { + struct iomap_read_folio_ctx ctx = { + .ops = &iomap_read_bios_ops, + .rac = rac, + }; + + iomap_readahead(&gfs2_iomap_ops, &ctx); + } } /** diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index d38459740180..6fafe3b30563 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -363,18 +363,14 @@ static void iomap_read_end_io(struct bio *bio) bio_put(bio); } -struct iomap_read_folio_ctx { - struct folio *cur_folio; - void *private; - struct readahead_control *rac; -}; - -static void iomap_submit_read_bio(struct iomap_read_folio_ctx *ctx) +static int iomap_submit_read_bio(struct iomap_read_folio_ctx *ctx) { struct bio *bio = ctx->private; if (bio) submit_bio(bio); + + return 0; } /** @@ -383,7 +379,7 @@ static void iomap_submit_read_bio(struct 
iomap_read_folio_ctx *ctx) * This should only be used for read/readahead, not for buffered writes. * Buffered writes must read in the folio synchronously. */ -static void iomap_read_folio_range_bio_async(const struct iomap_iter *iter, +static int iomap_read_folio_range_bio_async(const struct iomap_iter *iter, struct iomap_read_folio_ctx *ctx, loff_t pos, size_t plen) { struct folio *folio = ctx->cur_folio; @@ -422,8 +418,15 @@ static void iomap_read_folio_range_bio_async(const struct iomap_iter *iter, bio_add_folio_nofail(bio, folio, plen, poff); ctx->private = bio; } + return 0; } +const struct iomap_read_ops iomap_read_bios_ops = { + .read_folio_range = iomap_read_folio_range_bio_async, + .read_submit = iomap_submit_read_bio, +}; +EXPORT_SYMBOL_GPL(iomap_read_bios_ops); + static int iomap_read_folio_iter(struct iomap_iter *iter, struct iomap_read_folio_ctx *ctx, bool *cur_folio_owned) { @@ -459,7 +462,10 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, iomap_set_range_uptodate(folio, poff, plen); } else { *cur_folio_owned = true; - iomap_read_folio_range_bio_async(iter, ctx, pos, plen); + ret = ctx->ops->read_folio_range(iter, ctx, pos, + plen); + if (ret) + return ret; } length -= count; @@ -471,35 +477,35 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, return 0; } -int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops) +int iomap_read_folio(const struct iomap_ops *ops, + struct iomap_read_folio_ctx *ctx) { + struct folio *folio = ctx->cur_folio; struct iomap_iter iter = { .inode = folio->mapping->host, .pos = folio_pos(folio), .len = folio_size(folio), }; - struct iomap_read_folio_ctx ctx = { - .cur_folio = folio, - }; /* * If an external IO helper takes ownership of the folio, * it is responsible for unlocking it when the read completes. */ bool cur_folio_owned = false; - int ret; + int ret, submit_ret = 0; trace_iomap_readpage(iter.inode, 1); while ((ret = iomap_iter(&iter, ops)) > 0) - iter.status = iomap_read_folio_iter(&iter, &ctx, + iter.status = iomap_read_folio_iter(&iter, ctx, &cur_folio_owned); - iomap_submit_read_bio(&ctx); + if (ctx->ops->read_submit) + submit_ret = ctx->ops->read_submit(ctx); if (!cur_folio_owned) folio_unlock(folio); - return ret; + return ret ? ret : submit_ret; } EXPORT_SYMBOL_GPL(iomap_read_folio); @@ -530,8 +536,8 @@ static int iomap_readahead_iter(struct iomap_iter *iter, /** * iomap_readahead - Attempt to read pages from a file. - * @rac: Describes the pages to be read. * @ops: The operations vector for the filesystem. + * @ctx: The ctx used for issuing readahead. * * This function is for filesystems to call to implement their readahead * address_space operation. @@ -543,16 +549,15 @@ static int iomap_readahead_iter(struct iomap_iter *iter, * function is called with memalloc_nofs set, so allocations will not cause * the filesystem to be reentered. */ -void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) +void iomap_readahead(const struct iomap_ops *ops, + struct iomap_read_folio_ctx *ctx) { + struct readahead_control *rac = ctx->rac; struct iomap_iter iter = { .inode = rac->mapping->host, .pos = readahead_pos(rac), .len = readahead_length(rac), }; - struct iomap_read_folio_ctx ctx = { - .rac = rac, - }; /* * If an external IO helper takes ownership of the folio, * it is responsible for unlocking it when the read completes. 
@@ -562,13 +567,14 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) trace_iomap_readahead(rac->mapping->host, readahead_count(rac)); while (iomap_iter(&iter, ops) > 0) - iter.status = iomap_readahead_iter(&iter, &ctx, + iter.status = iomap_readahead_iter(&iter, ctx, &cur_folio_owned); - iomap_submit_read_bio(&ctx); + if (ctx->ops->read_submit) + ctx->ops->read_submit(ctx); - if (ctx.cur_folio && !cur_folio_owned) - folio_unlock(ctx.cur_folio); + if (ctx->cur_folio && !cur_folio_owned) + folio_unlock(ctx->cur_folio); } EXPORT_SYMBOL_GPL(iomap_readahead); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 1ee4f835ac3c..124f30e567f4 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -742,14 +742,24 @@ xfs_vm_read_folio( struct file *unused, struct folio *folio) { - return iomap_read_folio(folio, &xfs_read_iomap_ops); + struct iomap_read_folio_ctx ctx = { + .ops = &iomap_read_bios_ops, + .cur_folio = folio, + }; + + return iomap_read_folio(&xfs_read_iomap_ops, &ctx); } STATIC void xfs_vm_readahead( struct readahead_control *rac) { - iomap_readahead(rac, &xfs_read_iomap_ops); + struct iomap_read_folio_ctx ctx = { + .ops = &iomap_read_bios_ops, + .rac = rac, + }; + + iomap_readahead(&xfs_read_iomap_ops, &ctx); } static int diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c index fd3a5922f6c3..254562842347 100644 --- a/fs/zonefs/file.c +++ b/fs/zonefs/file.c @@ -112,12 +112,22 @@ static const struct iomap_ops zonefs_write_iomap_ops = { static int zonefs_read_folio(struct file *unused, struct folio *folio) { - return iomap_read_folio(folio, &zonefs_read_iomap_ops); + struct iomap_read_folio_ctx ctx = { + .ops = &iomap_read_bios_ops, + .cur_folio = folio, + }; + + return iomap_read_folio(&zonefs_read_iomap_ops, &ctx); } static void zonefs_readahead(struct readahead_control *rac) { - iomap_readahead(rac, &zonefs_read_iomap_ops); + struct iomap_read_folio_ctx ctx = { + .ops = &iomap_read_bios_ops, + .rac = rac, + }; + + iomap_readahead(&zonefs_read_iomap_ops, &ctx); } /* diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 0938c4a57f4c..0c6424f70237 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -16,6 +16,7 @@ struct inode; struct iomap_iter; struct iomap_dio; struct iomap_writepage_ctx; +struct iomap_read_folio_ctx; struct iov_iter; struct kiocb; struct page; @@ -339,8 +340,10 @@ static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter) ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private); -int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); -void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); +int iomap_read_folio(const struct iomap_ops *ops, + struct iomap_read_folio_ctx *ctx); +void iomap_readahead(const struct iomap_ops *ops, + struct iomap_read_folio_ctx *ctx); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); @@ -478,6 +481,41 @@ void iomap_finish_folio_write(struct inode *inode, struct folio *folio, int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio); int iomap_writepages(struct iomap_writepage_ctx *wpc); +struct iomap_read_folio_ctx { + const struct iomap_read_ops *ops; + struct folio *cur_folio; + struct readahead_control *rac; + void *private; +}; + 
+struct iomap_read_ops { + /* + * Read in a folio range. + * + * The read can be done synchronously or asynchronously. The caller is + * responsible for calling iomap_start_folio_read() and + * iomap_finish_folio_read() before and after reading in the folio + * range. This should be done even if an error is encountered during the + * read. + * + * Returns 0 on success or a negative error on failure. + */ + int (*read_folio_range)(const struct iomap_iter *iter, + struct iomap_read_folio_ctx *ctx, loff_t pos, + size_t len); + + /* + * Submit any pending read requests. + * + * This is optional. + * + * Returns 0 on success or a negative error on failure. + */ + int (*read_submit)(struct iomap_read_folio_ctx *ctx); +}; + +extern const struct iomap_read_ops iomap_read_bios_ops; + /* * Flags for direct I/O ->end_io: */ -- 2.47.3 Non-block-based filesystems will be using iomap read/readahead. If they handle reading in ranges asynchronously and fulfill those read requests on an ongoing basis (instead of all together at the end), then there is the possibility that the read on the folio may be prematurely ended if earlier async requests complete before the later ones have been issued. For example if there is a large folio and a readahead request for 16 pages in that folio, if doing readahead on those 16 pages is split into 4 async requests and the first request is sent off and then completed before we have sent off the second request, then when the first request calls iomap_finish_folio_read(), ifs->read_bytes_pending would be 0, which would end the read and unlock the folio prematurely. To mitigate this, a "bias" is added to ifs->read_bytes_pending before the first range is forwarded to the caller and removed after the last range has been forwarded. iomap writeback does this with their async requests as well to prevent prematurely ending writeback. 
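The diff below implements this with a one-byte bias on ifs->read_bytes_pending; stripped of error handling, the pattern is roughly:

	/* before handing out any sub-range reads for this folio */
	if (ifs)
		iomap_start_folio_read(folio, 1);	/* bias */

	/* ... forward each non-uptodate range to ->read_folio_range() ... */

	/*
	 * Drop the bias without touching the uptodate bitmap. Whichever
	 * completion drops read_bytes_pending to zero last (the bias or a
	 * real range read) ends the read and unlocks the folio.
	 */
	if (ifs)
		__iomap_finish_folio_read(folio, 0, 1, ret, false);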
Signed-off-by: Joanne Koong --- fs/iomap/buffered-io.c | 43 ++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 6fafe3b30563..f673e03f4ffb 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -329,8 +329,8 @@ void iomap_start_folio_read(struct folio *folio, size_t len) } EXPORT_SYMBOL_GPL(iomap_start_folio_read); -void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, - int error) +static void __iomap_finish_folio_read(struct folio *folio, size_t off, + size_t len, int error, bool update_bitmap) { struct iomap_folio_state *ifs = folio->private; bool uptodate = !error; @@ -340,7 +340,7 @@ void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, unsigned long flags; spin_lock_irqsave(&ifs->state_lock, flags); - if (!error) + if (!error && update_bitmap) uptodate = ifs_set_range_uptodate(folio, ifs, off, len); ifs->read_bytes_pending -= len; finished = !ifs->read_bytes_pending; @@ -350,6 +350,12 @@ void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, if (finished) folio_end_read(folio, uptodate); } + +void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, + int error) +{ + return __iomap_finish_folio_read(folio, off, len, error, true); +} EXPORT_SYMBOL_GPL(iomap_finish_folio_read); #ifdef CONFIG_BLOCK @@ -434,9 +440,10 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, loff_t pos = iter->pos; loff_t length = iomap_length(iter); struct folio *folio = ctx->cur_folio; + struct iomap_folio_state *ifs; size_t poff, plen; loff_t count; - int ret; + int ret = 0; if (iomap->type == IOMAP_INLINE) { ret = iomap_read_inline_data(iter, folio); @@ -446,7 +453,14 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, } /* zero post-eof blocks as the page may be mapped */ - ifs_alloc(iter->inode, folio, iter->flags); + ifs = ifs_alloc(iter->inode, folio, iter->flags); + + /* + * Add a bias to ifs->read_bytes_pending so that a read is ended only + * after all the ranges have been read in. + */ + if (ifs) + iomap_start_folio_read(folio, 1); length = min_t(loff_t, length, folio_size(folio) - offset_in_folio(folio, pos)); @@ -454,8 +468,10 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); count = pos - iter->pos + plen; - if (plen == 0) - return iomap_iter_advance(iter, &count); + if (plen == 0) { + ret = iomap_iter_advance(iter, &count); + break; + } if (iomap_block_needs_zeroing(iter, pos)) { folio_zero_range(folio, poff, plen); @@ -465,16 +481,23 @@ static int iomap_read_folio_iter(struct iomap_iter *iter, ret = ctx->ops->read_folio_range(iter, ctx, pos, plen); if (ret) - return ret; + break; } length -= count; ret = iomap_iter_advance(iter, &count); if (ret) - return ret; + break; pos = iter->pos; } - return 0; + + if (ifs) { + __iomap_finish_folio_read(folio, 0, 1, ret, false); + /* __iomap_finish_folio_read takes care of any unlocking */ + *cur_folio_owned = true; + } + + return ret; } int iomap_read_folio(const struct iomap_ops *ops, -- 2.47.3 There is no longer a dependency on CONFIG_BLOCK in the iomap read and readahead logic. Move this logic out of the CONFIG_BLOCK guard. This allows non-block-based filesystems to use iomap for reads/readahead. 
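To illustrate what this enables, a hypothetical non-block filesystem could now wire up its ->read_folio as sketched below (the example_* names are invented for illustration only; the fuse conversion later in this series is the real user):

	static int example_read_folio_range(const struct iomap_iter *iter,
			struct iomap_read_folio_ctx *ctx, loff_t pos, size_t len)
	{
		struct folio *folio = ctx->cur_folio;
		size_t off = offset_in_folio(folio, pos);
		int err;

		iomap_start_folio_read(folio, len);
		/* example_fill_range() stands in for the fs-specific I/O */
		err = example_fill_range(folio, off, len);
		iomap_finish_folio_read(folio, off, len, err);
		return err;
	}

	static const struct iomap_read_ops example_read_ops = {
		.read_folio_range	= example_read_folio_range,
	};

	static int example_read_folio(struct file *file, struct folio *folio)
	{
		struct iomap_read_folio_ctx ctx = {
			.ops		= &example_read_ops,
			.cur_folio	= folio,
		};

		return iomap_read_folio(&example_iomap_ops, &ctx);
	}

No bio, bdev, or CONFIG_BLOCK dependency is required; the read-completion accounting is done entirely through iomap_start_folio_read()/iomap_finish_folio_read().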
Signed-off-by: Joanne Koong --- fs/iomap/buffered-io.c | 151 +++++++++++++++++++++-------------------- 1 file changed, 76 insertions(+), 75 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index f673e03f4ffb..c424e8c157dd 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -358,81 +358,6 @@ void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, } EXPORT_SYMBOL_GPL(iomap_finish_folio_read); -#ifdef CONFIG_BLOCK -static void iomap_read_end_io(struct bio *bio) -{ - int error = blk_status_to_errno(bio->bi_status); - struct folio_iter fi; - - bio_for_each_folio_all(fi, bio) - iomap_finish_folio_read(fi.folio, fi.offset, fi.length, error); - bio_put(bio); -} - -static int iomap_submit_read_bio(struct iomap_read_folio_ctx *ctx) -{ - struct bio *bio = ctx->private; - - if (bio) - submit_bio(bio); - - return 0; -} - -/** - * Read in a folio range asynchronously through bios. - * - * This should only be used for read/readahead, not for buffered writes. - * Buffered writes must read in the folio synchronously. - */ -static int iomap_read_folio_range_bio_async(const struct iomap_iter *iter, - struct iomap_read_folio_ctx *ctx, loff_t pos, size_t plen) -{ - struct folio *folio = ctx->cur_folio; - const struct iomap *iomap = &iter->iomap; - size_t poff = offset_in_folio(folio, pos); - loff_t length = iomap_length(iter); - sector_t sector; - struct bio *bio = ctx->private; - - iomap_start_folio_read(folio, plen); - - sector = iomap_sector(iomap, pos); - if (!bio || bio_end_sector(bio) != sector || - !bio_add_folio(bio, folio, plen, poff)) { - gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL); - gfp_t orig_gfp = gfp; - unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); - - iomap_submit_read_bio(ctx); - - if (ctx->rac) /* same as readahead_gfp_mask */ - gfp |= __GFP_NORETRY | __GFP_NOWARN; - bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), - REQ_OP_READ, gfp); - /* - * If the bio_alloc fails, try it again for a single page to - * avoid having to deal with partial page reads. This emulates - * what do_mpage_read_folio does. - */ - if (!bio) - bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, orig_gfp); - if (ctx->rac) - bio->bi_opf |= REQ_RAHEAD; - bio->bi_iter.bi_sector = sector; - bio->bi_end_io = iomap_read_end_io; - bio_add_folio_nofail(bio, folio, plen, poff); - ctx->private = bio; - } - return 0; -} - -const struct iomap_read_ops iomap_read_bios_ops = { - .read_folio_range = iomap_read_folio_range_bio_async, - .read_submit = iomap_submit_read_bio, -}; -EXPORT_SYMBOL_GPL(iomap_read_bios_ops); - static int iomap_read_folio_iter(struct iomap_iter *iter, struct iomap_read_folio_ctx *ctx, bool *cur_folio_owned) { @@ -601,6 +526,82 @@ void iomap_readahead(const struct iomap_ops *ops, } EXPORT_SYMBOL_GPL(iomap_readahead); +#ifdef CONFIG_BLOCK +static void iomap_read_end_io(struct bio *bio) +{ + int error = blk_status_to_errno(bio->bi_status); + struct folio_iter fi; + + bio_for_each_folio_all(fi, bio) + iomap_finish_folio_read(fi.folio, fi.offset, fi.length, error); + bio_put(bio); +} + +static int iomap_submit_read_bio(struct iomap_read_folio_ctx *ctx) +{ + struct bio *bio = ctx->private; + + if (bio) + submit_bio(bio); + + return 0; +} + +/** + * Read in a folio range asynchronously through bios. + * + * This should only be used for read/readahead, not for buffered writes. + * Buffered writes must read in the folio synchronously. 
+ */ +static int iomap_read_folio_range_bio_async(const struct iomap_iter *iter, + struct iomap_read_folio_ctx *ctx, loff_t pos, size_t plen) +{ + struct folio *folio = ctx->cur_folio; + const struct iomap *iomap = &iter->iomap; + size_t poff = offset_in_folio(folio, pos); + loff_t length = iomap_length(iter); + sector_t sector; + struct bio *bio = ctx->private; + + iomap_start_folio_read(folio, plen); + + sector = iomap_sector(iomap, pos); + if (!bio || bio_end_sector(bio) != sector || + !bio_add_folio(bio, folio, plen, poff)) { + gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL); + gfp_t orig_gfp = gfp; + unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); + + if (bio) + submit_bio(bio); + + if (ctx->rac) /* same as readahead_gfp_mask */ + gfp |= __GFP_NORETRY | __GFP_NOWARN; + bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), + REQ_OP_READ, gfp); + /* + * If the bio_alloc fails, try it again for a single page to + * avoid having to deal with partial page reads. This emulates + * what do_mpage_read_folio does. + */ + if (!bio) + bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, orig_gfp); + if (ctx->rac) + bio->bi_opf |= REQ_RAHEAD; + bio->bi_iter.bi_sector = sector; + bio->bi_end_io = iomap_read_end_io; + bio_add_folio_nofail(bio, folio, plen, poff); + ctx->private = bio; + } + return 0; +} + +const struct iomap_read_ops iomap_read_bios_ops = { + .read_folio_range = iomap_read_folio_range_bio_async, + .read_submit = iomap_submit_read_bio, +}; +EXPORT_SYMBOL_GPL(iomap_read_bios_ops); + static int iomap_read_folio_range(const struct iomap_iter *iter, struct folio *folio, loff_t pos, size_t len) { -- 2.47.3 Read folio data into the page cache using iomap. This gives us granular uptodate tracking for large folios, which optimizes how much data needs to be read in. If some portions of the folio are already uptodate (eg through a prior write), we only need to read in the non-uptodate portions. Signed-off-by: Joanne Koong Reviewed-by: "Darrick J. 
Wong" --- fs/fuse/file.c | 79 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4adcf09d4b01..5b75a461f8e1 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -828,22 +828,69 @@ static int fuse_do_readfolio(struct file *file, struct folio *folio, return 0; } +static int fuse_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + unsigned int flags, struct iomap *iomap, + struct iomap *srcmap) +{ + iomap->type = IOMAP_MAPPED; + iomap->length = length; + iomap->offset = offset; + return 0; +} + +static const struct iomap_ops fuse_iomap_ops = { + .iomap_begin = fuse_iomap_begin, +}; + +struct fuse_fill_read_data { + struct file *file; +}; + +static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter, + struct iomap_read_folio_ctx *ctx, + loff_t pos, size_t len) +{ + struct fuse_fill_read_data *data = ctx->private; + struct folio *folio = ctx->cur_folio; + size_t off = offset_in_folio(folio, pos); + struct file *file = data->file; + int ret; + + /* + * for non-readahead read requests, do reads synchronously since + * it's not guaranteed that the server can handle out-of-order reads + */ + iomap_start_folio_read(folio, len); + ret = fuse_do_readfolio(file, folio, off, len); + iomap_finish_folio_read(folio, off, len, ret); + return ret; +} + +static const struct iomap_read_ops fuse_iomap_read_ops = { + .read_folio_range = fuse_iomap_read_folio_range_async, +}; + static int fuse_read_folio(struct file *file, struct folio *folio) { struct inode *inode = folio->mapping->host; - int err; + struct fuse_fill_read_data data = { + .file = file, + }; + struct iomap_read_folio_ctx ctx = { + .cur_folio = folio, + .ops = &fuse_iomap_read_ops, + .private = &data, - err = -EIO; - if (fuse_is_bad(inode)) - goto out; + }; + int err; - err = fuse_do_readfolio(file, folio, 0, folio_size(folio)); - if (!err) - folio_mark_uptodate(folio); + if (fuse_is_bad(inode)) { + folio_unlock(folio); + return -EIO; + } + err = iomap_read_folio(&fuse_iomap_ops, &ctx); fuse_invalidate_atime(inode); - out: - folio_unlock(folio); return err; } @@ -1394,20 +1441,6 @@ static const struct iomap_write_ops fuse_iomap_write_ops = { .read_folio_range = fuse_iomap_read_folio_range, }; -static int fuse_iomap_begin(struct inode *inode, loff_t offset, loff_t length, - unsigned int flags, struct iomap *iomap, - struct iomap *srcmap) -{ - iomap->type = IOMAP_MAPPED; - iomap->length = length; - iomap->offset = offset; - return 0; -} - -static const struct iomap_ops fuse_iomap_ops = { - .iomap_begin = fuse_iomap_begin, -}; - static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; -- 2.47.3 Do readahead in fuse using iomap. This gives us granular uptodate tracking for large folios, which optimizes how much data needs to be read in. If some portions of the folio are already uptodate (eg through a prior write), we only need to read in the non-uptodate portions. Signed-off-by: Joanne Koong --- fs/fuse/file.c | 224 ++++++++++++++++++++++++++++--------------------- 1 file changed, 128 insertions(+), 96 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5b75a461f8e1..3f57b5c6e037 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -844,8 +844,68 @@ static const struct iomap_ops fuse_iomap_ops = { struct fuse_fill_read_data { struct file *file; + + /* + * Fields below are used if sending the read request + * asynchronously. 
+ */ + struct fuse_conn *fc; + struct fuse_io_args *ia; + unsigned int nr_bytes; }; +/* forward declarations */ +static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos, + unsigned len, struct fuse_args_pages *ap, + unsigned cur_bytes, bool write); +static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file, + unsigned int count, bool async); + +static int fuse_handle_readahead(struct folio *folio, + struct readahead_control *rac, + struct fuse_fill_read_data *data, loff_t pos, + size_t len) +{ + struct fuse_io_args *ia = data->ia; + size_t off = offset_in_folio(folio, pos); + struct fuse_conn *fc = data->fc; + struct fuse_args_pages *ap; + unsigned int nr_pages; + + if (ia && fuse_folios_need_send(fc, pos, len, &ia->ap, data->nr_bytes, + false)) { + fuse_send_readpages(ia, data->file, data->nr_bytes, + fc->async_read); + data->nr_bytes = 0; + data->ia = NULL; + ia = NULL; + } + if (!ia) { + if (fc->num_background >= fc->congestion_threshold && + rac->ra->async_size >= readahead_count(rac)) + /* + * Congested and only async pages left, so skip the + * rest. + */ + return -EAGAIN; + + nr_pages = min(fc->max_pages, readahead_count(rac)); + data->ia = fuse_io_alloc(NULL, nr_pages); + if (!data->ia) + return -ENOMEM; + ia = data->ia; + } + folio_get(folio); + ap = &ia->ap; + ap->folios[ap->num_folios] = folio; + ap->descs[ap->num_folios].offset = off; + ap->descs[ap->num_folios].length = len; + data->nr_bytes += len; + ap->num_folios++; + + return 0; +} + static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter, struct iomap_read_folio_ctx *ctx, loff_t pos, size_t len) @@ -856,18 +916,41 @@ static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter, struct file *file = data->file; int ret; - /* - * for non-readahead read requests, do reads synchronously since - * it's not guaranteed that the server can handle out-of-order reads - */ iomap_start_folio_read(folio, len); - ret = fuse_do_readfolio(file, folio, off, len); - iomap_finish_folio_read(folio, off, len, ret); + if (ctx->rac) { + ret = fuse_handle_readahead(folio, ctx->rac, data, pos, len); + /* + * If fuse_handle_readahead was successful, fuse_readpages_end + * will do the iomap_finish_folio_read, else we need to call it + * here + */ + if (ret) + iomap_finish_folio_read(folio, off, len, ret); + } else { + /* + * for non-readahead read requests, do reads synchronously + * since it's not guaranteed that the server can handle + * out-of-order reads + */ + ret = fuse_do_readfolio(file, folio, off, len); + iomap_finish_folio_read(folio, off, len, ret); + } return ret; } +static int fuse_iomap_read_submit(struct iomap_read_folio_ctx *ctx) +{ + struct fuse_fill_read_data *data = ctx->private; + + if (data->ia) + fuse_send_readpages(data->ia, data->file, data->nr_bytes, + data->fc->async_read); + return 0; +} + static const struct iomap_read_ops fuse_iomap_read_ops = { .read_folio_range = fuse_iomap_read_folio_range_async, + .read_submit = fuse_iomap_read_submit, }; static int fuse_read_folio(struct file *file, struct folio *folio) @@ -930,7 +1013,8 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args, } for (i = 0; i < ap->num_folios; i++) { - folio_end_read(ap->folios[i], !err); + iomap_finish_folio_read(ap->folios[i], ap->descs[i].offset, + ap->descs[i].length, err); folio_put(ap->folios[i]); } if (ia->ff) @@ -940,7 +1024,7 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args, } static void fuse_send_readpages(struct 
fuse_io_args *ia, struct file *file, - unsigned int count) + unsigned int count, bool async) { struct fuse_file *ff = file->private_data; struct fuse_mount *fm = ff->fm; @@ -962,7 +1046,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file, fuse_read_args_fill(ia, file, pos, count, FUSE_READ); ia->read.attr_ver = fuse_get_attr_version(fm->fc); - if (fm->fc->async_read) { + if (async) { ia->ff = fuse_file_get(ff); ap->args.end = fuse_readpages_end; err = fuse_simple_background(fm, &ap->args, GFP_KERNEL); @@ -979,81 +1063,20 @@ static void fuse_readahead(struct readahead_control *rac) { struct inode *inode = rac->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); - unsigned int max_pages, nr_pages; - struct folio *folio = NULL; + struct fuse_fill_read_data data = { + .file = rac->file, + .fc = fc, + }; + struct iomap_read_folio_ctx ctx = { + .ops = &fuse_iomap_read_ops, + .rac = rac, + .private = &data + }; if (fuse_is_bad(inode)) return; - max_pages = min_t(unsigned int, fc->max_pages, - fc->max_read / PAGE_SIZE); - - /* - * This is only accurate the first time through, since readahead_folio() - * doesn't update readahead_count() from the previous folio until the - * next call. Grab nr_pages here so we know how many pages we're going - * to have to process. This means that we will exit here with - * readahead_count() == folio_nr_pages(last_folio), but we will have - * consumed all of the folios, and read_pages() will call - * readahead_folio() again which will clean up the rac. - */ - nr_pages = readahead_count(rac); - - while (nr_pages) { - struct fuse_io_args *ia; - struct fuse_args_pages *ap; - unsigned cur_pages = min(max_pages, nr_pages); - unsigned int pages = 0; - - if (fc->num_background >= fc->congestion_threshold && - rac->ra->async_size >= readahead_count(rac)) - /* - * Congested and only async pages left, so skip the - * rest. - */ - break; - - ia = fuse_io_alloc(NULL, cur_pages); - if (!ia) - break; - ap = &ia->ap; - - while (pages < cur_pages) { - unsigned int folio_pages; - - /* - * This returns a folio with a ref held on it. - * The ref needs to be held until the request is - * completed, since the splice case (see - * fuse_try_move_page()) drops the ref after it's - * replaced in the page cache. - */ - if (!folio) - folio = __readahead_folio(rac); - - folio_pages = folio_nr_pages(folio); - if (folio_pages > cur_pages - pages) { - /* - * Large folios belonging to fuse will never - * have more pages than max_pages. - */ - WARN_ON(!pages); - break; - } - - ap->folios[ap->num_folios] = folio; - ap->descs[ap->num_folios].length = folio_size(folio); - ap->num_folios++; - pages += folio_pages; - folio = NULL; - } - fuse_send_readpages(ia, rac->file, pages << PAGE_SHIFT); - nr_pages -= pages; - } - if (folio) { - folio_end_read(folio, false); - folio_put(folio); - } + iomap_readahead(&fuse_iomap_ops, &ctx); } static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -2084,7 +2107,7 @@ struct fuse_fill_wb_data { struct fuse_file *ff; unsigned int max_folios; /* - * nr_bytes won't overflow since fuse_writepage_need_send() caps + * nr_bytes won't overflow since fuse_folios_need_send() caps * wb requests to never exceed fc->max_pages (which has an upper bound * of U16_MAX). 
*/ @@ -2129,14 +2152,15 @@ static void fuse_writepages_send(struct inode *inode, spin_unlock(&fi->lock); } -static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos, - unsigned len, struct fuse_args_pages *ap, - struct fuse_fill_wb_data *data) +static bool fuse_folios_need_send(struct fuse_conn *fc, loff_t pos, + unsigned len, struct fuse_args_pages *ap, + unsigned cur_bytes, bool write) { struct folio *prev_folio; struct fuse_folio_desc prev_desc; - unsigned bytes = data->nr_bytes + len; + unsigned bytes = cur_bytes + len; loff_t prev_pos; + size_t max_bytes = write ? fc->max_write : fc->max_read; WARN_ON(!ap->num_folios); @@ -2144,8 +2168,7 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos, if ((bytes + PAGE_SIZE - 1) >> PAGE_SHIFT > fc->max_pages) return true; - /* Reached max write bytes */ - if (bytes > fc->max_write) + if (bytes > max_bytes) return true; /* Discontinuity */ @@ -2155,11 +2178,6 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, loff_t pos, if (prev_pos != pos) return true; - /* Need to grow the pages array? If so, did the expansion fail? */ - if (ap->num_folios == data->max_folios && - !fuse_pages_realloc(data, fc->max_pages)) - return true; - return false; } @@ -2183,10 +2201,24 @@ static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc, return -EIO; } - if (wpa && fuse_writepage_need_send(fc, pos, len, ap, data)) { - fuse_writepages_send(inode, data); - data->wpa = NULL; - data->nr_bytes = 0; + if (wpa) { + bool send = fuse_folios_need_send(fc, pos, len, ap, + data->nr_bytes, true); + + if (!send) { + /* + * Need to grow the pages array? If so, did the + * expansion fail? + */ + send = (ap->num_folios == data->max_folios) && + !fuse_pages_realloc(data, fc->max_pages); + } + + if (send) { + fuse_writepages_send(inode, data); + data->wpa = NULL; + data->nr_bytes = 0; + } } if (data->wpa == NULL) { -- 2.47.3 Now that fuse is integrated with iomap for read/readahead, remove the workaround added in commit bd24d2108e9c ("fuse: fix fuseblk i_blkbits for iomap partial writes"), which avoided a race condition where an iomap partial write could be overwritten by a read if blocksize < PAGE_SIZE. With iomap read/readahead, uptodate state is tracked per block, so that race can no longer occur and the workaround is no longer needed. Signed-off-by: Joanne Koong --- fs/fuse/dir.c | 2 +- fs/fuse/fuse_i.h | 8 -------- fs/fuse/inode.c | 13 +------------ 3 files changed, 2 insertions(+), 21 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5c569c3cb53f..ebee7e0b1cd3 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1199,7 +1199,7 @@ static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode, if (attr->blksize != 0) blkbits = ilog2(attr->blksize); else - blkbits = fc->blkbits; + blkbits = inode->i_sb->s_blocksize_bits; stat->blksize = 1 << blkbits; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index cc428d04be3e..1647eb7ca6fa 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -975,14 +975,6 @@ struct fuse_conn { /* Request timeout (in jiffies). 0 = no timeout */ unsigned int req_timeout; } timeout; - - /* - * This is a workaround until fuse uses iomap for reads. - * For fuseblk servers, this represents the blocksize passed in at - * mount time and for regular fuse servers, this is equivalent to - * inode->i_blkbits.
- */ - u8 blkbits; }; /* diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7ddfd2b3cc9c..3bfd83469d9f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -292,7 +292,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, if (attr->blksize) fi->cached_i_blkbits = ilog2(attr->blksize); else - fi->cached_i_blkbits = fc->blkbits; + fi->cached_i_blkbits = inode->i_sb->s_blocksize_bits; /* * Don't set the sticky bit in i_mode, unless we want the VFS @@ -1810,21 +1810,10 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) err = -EINVAL; if (!sb_set_blocksize(sb, ctx->blksize)) goto err; - /* - * This is a workaround until fuse hooks into iomap for reads. - * Use PAGE_SIZE for the blocksize else if the writeback cache - * is enabled, buffered writes go through iomap and a read may - * overwrite partially written data if blocksize < PAGE_SIZE - */ - fc->blkbits = sb->s_blocksize_bits; - if (ctx->blksize != PAGE_SIZE && - !sb_set_blocksize(sb, PAGE_SIZE)) - goto err; #endif } else { sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; - fc->blkbits = sb->s_blocksize_bits; } sb->s_subtype = ctx->subtype; -- 2.47.3
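
For filesystems other than fuse that want to opt into this read path, below is a minimal sketch of the wiring, assuming the interfaces introduced earlier in this series (struct iomap_read_folio_ctx, struct iomap_read_ops, iomap_start_folio_read()/iomap_finish_folio_read(), iomap_read_folio() and iomap_readahead()). The myfs_* names and the myfs_read_range() helper are hypothetical placeholders, not part of any patch here; a real conversion would follow the fuse changes above.

#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/pagemap.h>

/*
 * Sketch only: a non-block filesystem hooking into the iomap read path
 * proposed in this series. myfs_read_range() is a hypothetical
 * synchronous helper that fills folio bytes [off, off + len).
 */
static int myfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
                            unsigned int flags, struct iomap *iomap,
                            struct iomap *srcmap)
{
        /* No block mapping to look up; report the whole range as mapped. */
        iomap->type = IOMAP_MAPPED;
        iomap->offset = offset;
        iomap->length = length;
        return 0;
}

static const struct iomap_ops myfs_iomap_ops = {
        .iomap_begin    = myfs_iomap_begin,
};

static int myfs_read_folio_range(const struct iomap_iter *iter,
                                 struct iomap_read_folio_ctx *ctx,
                                 loff_t pos, size_t len)
{
        struct folio *folio = ctx->cur_folio;
        size_t off = offset_in_folio(folio, pos);
        int err;

        /*
         * Claim the range so iomap knows a read is in flight, do the
         * (here: synchronous) read, then report the result so the
         * per-block uptodate state and folio unlock are handled by iomap.
         */
        iomap_start_folio_read(folio, len);
        err = myfs_read_range(folio, off, len);         /* hypothetical */
        iomap_finish_folio_read(folio, off, len, err);
        return err;
}

static const struct iomap_read_ops myfs_iomap_read_ops = {
        .read_folio_range       = myfs_read_folio_range,
};

static int myfs_read_folio(struct file *file, struct folio *folio)
{
        struct iomap_read_folio_ctx ctx = {
                .cur_folio      = folio,
                .ops            = &myfs_iomap_read_ops,
        };

        return iomap_read_folio(&myfs_iomap_ops, &ctx);
}

static void myfs_readahead(struct readahead_control *rac)
{
        struct iomap_read_folio_ctx ctx = {
                .ops    = &myfs_iomap_read_ops,
                .rac    = rac,
        };

        iomap_readahead(&myfs_iomap_ops, &ctx);
}

A filesystem that batches reads asynchronously (as fuse does for readahead) would additionally stash its per-call state in ctx->private and supply a .read_submit callback to flush whatever was accumulated once iteration completes.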