Block device buffered reads and writes already pass through filemap_read() and iomap_file_buffered_write() respectively, both of which handle IOCB_DONTCACHE. Enable RWF_DONTCACHE for block device files by setting FOP_DONTCACHE in def_blk_fops. For CONFIG_BUFFER_HEAD paths, add block_write_begin_iocb() which threads the kiocb through so that buffer_head-based I/O can use DONTCACHE behavior. The existing block_write_begin() is preserved as a wrapper that passes a NULL iocb. This support is useful for databases that operate on raw block devices, among other userspace applications. Signed-off-by: Tal Zussman --- block/fops.c | 5 +++-- fs/buffer.c | 19 ++++++++++++++++--- include/linux/buffer_head.h | 3 +++ 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/block/fops.c b/block/fops.c index 4d32785b31d9..d8165f6ba71c 100644 --- a/block/fops.c +++ b/block/fops.c @@ -505,7 +505,8 @@ static int blkdev_write_begin(const struct kiocb *iocb, unsigned len, struct folio **foliop, void **fsdata) { - return block_write_begin(mapping, pos, len, foliop, blkdev_get_block); + return block_write_begin_iocb(iocb, mapping, pos, len, foliop, + blkdev_get_block); } static int blkdev_write_end(const struct kiocb *iocb, @@ -967,7 +968,7 @@ const struct file_operations def_blk_fops = { .splice_write = iter_file_splice_write, .fallocate = blkdev_fallocate, .uring_cmd = blkdev_uring_cmd, - .fop_flags = FOP_BUFFER_RASYNC, + .fop_flags = FOP_BUFFER_RASYNC | FOP_DONTCACHE, }; static __init int blkdev_init(void) diff --git a/fs/buffer.c b/fs/buffer.c index 838c0c571022..18f1d128bb19 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2241,14 +2241,19 @@ EXPORT_SYMBOL(block_commit_write); * * The filesystem needs to handle block truncation upon failure. */ -int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, +int block_write_begin_iocb(const struct kiocb *iocb, + struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, get_block_t *get_block) { pgoff_t index = pos >> PAGE_SHIFT; + fgf_t fgp_flags = FGP_WRITEBEGIN; struct folio *folio; int status; - folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, + if (iocb && iocb->ki_flags & IOCB_DONTCACHE) + fgp_flags |= FGP_DONTCACHE; + + folio = __filemap_get_folio(mapping, index, fgp_flags, mapping_gfp_mask(mapping)); if (IS_ERR(folio)) return PTR_ERR(folio); @@ -2263,6 +2268,13 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, *foliop = folio; return status; } + +int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, + struct folio **foliop, get_block_t *get_block) +{ + return block_write_begin_iocb(NULL, mapping, pos, len, foliop, + get_block); +} EXPORT_SYMBOL(block_write_begin); int block_write_end(loff_t pos, unsigned len, unsigned copied, @@ -2591,7 +2603,8 @@ int cont_write_begin(const struct kiocb *iocb, struct address_space *mapping, (*bytes)++; } - return block_write_begin(mapping, pos, len, foliop, get_block); + return block_write_begin_iocb(iocb, mapping, pos, len, foliop, + get_block); } EXPORT_SYMBOL(cont_write_begin); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index b16b88bfbc3e..ddf88ce290f2 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -260,6 +260,9 @@ int block_read_full_folio(struct folio *, get_block_t *); bool block_is_partially_uptodate(struct folio *, size_t from, size_t count); int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, get_block_t *get_block); +int block_write_begin_iocb(const struct kiocb *iocb, + struct address_space *mapping, loff_t pos, unsigned len, + struct folio **foliop, get_block_t *get_block); int __block_write_begin(struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block); int block_write_end(loff_t pos, unsigned len, unsigned copied, struct folio *); -- 2.39.5