This is similar to commit 902893e39076 ("NFS: Enable use of the RWF_DONTCACHE flag on the NFS client"). The Ceph client now supports RWF_DONTCACHE for buffered file I/O, but it needs to preserve the IOCB_DONTCACHE state through netfs_write_begin(). Unlike NFS, Ceph does not need to defer drop-behind until after a separate COMMIT step. Ceph's buffered writeback completes when the OSD write completes, so the existing folio_end_writeback() handling is sufficient. This patch changes netfs because Ceph does not get its write_begin folio directly. ceph_write_begin() is a wrapper around netfs_write_begin(), and that helper was calling __filemap_get_folio(FGP_WRITEBEGIN, ...) without access to the kiocb. As a result, the per-I/O IOCB_DONTCACHE flag was lost before folio allocation. Fix this by passing the kiocb into netfs_write_begin() and switching it to write_begin_get_folio(iocb, ...), so IOCB_DONTCACHE is translated into FGP_DONTCACHE when appropriate. Then set FOP_DONTCACHE on ceph_file_fops so the VFS will accept RWF_DONTCACHE for files on Ceph. 
Signed-off-by: Max Kellermann --- fs/ceph/addr.c | 3 ++- fs/ceph/file.c | 1 + fs/netfs/buffered_read.c | 6 +++--- include/linux/netfs.h | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 2090fc78529c..b4ea3ba8211e 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1865,7 +1865,8 @@ static int ceph_write_begin(const struct kiocb *iocb, struct ceph_inode_info *ci = ceph_inode(inode); int r; - r = netfs_write_begin(&ci->netfs, file, inode->i_mapping, pos, len, foliop, NULL); + r = netfs_write_begin(iocb, &ci->netfs, file, inode->i_mapping, pos, + len, foliop, NULL); if (r < 0) return r; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5e7c73a29aa3..341589ef32cd 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -3191,4 +3191,5 @@ const struct file_operations ceph_file_fops = { .compat_ioctl = compat_ptr_ioctl, .fallocate = ceph_fallocate, .copy_file_range = ceph_copy_file_range, + .fop_flags = FOP_DONTCACHE, }; diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index a8c0d86118c5..6ec7c2459d8a 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -589,6 +589,7 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len, /** * netfs_write_begin - Helper to prepare for writing [DEPRECATED] + * @iocb: The kiocb describing the write request * @ctx: The netfs context * @file: The file to read from * @mapping: The mapping to read from @@ -620,7 +621,7 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len, * Note that this should be considered deprecated and netfs_perform_write() * used instead. 
*/ -int netfs_write_begin(struct netfs_inode *ctx, +int netfs_write_begin(const struct kiocb *iocb, struct netfs_inode *ctx, struct file *file, struct address_space *mapping, loff_t pos, unsigned int len, struct folio **_folio, void **_fsdata) @@ -631,8 +632,7 @@ int netfs_write_begin(struct netfs_inode *ctx, int ret; retry: - folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, - mapping_gfp_mask(mapping)); + folio = write_begin_get_folio(iocb, mapping, index, len); if (IS_ERR(folio)) return PTR_ERR(folio); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index ba17ac5bf356..d58c614e32fd 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -414,7 +414,8 @@ int netfs_writeback_single(struct address_space *mapping, struct readahead_control; void netfs_readahead(struct readahead_control *); int netfs_read_folio(struct file *, struct folio *); -int netfs_write_begin(struct netfs_inode *, struct file *, +int netfs_write_begin(const struct kiocb *iocb, + struct netfs_inode *ctx, struct file *file, struct address_space *, loff_t pos, unsigned int len, struct folio **, void **fsdata); int netfs_writepages(struct address_space *mapping, -- 2.47.3