Introduce ext4_iomap_page_mkwrite() to implement the mmap iomap path for ext4. Most of this work is delegated to iomap_page_mkwrite(), which only needs to be called with ext4_iomap_buffer_write_ops and ext4_iomap_buffer_da_write_ops as arguments to allocate and map the blocks. However, the lock ordering of the folio lock and transaction start is the opposite of that in the buffer_head buffered write path; update the locking documentation accordingly. Signed-off-by: Zhang Yi --- fs/ext4/inode.c | 32 +++++++++++++++++++++++++++++++- fs/ext4/super.c | 8 ++++++-- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4a7d18511c3f..0d2852159fa3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4026,7 +4026,7 @@ static int ext4_iomap_buffered_do_write_begin(struct inode *inode, /* Inline data support is not yet available. */ if (WARN_ON_ONCE(ext4_has_inline_data(inode))) return -ERANGE; - if (WARN_ON_ONCE(!(flags & IOMAP_WRITE))) + if (WARN_ON_ONCE(!(flags & (IOMAP_WRITE | IOMAP_FAULT)))) return -EINVAL; if (delalloc) @@ -4086,6 +4086,14 @@ static int ext4_iomap_buffered_da_write_end(struct inode *inode, loff_t offset, if (iomap->type != IOMAP_DELALLOC || !(iomap->flags & IOMAP_F_NEW)) return 0; + /* + * iomap_page_mkwrite() will never fail in a way that requires delalloc + * extents that it allocated to be revoked. Hence never try to release + * them here. 
+ */ + if (flags & IOMAP_FAULT) + return 0; + /* Nothing to do if we've written the entire delalloc extent */ start_byte = iomap_last_written_block(inode, offset, written); end_byte = round_up(offset + length, i_blocksize(inode)); @@ -7135,6 +7143,23 @@ static int ext4_block_page_mkwrite(struct inode *inode, struct folio *folio, return ret; } +static vm_fault_t ext4_iomap_page_mkwrite(struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vmf->vma->vm_file); + const struct iomap_ops *iomap_ops; + + /* + * ext4_nonda_switch() could write back this folio, so it has to be + * called before locking the folio. + */ + if (test_opt(inode->i_sb, DELALLOC) && !ext4_nonda_switch(inode->i_sb)) + iomap_ops = &ext4_iomap_buffered_da_write_ops; + else + iomap_ops = &ext4_iomap_buffered_write_ops; + + return iomap_page_mkwrite(vmf, iomap_ops, NULL); + } + vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -7157,6 +7182,11 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf) filemap_invalidate_lock_shared(mapping); + if (ext4_inode_buffered_iomap(inode)) { + ret = ext4_iomap_page_mkwrite(vmf); + goto out; + } + err = ext4_convert_inline_data(inode); if (err) goto out_ret; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cffe63deba31..4bb77703ffe1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -100,8 +100,12 @@ static const struct fs_parameter_spec ext4_param_specs[]; * Lock ordering * * page fault path: - * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start - * -> page lock -> i_data_sem (rw) + * - buffer_head path: + * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> + * transaction start -> folio lock -> i_data_sem (rw) + * - iomap path: + * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> + * folio lock -> transaction start -> i_data_sem (rw) * * buffered write path: * sb_start_write -> i_rwsem (w) -> mmap_lock -- 2.52.0