From: Oven Liyang If the current page fault is using the per-VMA lock, and we only released the lock to wait for I/O completion (e.g., using folio_lock()), then when the fault is retried after the I/O completes, it should still qualify for the per-VMA-lock path. To do this, introduce VM_FAULT_RETRY_VMA: filemap_fault() sets it together with VM_FAULT_RETRY when it dropped the per-VMA lock only to wait for I/O, and the arch fault handlers then retry via lock_vma_under_rcu() instead of falling back to the mmap_lock path. The new flag takes bit 0x000800, so VM_FAULT_FALLBACK, VM_FAULT_DONE_COW, VM_FAULT_NEEDDSYNC and VM_FAULT_COMPLETED each move up by one bit. Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Huacai Chen Cc: WANG Xuerui Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Alexandre Ghiti Cc: Alexander Gordeev Cc: Gerald Schaefer Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Sven Schnelle Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: x86@kernel.org Cc: H. Peter Anvin Cc: David Hildenbrand Cc: Lorenzo Stoakes Cc: Liam R. Howlett Cc: Vlastimil Babka Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Michal Hocko Cc: Matthew Wilcox Cc: Pedro Falcato Cc: Jarkko Sakkinen Cc: Oscar Salvador Cc: Kuninori Morimoto Cc: Mark Rutland Cc: Ada Couprie Diaz Cc: Robin Murphy Cc: Kristina Martšenko Cc: Kevin Brodsky Cc: Yeoreum Yun Cc: Wentao Guan Cc: Thorsten Blum Cc: Steven Rostedt Cc: Yunhui Cui Cc: Nam Cao Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: loongarch@lists.linux.dev Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-riscv@lists.infradead.org Cc: linux-s390@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-fsdevel@vger.kernel.org Cc: Chris Li Cc: Kairui Song Cc: Kemeng Shi Cc: Nhat Pham Cc: Baoquan He Signed-off-by: Oven Liyang Signed-off-by: Barry Song --- arch/arm/mm/fault.c | 5 +++++ arch/arm64/mm/fault.c | 5 +++++ arch/loongarch/mm/fault.c | 4 ++++ arch/powerpc/mm/fault.c | 5 ++++- arch/riscv/mm/fault.c | 4 ++++ arch/s390/mm/fault.c | 4 ++++ arch/x86/mm/fault.c | 4 ++++ include/linux/mm_types.h | 9 +++++---- mm/filemap.c | 5 ++++- 9 files changed, 39 insertions(+), 6 deletions(-) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 
2bc828a1940c..49fc0340821c 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -313,6 +313,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (!(flags & FAULT_FLAG_USER)) goto lock_mmap; +retry_vma: vma = lock_vma_under_rcu(mm, addr); if (!vma) goto lock_mmap; @@ -342,6 +343,10 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) goto no_context; return 0; } + + /* If the first try is only about waiting for the I/O to complete */ + if (fault & VM_FAULT_RETRY_VMA) + goto retry_vma; lock_mmap: retry: diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 125dfa6c613b..842f50b99d3e 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -622,6 +622,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, if (!(mm_flags & FAULT_FLAG_USER)) goto lock_mmap; +retry_vma: vma = lock_vma_under_rcu(mm, addr); if (!vma) goto lock_mmap; @@ -668,6 +669,10 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, goto no_context; return 0; } + + /* If the first try is only about waiting for the I/O to complete */ + if (fault & VM_FAULT_RETRY_VMA) + goto retry_vma; lock_mmap: retry: diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c index 2c93d33356e5..738f495560c0 100644 --- a/arch/loongarch/mm/fault.c +++ b/arch/loongarch/mm/fault.c @@ -219,6 +219,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, if (!(flags & FAULT_FLAG_USER)) goto lock_mmap; +retry_vma: vma = lock_vma_under_rcu(mm, address); if (!vma) goto lock_mmap; @@ -265,6 +266,9 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, no_context(regs, write, address); return; } + /* If the first try is only about waiting for the I/O to complete */ + if (fault & VM_FAULT_RETRY_VMA) + goto retry_vma; lock_mmap: retry: diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 806c74e0d5ab..cb7ffc20c760 100644 --- a/arch/powerpc/mm/fault.c +++ 
b/arch/powerpc/mm/fault.c @@ -487,6 +487,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, if (!(flags & FAULT_FLAG_USER)) goto lock_mmap; +retry_vma: vma = lock_vma_under_rcu(mm, address); if (!vma) goto lock_mmap; @@ -516,7 +517,9 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, if (fault_signal_pending(fault, regs)) return user_mode(regs) ? 0 : SIGBUS; - + /* If the first try is only about waiting for the I/O to complete */ + if (fault & VM_FAULT_RETRY_VMA) + goto retry_vma; lock_mmap: /* When running in the kernel we expect faults to occur only to diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 04ed6f8acae4..b94cf57c2b9a 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -347,6 +347,7 @@ void handle_page_fault(struct pt_regs *regs) if (!(flags & FAULT_FLAG_USER)) goto lock_mmap; +retry_vma: vma = lock_vma_under_rcu(mm, addr); if (!vma) goto lock_mmap; @@ -376,6 +377,9 @@ void handle_page_fault(struct pt_regs *regs) no_context(regs, addr); return; } + /* If the first try is only about waiting for the I/O to complete */ + if (fault & VM_FAULT_RETRY_VMA) + goto retry_vma; lock_mmap: retry: diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index e1ad05bfd28a..8d91c6495e13 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -286,6 +286,7 @@ static void do_exception(struct pt_regs *regs, int access) flags |= FAULT_FLAG_WRITE; if (!(flags & FAULT_FLAG_USER)) goto lock_mmap; +retry_vma: vma = lock_vma_under_rcu(mm, address); if (!vma) goto lock_mmap; @@ -310,6 +311,9 @@ static void do_exception(struct pt_regs *regs, int access) handle_fault_error_nolock(regs, 0); return; } + /* If the first try is only about waiting for the I/O to complete */ + if (fault & VM_FAULT_RETRY_VMA) + goto retry_vma; lock_mmap: retry: vma = lock_mm_and_find_vma(mm, address, regs); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 998bd807fc7b..6023d0083903 100644 --- 
a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1324,6 +1324,7 @@ void do_user_addr_fault(struct pt_regs *regs, if (!(flags & FAULT_FLAG_USER)) goto lock_mmap; +retry_vma: vma = lock_vma_under_rcu(mm, address); if (!vma) goto lock_mmap; @@ -1353,6 +1354,9 @@ void do_user_addr_fault(struct pt_regs *regs, ARCH_DEFAULT_PKEY); return; } + /* If the first try is only about waiting for the I/O to complete */ + if (fault & VM_FAULT_RETRY_VMA) + goto retry_vma; lock_mmap: retry: diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b71625378ce3..12b2d65ef1b9 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1670,10 +1670,11 @@ enum vm_fault_reason { VM_FAULT_NOPAGE = (__force vm_fault_t)0x000100, VM_FAULT_LOCKED = (__force vm_fault_t)0x000200, VM_FAULT_RETRY = (__force vm_fault_t)0x000400, - VM_FAULT_FALLBACK = (__force vm_fault_t)0x000800, - VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000, - VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000, - VM_FAULT_COMPLETED = (__force vm_fault_t)0x004000, + VM_FAULT_RETRY_VMA = (__force vm_fault_t)0x000800, + VM_FAULT_FALLBACK = (__force vm_fault_t)0x001000, + VM_FAULT_DONE_COW = (__force vm_fault_t)0x002000, + VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x004000, + VM_FAULT_COMPLETED = (__force vm_fault_t)0x008000, VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000, }; diff --git a/mm/filemap.c b/mm/filemap.c index 7d15a9c216ef..57dfd2211109 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3464,6 +3464,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) struct folio *folio; vm_fault_t ret = 0; bool mapping_locked = false; + bool retry_by_vma_lock = false; max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); if (unlikely(index >= max_idx)) @@ -3560,6 +3561,8 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) */ if (fpin) { folio_unlock(folio); + if (vmf->flags & FAULT_FLAG_VMA_LOCK) + retry_by_vma_lock = true; goto out_retry; } if (mapping_locked) @@ -3610,7 +3613,7 @@ vm_fault_t 
filemap_fault(struct vm_fault *vmf) filemap_invalidate_unlock_shared(mapping); if (fpin) fput(fpin); - return ret | VM_FAULT_RETRY; + return ret | VM_FAULT_RETRY | (retry_by_vma_lock ? VM_FAULT_RETRY_VMA : 0); } EXPORT_SYMBOL(filemap_fault); -- 2.39.3 (Apple Git-146)