The deadlock occurs due to the following lock ordering:

  Task A (punch_hole):             Task B (migration):
  --------------------             -------------------
  1. i_mmap_lock_write(mapping)    1. folio_lock(folio)
  2. folio_lock(folio)             2. i_mmap_lock_read(mapping)
     (blocks waiting for B)           (blocks waiting for A)

Task A is blocked in the punch-hole path:
  hugetlbfs_fallocate
    hugetlbfs_punch_hole
      hugetlbfs_zero_partial_page
        filemap_lock_hugetlb_folio
          filemap_lock_folio
            __filemap_get_folio
              folio_lock

Task B is blocked in the migration path:
  migrate_pages
    migrate_hugetlbs
      unmap_and_move_huge_page
        remove_migration_ptes
          __rmap_walk_file
            i_mmap_lock_read

To break this circular dependency, use filemap_lock_folio_nowait() in the punch-hole path. If the folio is already locked, Task A drops the i_mmap_rwsem and retries. This allows Task B to finish its rmap walk and release the folio lock.

Link: https://lore.kernel.org/all/68e9715a.050a0220.1186a4.000d.GAE@google.com Reported-by: syzbot+2d9c96466c978346b55f@syzkaller.appspotmail.com Signed-off-by: Jinchao Wang --- fs/hugetlbfs/inode.c | 34 +++++++++++++++++++++++----------- include/linux/hugetlb.h | 2 +- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 3b4c152c5c73..e903344aa0ec 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -653,17 +653,16 @@ static void hugetlb_vmtruncate(struct inode *inode, loff_t offset) remove_inode_hugepages(inode, offset, LLONG_MAX); } -static void hugetlbfs_zero_partial_page(struct hstate *h, - struct address_space *mapping, - loff_t start, - loff_t end) +static int hugetlbfs_zero_partial_page(struct hstate *h, + struct address_space *mapping, + loff_t start, loff_t end) { pgoff_t idx = start >> huge_page_shift(h); struct folio *folio; folio = filemap_lock_hugetlb_folio(h, mapping, idx); if (IS_ERR(folio)) - return; + return PTR_ERR(folio); start = start & ~huge_page_mask(h); end = end & ~huge_page_mask(h); @@ -674,6 +673,7 @@ static void 
hugetlbfs_zero_partial_page(struct hstate *h, folio_unlock(folio); folio_put(folio); + return 0; } static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) @@ -683,6 +683,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) struct hstate *h = hstate_inode(inode); loff_t hpage_size = huge_page_size(h); loff_t hole_start, hole_end; + int rc; /* * hole_start and hole_end indicate the full pages within the hole. @@ -698,12 +699,18 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) return -EPERM; } +repeat: i_mmap_lock_write(mapping); /* If range starts before first full page, zero partial page. */ - if (offset < hole_start) - hugetlbfs_zero_partial_page(h, mapping, - offset, min(offset + len, hole_start)); + if (offset < hole_start) { + rc = hugetlbfs_zero_partial_page(h, mapping, offset, + min(offset + len, hole_start)); + if (rc == -EAGAIN) { + i_mmap_unlock_write(mapping); + goto repeat; + } + } /* Unmap users of full pages in the hole. */ if (hole_end > hole_start) { @@ -714,9 +721,14 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) } /* If range extends beyond last full page, zero partial page. 
*/ - if ((offset + len) > hole_end && (offset + len) > hole_start) - hugetlbfs_zero_partial_page(h, mapping, - hole_end, offset + len); + if ((offset + len) > hole_end && (offset + len) > hole_start) { + rc = hugetlbfs_zero_partial_page(h, mapping, hole_end, + offset + len); + if (rc == -EAGAIN) { + i_mmap_unlock_write(mapping); + goto repeat; + } + } i_mmap_unlock_write(mapping); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 019a1c5281e4..ad55b9dada0a 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -814,7 +814,7 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h) static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h, struct address_space *mapping, pgoff_t idx) { - return filemap_lock_folio(mapping, idx << huge_page_order(h)); + return filemap_lock_folio_nowait(mapping, idx << huge_page_order(h)); } #include <asm/hugetlb.h> -- 2.43.0