hugetlb_fault_mutex_hash() is used to serialize faults and page cache
operations on the same hugetlb file offset. The helper currently expects
its index argument in hugetlb page granularity, so callers have to
open-code conversions from the PAGE_SIZE-based indices commonly used by
the rest of the MM helpers.

Change hugetlb_fault_mutex_hash() to take a PAGE_SIZE-based index
instead, and perform the hugetlb-granularity conversion inside the
helper. Update all callers accordingly. This makes the helper interface
consistent with filemap_get_folio() and linear_page_index(), while
preserving the same lock selection for a given hugetlb file offset.

Signed-off-by: Jane Chu
---
 fs/hugetlbfs/inode.c | 19 ++++++++++---------
 mm/hugetlb.c         | 28 +++++++++++++++++++---------
 mm/memfd.c           | 11 ++++++-----
 mm/userfaultfd.c     |  7 +++----
 4 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index cf79fb830377..e24e9bf54e14 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -575,7 +575,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 	struct address_space *mapping = &inode->i_data;
 	const pgoff_t end = lend >> PAGE_SHIFT;
 	struct folio_batch fbatch;
-	pgoff_t next, index;
+	pgoff_t next, idx;
 	int i, freed = 0;
 	bool truncate_op = (lend == LLONG_MAX);
 
@@ -586,15 +586,15 @@
 			struct folio *folio = fbatch.folios[i];
 			u32 hash = 0;
 
-			index = folio->index >> huge_page_order(h);
-			hash = hugetlb_fault_mutex_hash(mapping, index);
+			hash = hugetlb_fault_mutex_hash(mapping, folio->index);
 			mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 			/*
 			 * Remove folio that was part of folio_batch.
 			 */
+			idx = folio->index >> huge_page_order(h);
 			remove_inode_single_folio(h, inode, mapping, folio,
-						index, truncate_op);
+						idx, truncate_op);
 			freed++;
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 
@@ -734,7 +734,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 	struct mm_struct *mm = current->mm;
 	loff_t hpage_size = huge_page_size(h);
 	unsigned long hpage_shift = huge_page_shift(h);
-	pgoff_t start, index, end;
+	pgoff_t start, end, idx, index;
 	int error;
 	u32 hash;
 
@@ -774,7 +774,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 	vm_flags_init(&pseudo_vma, VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
 	pseudo_vma.vm_file = file;
 
-	for (index = start; index < end; index++) {
+	for (idx = start; idx < end; idx++) {
 		/*
 		 * This is supposed to be the vaddr where the page is being
 		 * faulted in, but we have no vaddr here.
@@ -794,14 +794,15 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		}
 
 		/* addr is the offset within the file (zero based) */
-		addr = index * hpage_size;
+		addr = idx * hpage_size;
 
 		/* mutex taken here, fault path and hole punch */
+		index = idx << huge_page_order(h);
 		hash = hugetlb_fault_mutex_hash(mapping, index);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 		/* See if already present in mapping to avoid alloc/free */
-		folio = filemap_get_folio(mapping, index << huge_page_order(h));
+		folio = filemap_get_folio(mapping, index);
 		if (!IS_ERR(folio)) {
 			folio_put(folio);
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -824,7 +825,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		}
 		folio_zero_user(folio, addr);
 		__folio_mark_uptodate(folio);
-		error = hugetlb_add_to_page_cache(folio, mapping, index);
+		error = hugetlb_add_to_page_cache(folio, mapping, idx);
 		if (unlikely(error)) {
 			restore_reserve_on_error(h, &pseudo_vma, addr, folio);
 			folio_put(folio);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 38b39eaf46cc..9d5ae1f87850 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5515,7 +5515,7 @@ static vm_fault_t hugetlb_wp(struct vm_fault *vmf)
 	 */
 	if (cow_from_owner) {
 		struct address_space *mapping = vma->vm_file->f_mapping;
-		pgoff_t idx;
+		pgoff_t index;
 		u32 hash;
 
 		folio_put(old_folio);
@@ -5528,8 +5528,9 @@ static vm_fault_t hugetlb_wp(struct vm_fault *vmf)
 		 *
 		 * Reacquire both after unmap operation.
 		 */
-		idx = vma_hugecache_offset(h, vma, vmf->address);
-		hash = hugetlb_fault_mutex_hash(mapping, idx);
+		index = linear_page_index(vma, vmf->address);
+		hash = hugetlb_fault_mutex_hash(mapping, index);
+
 		hugetlb_vma_unlock_read(vma);
 		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 
@@ -5664,6 +5665,10 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_fault *vmf,
 					   unsigned long reason)
 {
 	u32 hash;
+	pgoff_t index;
+
+	index = linear_page_index(vmf->vma, vmf->address);
+	hash = hugetlb_fault_mutex_hash(mapping, index);
 
 	/*
 	 * vma_lock and hugetlb_fault_mutex must be dropped before handling
@@ -5671,7 +5676,6 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_fault *vmf,
 	 * userfault, any vma operation should be careful from here.
 	 */
 	hugetlb_vma_unlock_read(vmf->vma);
-	hash = hugetlb_fault_mutex_hash(mapping, vmf->pgoff);
 	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 	return handle_userfault(vmf, reason);
 }
@@ -5696,7 +5700,8 @@ static bool hugetlb_pte_stable(struct hstate *h, struct mm_struct *mm, unsigned
 static vm_fault_t hugetlb_no_page(struct address_space *mapping,
 			struct vm_fault *vmf)
 {
-	u32 hash = hugetlb_fault_mutex_hash(mapping, vmf->pgoff);
+	u32 hash;
+	pgoff_t index;
 	bool new_folio, new_anon_folio = false;
 	struct vm_area_struct *vma = vmf->vma;
 	struct mm_struct *mm = vma->vm_mm;
@@ -5707,6 +5712,8 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
 	unsigned long size;
 	pte_t new_pte;
 
+	index = vmf->pgoff << huge_page_order(h);
+	hash = hugetlb_fault_mutex_hash(mapping, index);
 	/*
 	 * Currently, we are forced to kill the process in the event the
 	 * original mapper has unmapped pages from the child due to a failed
@@ -5920,13 +5927,14 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
 }
 
 #ifdef CONFIG_SMP
-u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
+/* 'index' is expected to be in PAGE_SIZE granularity */
+u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t index)
 {
 	unsigned long key[2];
 	u32 hash;
 
 	key[0] = (unsigned long) mapping;
-	key[1] = idx;
+	key[1] = index >> huge_page_order(hstate_inode(mapping->host));
 
 	hash = jhash2((u32 *)&key, sizeof(key)/(sizeof(u32)), 0);
 
@@ -5937,7 +5945,7 @@ u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
  * For uniprocessor systems we always use a single mutex, so just
  * return 0 and avoid the hashing overhead.
  */
-u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
+u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t index)
 {
 	return 0;
 }
@@ -5952,6 +5960,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct hstate *h = hstate_vma(vma);
 	struct address_space *mapping;
 	bool need_wait_lock = false;
+	pgoff_t index;
 	struct vm_fault vmf = {
 		.vma = vma,
 		.address = address & huge_page_mask(h),
@@ -5972,8 +5981,9 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * get spurious allocation failures if two CPUs race to instantiate
 	 * the same page in the page cache.
 	 */
+	index = linear_page_index(vma, vmf.address);
 	mapping = vma->vm_file->f_mapping;
-	hash = hugetlb_fault_mutex_hash(mapping, vmf.pgoff);
+	hash = hugetlb_fault_mutex_hash(mapping, index);
 	mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 	/*
diff --git a/mm/memfd.c b/mm/memfd.c
index fb425f4e315f..911ff8220d05 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -64,7 +64,7 @@ static void memfd_tag_pins(struct xa_state *xas)
  * (memfd_pin_folios()) cannot find a folio in the page cache at a given
  * index in the mapping.
  */
-struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
+struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t index)
 {
 #ifdef CONFIG_HUGETLB_PAGE
 	struct folio *folio;
@@ -79,12 +79,13 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
 	 */
 	struct inode *inode = file_inode(memfd);
 	struct hstate *h = hstate_file(memfd);
-	int err = -ENOMEM;
 	long nr_resv;
+	pgoff_t idx;
+	int err = -ENOMEM;
 
 	gfp_mask = htlb_alloc_mask(h);
 	gfp_mask &= ~(__GFP_HIGHMEM | __GFP_MOVABLE);
-	idx >>= huge_page_order(h);
+	idx = index >> huge_page_order(h);
 	nr_resv = hugetlb_reserve_pages(inode, idx, idx + 1, NULL,
 					EMPTY_VMA_FLAGS);
 	if (nr_resv < 0)
@@ -116,7 +117,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
 	 * races with concurrent allocations, as required by all other
 	 * callers of hugetlb_add_to_page_cache().
 	 */
-	hash = hugetlb_fault_mutex_hash(memfd->f_mapping, idx);
+	hash = hugetlb_fault_mutex_hash(memfd->f_mapping, index);
 	mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 	err = hugetlb_add_to_page_cache(folio,
@@ -140,7 +141,7 @@ struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
 		return ERR_PTR(err);
 	}
 #endif
-	return shmem_read_folio(memfd->f_mapping, idx);
+	return shmem_read_folio(memfd->f_mapping, index);
 }
 
 /*
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index c053aa4389b6..9482b25d3d84 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -504,7 +504,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
 	long copied;
 	struct folio *folio;
 	unsigned long vma_hpagesize;
-	pgoff_t idx;
+	pgoff_t index;
 	u32 hash;
 	struct address_space *mapping;
 
@@ -573,10 +573,9 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
 		 * in the case of shared pmds.  fault mutex prevents
 		 * races with other faulting threads.
 		 */
-		idx = linear_page_index(dst_vma, dst_addr);
-		idx >>= huge_page_order(hstate_vma(dst_vma));
+		index = linear_page_index(dst_vma, dst_addr);
 		mapping = dst_vma->vm_file->f_mapping;
-		hash = hugetlb_fault_mutex_hash(mapping, idx);
+		hash = hugetlb_fault_mutex_hash(mapping, index);
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
 		hugetlb_vma_lock_read(dst_vma);
-- 
2.43.5
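
For reference, a minimal caller-side sketch of the post-patch locking pattern (not part of the patch): any path that already has a PAGE_SIZE-based index from linear_page_index() can pass it straight to hugetlb_fault_mutex_hash(); the helper does the hugetlb-granularity shift internally. The function name below is made up for illustration; the other identifiers are those used in the diff above.

	/* Hypothetical helper illustrating the new calling convention. */
	static void lock_hugetlb_offset_example(struct vm_area_struct *vma,
						unsigned long address)
	{
		struct address_space *mapping = vma->vm_file->f_mapping;
		pgoff_t index = linear_page_index(vma, address); /* PAGE_SIZE units */
		u32 hash = hugetlb_fault_mutex_hash(mapping, index);

		mutex_lock(&hugetlb_fault_mutex_table[hash]);
		/* ... fault or page cache work on this hugetlb file offset ... */
		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
	}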