While we handle pte_lockptr() == pmd_lockptr() correctly in
zap_pte_table_if_empty(), we don't handle it in zap_empty_pte_table():
the caller already holds that shared lock, so the spin_trylock() always
fails and forces us onto the slow path.

So let's handle the scenario where pte_lockptr() == pmd_lockptr()
better, by passing the held PTE lock down and only taking the PMD lock
when it is actually a different lock. This scenario can only happen if
CONFIG_SPLIT_PTE_PTLOCKS is not set.

This is only relevant once we enable CONFIG_PT_RECLAIM on architectures
other than x86-64.

Signed-off-by: David Hildenbrand (Red Hat)
---
 mm/memory.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index c3055b2577c27..3852075ea62d4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1833,16 +1833,18 @@ static bool pte_table_reclaim_possible(unsigned long start, unsigned long end,
 	return details && details->reclaim_pt && (end - start >= PMD_SIZE);
 }
 
-static bool zap_empty_pte_table(struct mm_struct *mm, pmd_t *pmd, pmd_t *pmdval)
+static bool zap_empty_pte_table(struct mm_struct *mm, pmd_t *pmd,
+		spinlock_t *ptl, pmd_t *pmdval)
 {
 	spinlock_t *pml = pmd_lockptr(mm, pmd);
 
-	if (!spin_trylock(pml))
+	if (ptl != pml && !spin_trylock(pml))
 		return false;
 
 	*pmdval = pmdp_get(pmd);
 	pmd_clear(pmd);
-	spin_unlock(pml);
+	if (ptl != pml)
+		spin_unlock(pml);
 	return true;
 }
 
@@ -1934,7 +1936,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	 * from being repopulated by another thread.
 	 */
 	if (can_reclaim_pt && direct_reclaim && addr == end)
-		direct_reclaim = zap_empty_pte_table(mm, pmd, &pmdval);
+		direct_reclaim = zap_empty_pte_table(mm, pmd, ptl, &pmdval);
 
 	add_mm_rss_vec(mm, rss);
 	lazy_mmu_mode_disable();
-- 
2.52.0
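
For context, a minimal sketch of why the trylock could never succeed
here (paraphrased along the lines of include/linux/mm.h; the exact
definitions and config guards vary by kernel version):

    /* Without split PTE ptlocks, all page tables of an MM share one lock. */
    static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
    {
            return &mm->page_table_lock;
    }

    /* Split PMD ptlocks depend on split PTE ptlocks, so this is shared too. */
    static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
    {
            return &mm->page_table_lock;
    }

In that configuration, zap_pte_range() already holds this lock as ptl
when calling zap_empty_pte_table(), so spin_trylock(pml) on the very
same (already held) lock always fails. Comparing ptl against pml first
lets us skip both the trylock and the unlock.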