From: Dave Hansen This introduces a conditional asynchronous mechanism, enabled by CONFIG_ASYNC_PGTABLE_FREE. When enabled, this mechanism defers the freeing of pages that are used as page tables for kernel address mappings. These pages are now queued to a work struct instead of being freed immediately. This deferred freeing provides a safe context for a future patch to add an IOMMU-specific callback, which might be expensive on large-scale systems. This ensures the necessary IOMMU cache invalidation is performed before the page is finally returned to the page allocator outside of any critical, non-sleepable path. Signed-off-by: Dave Hansen Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian --- include/linux/mm.h | 16 +++++++++++++--- mm/pgtable-generic.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 668d519edc0f..2d7b4af40442 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2891,6 +2891,14 @@ static inline void __pagetable_free(struct ptdesc *pt) __free_pages(page, compound_order(page)); } +#ifdef CONFIG_ASYNC_PGTABLE_FREE +void pagetable_free_kernel(struct ptdesc *pt); +#else +static inline void pagetable_free_kernel(struct ptdesc *pt) +{ + __pagetable_free(pt); +} +#endif /** * pagetable_free - Free pagetables * @pt: The page table descriptor @@ -2900,10 +2908,12 @@ static inline void __pagetable_free(struct ptdesc *pt) */ static inline void pagetable_free(struct ptdesc *pt) { - if (ptdesc_test_kernel(pt)) + if (ptdesc_test_kernel(pt)) { ptdesc_clear_kernel(pt); - - __pagetable_free(pt); + pagetable_free_kernel(pt); + } else { + __pagetable_free(pt); + } } #if defined(CONFIG_SPLIT_PTE_PTLOCKS) diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 567e2d084071..0279399d4910 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -406,3 +406,40 @@ pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, pte_unmap_unlock(pte, ptl); goto again; } + +#ifdef CONFIG_ASYNC_PGTABLE_FREE +static void kernel_pgtable_work_func(struct work_struct *work); + +static struct { + struct list_head list; + /* protect above ptdesc lists */ + spinlock_t lock; + struct work_struct work; +} kernel_pgtable_work = { + .list = LIST_HEAD_INIT(kernel_pgtable_work.list), + .lock = __SPIN_LOCK_UNLOCKED(kernel_pgtable_work.lock), + .work = __WORK_INITIALIZER(kernel_pgtable_work.work, kernel_pgtable_work_func), +}; + +static void kernel_pgtable_work_func(struct work_struct *work) +{ + struct ptdesc *pt, *next; + LIST_HEAD(page_list); + + spin_lock(&kernel_pgtable_work.lock); + list_splice_tail_init(&kernel_pgtable_work.list, &page_list); + spin_unlock(&kernel_pgtable_work.lock); + + list_for_each_entry_safe(pt, next, &page_list, pt_list) + __pagetable_free(pt); +} + +void pagetable_free_kernel(struct ptdesc *pt) +{ + spin_lock(&kernel_pgtable_work.lock); + list_add(&pt->pt_list, &kernel_pgtable_work.list); + spin_unlock(&kernel_pgtable_work.lock); + + schedule_work(&kernel_pgtable_work.work); +} +#endif -- 2.43.0