From: Shivam Kalra
Subject: [PATCH 1/2] mm/vmalloc: extract vmalloc_free_pages() helper from vfree()

Extract the page-freeing loop, memcg accounting, and nr_vmalloc_pages
update from vfree() into a reusable vmalloc_free_pages() helper. The
helper operates on a range [start, end) of pages from a vm_struct,
making it suitable for both a full free (vfree) and a partial free
(the upcoming vrealloc shrink).

No functional change.

Signed-off-by: Shivam Kalra
---
 mm/vmalloc.c | 53 ++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 628f96e83b118..54e76a47e9957 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3409,6 +3409,40 @@ void vfree_atomic(const void *addr)
 	schedule_work(&p->wq);
 }
 
+/**
+ * vmalloc_free_pages - free a range of pages from a vmalloc allocation
+ * @vm: the vm_struct containing the pages
+ * @start: first page index to free (inclusive)
+ * @end: last page index to free (exclusive)
+ *
+ * Free pages [start, end), updating memcg accounting and the global
+ * nr_vmalloc_pages counter. The caller is responsible for unmapping
+ * (vunmap_range) and KASAN poisoning before calling this.
+ */
+static void vmalloc_free_pages(struct vm_struct *vm,
+			       unsigned int start, unsigned int end)
+{
+	unsigned int i;
+	unsigned int nr = end - start;
+
+	/* All pages of vm are charged to the same memcg, so use the first. */
+	if (!(vm->flags & VM_MAP_PUT_PAGES))
+		mod_memcg_page_state(vm->pages[start], MEMCG_VMALLOC, -nr);
+
+	for (i = start; i < end; i++) {
+		BUG_ON(!vm->pages[i]);
+		/*
+		 * High-order allocs for huge vmallocs are split, so
+		 * can be freed as an array of order-0 allocations.
+		 */
+		__free_page(vm->pages[i]);
+		cond_resched();
+	}
+
+	if (!(vm->flags & VM_MAP_PUT_PAGES))
+		atomic_long_sub(nr, &nr_vmalloc_pages);
+}
+
 /**
  * vfree - Release memory allocated by vmalloc()
  * @addr: Memory base address
@@ -3429,7 +3463,6 @@ void vfree_atomic(const void *addr)
 void vfree(const void *addr)
 {
 	struct vm_struct *vm;
-	int i;
 
 	if (unlikely(in_interrupt())) {
 		vfree_atomic(addr);
@@ -3452,22 +3485,8 @@ void vfree(const void *addr)
 
 	if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS))
 		vm_reset_perms(vm);
-	/* All pages of vm should be charged to same memcg, so use first one. */
-	if (vm->nr_pages && !(vm->flags & VM_MAP_PUT_PAGES))
-		mod_memcg_page_state(vm->pages[0], MEMCG_VMALLOC, -vm->nr_pages);
-	for (i = 0; i < vm->nr_pages; i++) {
-		struct page *page = vm->pages[i];
-
-		BUG_ON(!page);
-		/*
-		 * High-order allocs for huge vmallocs are split, so
-		 * can be freed as an array of order-0 allocations
-		 */
-		__free_page(page);
-		cond_resched();
-	}
-	if (!(vm->flags & VM_MAP_PUT_PAGES))
-		atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages);
+	if (vm->nr_pages)
+		vmalloc_free_pages(vm, 0, vm->nr_pages);
 	kvfree(vm->pages);
 	kfree(vm);
 }
-- 
2.43.0
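Aside for reviewers, not part of the patch: the half-open [start, end)
convention is what lets the full free and the partial free share one
helper -- vfree() passes [0, nr_pages) and a shrink passes
[new_nr_pages, nr_pages). Below is a minimal userspace sketch of that
convention; fake_vm, free_pages_range, and the malloc'd page stubs are
invented for illustration and are not kernel API.

#include <stdio.h>
#include <stdlib.h>

struct fake_vm {
	void **pages;		/* stand-in for vm_struct::pages */
	unsigned int nr_pages;
};

/* Free pages [start, end), mirroring the helper's half-open range. */
static void free_pages_range(struct fake_vm *vm,
			     unsigned int start, unsigned int end)
{
	for (unsigned int i = start; i < end; i++) {
		free(vm->pages[i]);
		vm->pages[i] = NULL;
	}
}

int main(void)
{
	struct fake_vm vm = { .nr_pages = 8 };

	vm.pages = calloc(vm.nr_pages, sizeof(*vm.pages));
	for (unsigned int i = 0; i < vm.nr_pages; i++)
		vm.pages[i] = malloc(4096);

	free_pages_range(&vm, 5, vm.nr_pages);	/* partial free: keep 5 pages */
	vm.nr_pages = 5;
	free_pages_range(&vm, 0, vm.nr_pages);	/* full free, as vfree() does */

	free(vm.pages);
	printf("freed all pages via [start, end) ranges\n");
	return 0;
}

The same ordering rule as in the kernel applies: nr_pages is lowered
only after the tail range has been freed, so no index at or past
nr_pages is ever considered live.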
From: Shivam Kalra
Subject: [PATCH 2/2] mm/vmalloc: unmap and free tail pages when vrealloc() shrinks

When vrealloc() shrinks an allocation and the new size crosses a page
boundary, unmap and free the tail pages that are no longer needed.
This reclaims physical memory that was previously pinned for the
lifetime of the allocation.

The heuristic is simple: free whenever at least one full page becomes
unused. Huge page allocations (page_order > 0) are skipped, since
partially freeing them would require splitting. The virtual address
reservation (vm->size / the vmap_area) is intentionally left
unchanged, preserving the address range for potential future
grow-in-place support.

Also fix the grow-in-place check to compare against vm->nr_pages
rather than get_vm_area_size(), since the latter reflects the virtual
reservation, which does not shrink. Without this fix, a grow after a
shrink would access freed pages.

Signed-off-by: Shivam Kalra
---
 mm/vmalloc.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 54e76a47e995..7a4c59422638 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -4327,14 +4327,28 @@ void *vrealloc_node_align_noprof(const void *p, size_t size, unsigned long align
 		goto need_realloc;
 	}
 
-	/*
-	 * TODO: Shrink the vm_area, i.e. unmap and free unused pages. What
-	 * would be a good heuristic for when to shrink the vm_area?
-	 */
 	if (size <= old_size) {
+		unsigned int new_nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
 		/* Zero out "freed" memory, potentially for future realloc. */
 		if (want_init_on_free() || want_init_on_alloc(flags))
 			memset((void *)p + size, 0, old_size - size);
+
+		/* Free tail pages when the shrink crosses a page boundary. */
+		if (new_nr_pages < vm->nr_pages && !vm_area_page_order(vm)) {
+			unsigned long addr = (unsigned long)p;
+			unsigned long new_end = addr +
+				((unsigned long)new_nr_pages << PAGE_SHIFT);
+			unsigned long old_end = addr +
+				((unsigned long)vm->nr_pages << PAGE_SHIFT);
+
+			vunmap_range(new_end, old_end);
+			kasan_poison_vmalloc((void *)new_end, old_end - new_end);
+
+			vmalloc_free_pages(vm, new_nr_pages, vm->nr_pages);
+			vm->nr_pages = new_nr_pages;
+		}
+
 		vm->requested_size = size;
 		kasan_poison_vmalloc(p + size, old_size - size);
 		return (void *)p;
@@ -4343,7 +4357,7 @@ void *vrealloc_node_align_noprof(const void *p, size_t size, unsigned long align
 	/*
 	 * We already have the bytes available in the allocation; use them.
 	 */
-	if (size <= alloced_size) {
+	if (size <= (size_t)vm->nr_pages << PAGE_SHIFT) {
 		kasan_unpoison_vmalloc(p + old_size, size - old_size,
 				       KASAN_VMALLOC_PROT_NORMAL |
 				       KASAN_VMALLOC_VM_ALLOC |
-- 
2.43.0
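Aside for reviewers, not part of the patch: the heuristic reduces to
new_nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT, with tail pages freed
only when new_nr_pages < vm->nr_pages, so a shrink that stays within
the last used page frees nothing. The userspace sketch below walks
that arithmetic for a 4-page (16K) allocation; it assumes 4K pages and
redefines PAGE_ALIGN locally rather than pulling in kernel headers.

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long nr_pages = 4;	/* 16K allocation, order-0 pages */
	unsigned long sizes[] = { 16000, 12289, 12288, 4096, 1 };

	for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		unsigned long new_nr = PAGE_ALIGN(sizes[i]) >> PAGE_SHIFT;

		printf("shrink to %5lu bytes -> keep %lu page(s), free %lu\n",
		       sizes[i], new_nr,
		       new_nr < nr_pages ? nr_pages - new_nr : 0);
	}
	return 0;
}

Shrinking from 16384 to 12289 bytes keeps all 4 pages (the tail page
is still partially used); only at 12288 bytes does the last page
become fully unused and get freed.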