The check against the max map count (sysctl_max_map_count) was open-coded in several places. This led to inconsistent enforcement and subtle bugs where the limit could be exceeded. For example, some paths would check map_count > sysctl_max_map_count before allocating a new VMA and incrementing the count, allowing the process to reach sysctl_max_map_count + 1: int do_brk_flags(...) { if (mm->map_count > sysctl_max_map_count) return -ENOMEM; /* We can get here with mm->map_count == sysctl_max_map_count */ vma = vm_area_alloc(mm); ... mm->map_count++; /* We've now exceeded the threshold. */ } To fix this and unify the logic, introduce a new function, exceeds_max_map_count(), to consolidate the check. All open-coded checks are replaced with calls to this new function, ensuring the limit is applied uniformly and correctly. To improve encapsulation, sysctl_max_map_count is now static in mm/mmap.c. The new helper also adds a rate-limited warning to make debugging applications that exhaust their VMA limit easier. Cc: Andrew Morton Cc: Minchan Kim Cc: Lorenzo Stoakes Signed-off-by: Kalesh Singh --- include/linux/mm.h | 11 ++++++++++- mm/mmap.c | 15 ++++++++++++++- mm/mremap.c | 7 ++++--- mm/nommu.c | 2 +- mm/util.c | 1 - mm/vma.c | 6 +++--- 6 files changed, 32 insertions(+), 10 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 1ae97a0b8ec7..d4e64e6a9814 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -192,7 +192,16 @@ static inline void __mm_zero_struct_page(struct page *page) #define MAPCOUNT_ELF_CORE_MARGIN (5) #define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) -extern int sysctl_max_map_count; +/** + * exceeds_max_map_count - check if a VMA operation would exceed max_map_count + * @mm: The memory descriptor for the process. + * @new_vmas: The number of new VMAs the operation will create. + * + * Returns true if the operation would cause the number of VMAs to exceed + * the sysctl_max_map_count limit, false otherwise. 
A rate-limited warning + * is logged if the limit is exceeded. + */ +extern bool exceeds_max_map_count(struct mm_struct *mm, unsigned int new_vmas); extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes; diff --git a/mm/mmap.c b/mm/mmap.c index 7306253cc3b5..693a0105e6a5 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -374,7 +374,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, return -EOVERFLOW; /* Too many mappings? */ - if (mm->map_count > sysctl_max_map_count) + if (exceeds_max_map_count(mm, 0)) return -ENOMEM; /* @@ -1504,6 +1504,19 @@ struct vm_area_struct *_install_special_mapping( int sysctl_legacy_va_layout; #endif +static int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; + +bool exceeds_max_map_count(struct mm_struct *mm, unsigned int new_vmas) +{ + if (unlikely(mm->map_count + new_vmas > sysctl_max_map_count)) { + pr_warn_ratelimited("%s (%d): Map count limit %u exceeded\n", + current->comm, current->pid, + sysctl_max_map_count); + return true; + } + return false; +} + static const struct ctl_table mmap_table[] = { { .procname = "max_map_count", diff --git a/mm/mremap.c b/mm/mremap.c index e618a706aff5..793fad58302c 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1040,7 +1040,7 @@ static unsigned long prep_move_vma(struct vma_remap_struct *vrm) * We'd prefer to avoid failure later on in do_munmap: * which may split one vma into three before unmapping. */ - if (current->mm->map_count >= sysctl_max_map_count - 3) + if (exceeds_max_map_count(current->mm, 4)) return -ENOMEM; if (vma->vm_ops && vma->vm_ops->may_split) { @@ -1811,9 +1811,10 @@ static unsigned long check_mremap_params(struct vma_remap_struct *vrm) * split in 3 before unmapping it. * That means 2 more maps (1 for each) to the ones we already hold. * Check whether current map count plus 2 still leads us to 4 maps below - * the threshold, otherwise return -ENOMEM here to be more safe. + * the threshold. 
In other words, is the current map count + 6 at or + * below the threshold? Otherwise return -ENOMEM here to be more safe. */ - if ((current->mm->map_count + 2) >= sysctl_max_map_count - 3) + if (exceeds_max_map_count(current->mm, 6)) return -ENOMEM; return 0; diff --git a/mm/nommu.c b/mm/nommu.c index 8b819fafd57b..0533e1e3b266 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1316,7 +1316,7 @@ static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, return -ENOMEM; mm = vma->vm_mm; - if (mm->map_count >= sysctl_max_map_count) + if (exceeds_max_map_count(mm, 1)) return -ENOMEM; region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL); diff --git a/mm/util.c b/mm/util.c index f814e6a59ab1..b6e83922cafe 100644 --- a/mm/util.c +++ b/mm/util.c @@ -751,7 +751,6 @@ EXPORT_SYMBOL(folio_mc_copy); int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; static int sysctl_overcommit_ratio __read_mostly = 50; static unsigned long sysctl_overcommit_kbytes __read_mostly; -int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */ diff --git a/mm/vma.c b/mm/vma.c index 3b12c7579831..f804c8ac8fbb 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -592,7 +592,7 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long addr, int new_below) { - if (vma->vm_mm->map_count >= sysctl_max_map_count) + if (exceeds_max_map_count(vma->vm_mm, 1)) return -ENOMEM; return __split_vma(vmi, vma, addr, new_below); @@ -1345,7 +1345,7 @@ static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms, * its limit temporarily, to help free resources as expected. 
*/ if (vms->end < vms->vma->vm_end && - vms->vma->vm_mm->map_count >= sysctl_max_map_count) { + exceeds_max_map_count(vms->vma->vm_mm, 1)) { error = -ENOMEM; goto map_count_exceeded; } @@ -2772,7 +2772,7 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) return -ENOMEM; - if (mm->map_count > sysctl_max_map_count) + if (exceeds_max_map_count(mm, 1)) return -ENOMEM; if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT)) -- 2.51.0.338.gd7d06c2dae-goog