memblock_estimated_nr_free_pages() returns the difference between the total
size of the "memory" memblock type and the "reserved" memblock type.

The "soft-reserved" memory regions are added to the "reserved" memblock
type, but not to the "memory" memblock type. Therefore,
memblock_estimated_nr_free_pages() may return a smaller value than
expected, or if it underflows, an extremely large value.

/proc/sys/kernel/threads-max is determined by the value of
memblock_estimated_nr_free_pages(). This issue was discovered on machines
with CXL memory because kernel.threads-max was either smaller than expected
or extremely large for the installed DRAM size.

This fixes the issue by improving the accuracy of
memblock_estimated_nr_free_pages() by subtracting only the overlapping size
of regions with "memory" and "reserved" memblock types.

Signed-off-by: Akinobu Mita
---
 mm/memblock.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/mm/memblock.c b/mm/memblock.c
index e23e16618e9b..af014fa10a44 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1812,6 +1812,29 @@ phys_addr_t __init_memblock memblock_reserved_kern_size(phys_addr_t limit, int n
 	return total;
 }
 
+/*
+ * Return the size in bytes of the intersection of [base1, base1 + size1)
+ * and [base2, base2 + size2), or 0 if the two ranges do not intersect.
+ */
+static phys_addr_t __init memblock_addrs_overlap_size(phys_addr_t base1, phys_addr_t size1,
+						      phys_addr_t base2, phys_addr_t size2)
+{
+	phys_addr_t start, end;
+
+	/*
+	 * Cap the sizes before doing any base + size arithmetic: the cap is
+	 * expected to keep base + size from wrapping around, which would
+	 * otherwise defeat both the overlap test and the subtraction below.
+	 */
+	memblock_cap_size(base1, &size1);
+	memblock_cap_size(base2, &size2);
+
+	start = max(base1, base2);
+	end = min(base1 + size1, base2 + size2);
+
+	return end > start ? end - start : 0;
+}
+
 /**
  * memblock_estimated_nr_free_pages - return estimated number of free pages
  * from memblock point of view
@@ -1826,7 +1849,30 @@ phys_addr_t __init_memblock memblock_reserved_kern_size(phys_addr_t limit, int n
  */
 unsigned long __init memblock_estimated_nr_free_pages(void)
 {
-	return PHYS_PFN(memblock_phys_mem_size() - memblock_reserved_size());
+	struct memblock_type *memory_type = &memblock.memory;
+	struct memblock_type *reserved_type = &memblock.reserved;
+	struct memblock_region *memory_region, *reserved_region;
+	phys_addr_t free_size = 0;
+	int memory_idx, reserved_idx;
+
+	/*
+	 * Count, for each "memory" region, only the part that is not covered by
+	 * any "reserved" region.  Reserved ranges lying outside of "memory" are
+	 * thereby ignored instead of being subtracted from the total.
+	 */
+	for_each_memblock_type(memory_idx, memory_type, memory_region) {
+		phys_addr_t reserved_size = 0;
+
+		for_each_memblock_type(reserved_idx, reserved_type, reserved_region) {
+			reserved_size += memblock_addrs_overlap_size(memory_region->base,
+					memory_region->size, reserved_region->base,
+					reserved_region->size);
+		}
+
+		free_size += memory_region->size - reserved_size;
+	}
+
+	return PHYS_PFN(free_size);
 }
 
 /* lowest address */
-- 
2.43.0