Make set_recommended_min_free_kbytes() callable from outside
khugepaged.c by removing the static qualifier and adding a declaration
in mm/internal.h. This allows callers that change THP settings to
recalculate watermarks without going through start_stop_khugepaged().

Suggested-by: Lorenzo Stoakes (Oracle)
Reviewed-by: Lorenzo Stoakes (Oracle)
Acked-by: David Hildenbrand (Arm)
Reviewed-by: Zi Yan
Signed-off-by: Breno Leitao
---
 mm/internal.h   | 5 +++++
 mm/khugepaged.c | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/mm/internal.h b/mm/internal.h
index cb0af847d7d99..7bd768e367793 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -623,6 +623,11 @@ int user_proactive_reclaim(char *buf,
  */
 pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
 
+/*
+ * in mm/khugepaged.c
+ */
+void set_recommended_min_free_kbytes(void);
+
 /*
  * in mm/page_alloc.c
  */
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 1dd3cfca610db..56a41c21b44c9 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2630,7 +2630,7 @@ static int khugepaged(void *none)
 	return 0;
 }
 
-static void set_recommended_min_free_kbytes(void)
+void set_recommended_min_free_kbytes(void)
 {
 	struct zone *zone;
 	int nr_zones = 0;
-- 
2.52.0
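
Note: a usage sketch, not part of the series. The caller below,
thp_update_setting(), is hypothetical; it only illustrates what the
export enables for code that changes THP settings:

	/* In some other mm/ file that already includes "internal.h". */
	static void thp_update_setting(void)
	{
		/* ... adjust a THP tunable that affects watermark sizing ... */

		/*
		 * Refresh min_free_kbytes directly instead of
		 * round-tripping through start_stop_khugepaged().
		 */
		set_recommended_min_free_kbytes();
	}
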
Consolidate the repeated spin_lock/set_bit/clear_bit pattern in
anon_enabled_store() into a new set_anon_enabled_mode() helper that
loops over an enabled_orders[] array, setting the bit for the selected
mode and clearing the others.

Introduce enum anon_enabled_mode and anon_enabled_mode_strings[] for
the per-order anon THP setting. Use sysfs_match_string() with the
anon_enabled_mode_strings[] table to replace the if/else chain of
sysfs_streq() calls.

The helper uses __test_and_set_bit()/__test_and_clear_bit() to track
whether the state actually changed, so start_stop_khugepaged() is only
called when needed. When the mode is unchanged,
set_recommended_min_free_kbytes() is called directly to preserve the
watermark recalculation behavior of the original code.

Signed-off-by: Breno Leitao
Reviewed-by: Lorenzo Stoakes (Oracle)
---
 mm/huge_memory.c | 84 +++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 52 insertions(+), 32 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8e2746ea74adf..f6af90e6cf05d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -316,6 +316,20 @@ static ssize_t enabled_show(struct kobject *kobj,
 	return sysfs_emit(buf, "%s\n", output);
 }
 
+enum anon_enabled_mode {
+	ANON_ENABLED_ALWAYS = 0,
+	ANON_ENABLED_INHERIT = 1,
+	ANON_ENABLED_MADVISE = 2,
+	ANON_ENABLED_NEVER = 3,
+};
+
+static const char * const anon_enabled_mode_strings[] = {
+	[ANON_ENABLED_ALWAYS] = "always",
+	[ANON_ENABLED_INHERIT] = "inherit",
+	[ANON_ENABLED_MADVISE] = "madvise",
+	[ANON_ENABLED_NEVER] = "never",
+};
+
 static ssize_t enabled_store(struct kobject *kobj,
 			     struct kobj_attribute *attr,
 			     const char *buf, size_t count)
@@ -515,48 +529,54 @@ static ssize_t anon_enabled_show(struct kobject *kobj,
 	return sysfs_emit(buf, "%s\n", output);
 }
 
+static bool set_anon_enabled_mode(int order, enum anon_enabled_mode mode)
+{
+	static unsigned long *enabled_orders[] = {
+		&huge_anon_orders_always,
+		&huge_anon_orders_inherit,
+		&huge_anon_orders_madvise,
+	};
+	enum anon_enabled_mode m;
+	bool changed = false;
+
+	spin_lock(&huge_anon_orders_lock);
+	for (m = 0; m < ARRAY_SIZE(enabled_orders); m++) {
+		if (m == mode)
+			changed |= !__test_and_set_bit(order, enabled_orders[m]);
+		else
+			changed |= __test_and_clear_bit(order, enabled_orders[m]);
+	}
+	spin_unlock(&huge_anon_orders_lock);
+
+	return changed;
+}
+
 static ssize_t anon_enabled_store(struct kobject *kobj,
 				  struct kobj_attribute *attr,
 				  const char *buf, size_t count)
 {
 	int order = to_thpsize(kobj)->order;
-	ssize_t ret = count;
+	int mode;
 
-	if (sysfs_streq(buf, "always")) {
-		spin_lock(&huge_anon_orders_lock);
-		clear_bit(order, &huge_anon_orders_inherit);
-		clear_bit(order, &huge_anon_orders_madvise);
-		set_bit(order, &huge_anon_orders_always);
-		spin_unlock(&huge_anon_orders_lock);
-	} else if (sysfs_streq(buf, "inherit")) {
-		spin_lock(&huge_anon_orders_lock);
-		clear_bit(order, &huge_anon_orders_always);
-		clear_bit(order, &huge_anon_orders_madvise);
-		set_bit(order, &huge_anon_orders_inherit);
-		spin_unlock(&huge_anon_orders_lock);
-	} else if (sysfs_streq(buf, "madvise")) {
-		spin_lock(&huge_anon_orders_lock);
-		clear_bit(order, &huge_anon_orders_always);
-		clear_bit(order, &huge_anon_orders_inherit);
-		set_bit(order, &huge_anon_orders_madvise);
-		spin_unlock(&huge_anon_orders_lock);
-	} else if (sysfs_streq(buf, "never")) {
-		spin_lock(&huge_anon_orders_lock);
-		clear_bit(order, &huge_anon_orders_always);
-		clear_bit(order, &huge_anon_orders_inherit);
-		clear_bit(order, &huge_anon_orders_madvise);
-		spin_unlock(&huge_anon_orders_lock);
-	} else
-		ret = -EINVAL;
+	mode = sysfs_match_string(anon_enabled_mode_strings, buf);
+	if (mode < 0)
+		return -EINVAL;
 
-	if (ret > 0) {
-		int err;
+	if (set_anon_enabled_mode(order, mode)) {
+		int err = start_stop_khugepaged();
 
-		err = start_stop_khugepaged();
 		if (err)
-			ret = err;
+			return err;
+	} else {
+		/*
+		 * Recalculate watermarks even when the mode didn't
+		 * change, as the previous code always called
+		 * start_stop_khugepaged() which does this internally.
+		 */
+		set_recommended_min_free_kbytes();
 	}
-	return ret;
+
+	return count;
 }
 
 static struct kobj_attribute anon_enabled_attr =
-- 
2.52.0
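
Note: the change-tracking idiom is worth spelling out.
__test_and_set_bit() returns the bit's previous value, so negating it
means "the bit was newly set", while __test_and_clear_bit() returns
true only when a set bit was actually cleared. The __-prefixed
(non-atomic) bitops are safe here because every writer holds
huge_anon_orders_lock. A minimal userspace model of the loop (plain C
with hand-rolled stand-ins for the kernel bitops, not kernel code):

	#include <stdbool.h>
	#include <stdio.h>

	/* Stand-ins for __test_and_set_bit()/__test_and_clear_bit(). */
	static bool test_and_set(unsigned long *word, int bit)
	{
		bool old = *word & (1UL << bit);

		*word |= 1UL << bit;
		return old;
	}

	static bool test_and_clear(unsigned long *word, int bit)
	{
		bool old = *word & (1UL << bit);

		*word &= ~(1UL << bit);
		return old;
	}

	int main(void)
	{
		/* Three "mode" bitmaps; order 4 is already in mode 1. */
		unsigned long modes[3] = { 0, 1UL << 4, 0 };
		int order = 4, target = 1;
		bool changed = false;
		int m;

		/* Same shape as the loop in set_anon_enabled_mode(). */
		for (m = 0; m < 3; m++) {
			if (m == target)
				changed |= !test_and_set(&modes[m], order);
			else
				changed |= test_and_clear(&modes[m], order);
		}

		/* Prints changed=0: the requested mode was already set. */
		printf("changed=%d\n", changed);
		return 0;
	}

Writing "never" falls out naturally: ANON_ENABLED_NEVER matches none of
the enabled_orders[] indexes, so the loop clears the order's bit in all
three bitmaps.
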
Refactor enabled_store() to use a new set_global_enabled_mode() helper.
Introduce a separate enum global_enabled_mode and
global_enabled_mode_strings[], mirroring the anon_enabled_mode pattern
from the previous commit.

A separate enum is necessary because the global THP setting does not
support "inherit", only "always", "madvise", and "never". Reusing
anon_enabled_mode would leave a NULL gap in the string array, causing
sysfs_match_string() to stop early and fail to match entries after the
gap.

The helper uses the same loop pattern as set_anon_enabled_mode(),
iterating over an array of flag bit positions and using
test_and_set_bit()/test_and_clear_bit() to track whether the state
actually changed.

Reviewed-by: Lorenzo Stoakes (Oracle)
Reviewed-by: Zi Yan
Reviewed-by: Baolin Wang
Reviewed-by: Wei Yang
Signed-off-by: Breno Leitao
---
 mm/huge_memory.c | 63 ++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 48 insertions(+), 15 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f6af90e6cf05d..b95fde5843399 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -330,30 +330,63 @@ static const char * const anon_enabled_mode_strings[] = {
 	[ANON_ENABLED_NEVER] = "never",
 };
 
+enum global_enabled_mode {
+	GLOBAL_ENABLED_ALWAYS = 0,
+	GLOBAL_ENABLED_MADVISE = 1,
+	GLOBAL_ENABLED_NEVER = 2,
+};
+
+static const char * const global_enabled_mode_strings[] = {
+	[GLOBAL_ENABLED_ALWAYS] = "always",
+	[GLOBAL_ENABLED_MADVISE] = "madvise",
+	[GLOBAL_ENABLED_NEVER] = "never",
+};
+
+static bool set_global_enabled_mode(enum global_enabled_mode mode)
+{
+	static const unsigned long thp_flags[] = {
+		TRANSPARENT_HUGEPAGE_FLAG,
+		TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
+	};
+	enum global_enabled_mode m;
+	bool changed = false;
+
+	for (m = 0; m < ARRAY_SIZE(thp_flags); m++) {
+		if (m == mode)
+			changed |= !test_and_set_bit(thp_flags[m],
+						     &transparent_hugepage_flags);
+		else
+			changed |= test_and_clear_bit(thp_flags[m],
+						      &transparent_hugepage_flags);
+	}
+
+	return changed;
+}
+
 static ssize_t enabled_store(struct kobject *kobj,
 			     struct kobj_attribute *attr,
 			     const char *buf, size_t count)
 {
-	ssize_t ret = count;
+	int mode;
 
-	if (sysfs_streq(buf, "always")) {
-		clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags);
-		set_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags);
-	} else if (sysfs_streq(buf, "madvise")) {
-		clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags);
-		set_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags);
-	} else if (sysfs_streq(buf, "never")) {
-		clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags);
-		clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags);
-	} else
-		ret = -EINVAL;
+	mode = sysfs_match_string(global_enabled_mode_strings, buf);
+	if (mode < 0)
+		return -EINVAL;
 
-	if (ret > 0) {
+	if (set_global_enabled_mode(mode)) {
 		int err = start_stop_khugepaged();
+
 		if (err)
-			ret = err;
+			return err;
+	} else {
+		/*
+		 * Recalculate watermarks even when the mode didn't
+		 * change, as the previous code always called
+		 * start_stop_khugepaged() which does this internally.
+		 */
+		set_recommended_min_free_kbytes();
 	}
-	return ret;
+
+	return count;
 }
 
 static struct kobj_attribute enabled_attr = __ATTR_RW(enabled);
-- 
2.52.0
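
Note: the stop-at-NULL behavior that motivates the separate enum is
easy to demonstrate with a small userspace model (simplified; the
kernel's __sysfs_match_string() uses sysfs_streq() rather than
strcmp(), but the early break on a NULL entry is the point):

	#include <stdio.h>
	#include <string.h>

	/* Simplified model of __sysfs_match_string(). */
	static int match_string_model(const char * const *array, size_t n,
				      const char *str)
	{
		size_t i;

		for (i = 0; i < n; i++) {
			if (!array[i])
				break;	/* a NULL gap hides every later entry */
			if (strcmp(array[i], str) == 0)
				return (int)i;
		}
		return -1;
	}

	int main(void)
	{
		/* A global table indexed by the anon enum would have a
		 * NULL "inherit" slot at index 1. */
		const char * const gapped[] = {
			"always", NULL, "madvise", "never"
		};

		printf("%d\n", match_string_model(gapped, 4, "always"));  /* 0 */
		printf("%d\n", match_string_model(gapped, 4, "madvise")); /* -1 */
		return 0;
	}

With a dedicated, densely indexed table there is no gap, so every valid
mode string remains reachable.
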
The "raising min_free_kbytes" pr_info message in
set_recommended_min_free_kbytes() and the "min_free_kbytes is not
updated to" pr_warn in calculate_min_free_kbytes() can spam the kernel
log when called repeatedly.

Switch the pr_info in set_recommended_min_free_kbytes() and the
pr_warn in calculate_min_free_kbytes() to their _ratelimited variants
to prevent this log spam.

Reviewed-by: Lorenzo Stoakes (Oracle)
Acked-by: David Hildenbrand (Arm)
Reviewed-by: Baolin Wang
Acked-by: Zi Yan
Signed-off-by: Breno Leitao
---
 mm/khugepaged.c | 4 ++--
 mm/page_alloc.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 56a41c21b44c9..d44d463ccfd3e 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2671,8 +2671,8 @@ void set_recommended_min_free_kbytes(void)
 
 	if (recommended_min > min_free_kbytes) {
 		if (user_min_free_kbytes >= 0)
-			pr_info("raising min_free_kbytes from %d to %lu to help transparent hugepage allocations\n",
-				min_free_kbytes, recommended_min);
+			pr_info_ratelimited("raising min_free_kbytes from %d to %lu to help transparent hugepage allocations\n",
+					    min_free_kbytes, recommended_min);
 
 		min_free_kbytes = recommended_min;
 	}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2d4b6f1a554ed..c840c886807bf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6553,8 +6553,8 @@ void calculate_min_free_kbytes(void)
 
 	if (new_min_free_kbytes > user_min_free_kbytes)
 		min_free_kbytes = clamp(new_min_free_kbytes, 128, 262144);
 	else
-		pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n",
-			new_min_free_kbytes, user_min_free_kbytes);
+		pr_warn_ratelimited("min_free_kbytes is not updated to %d because user defined value %d is preferred\n",
+				    new_min_free_kbytes, user_min_free_kbytes);
 }
 
-- 
2.52.0
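
Note: for context, the _ratelimited printk variants keep a static
ratelimit state per call site, so by default each site is allowed a
burst of up to 10 messages per 5-second window, and suppression at one
site does not affect another. A simplified sketch of the mechanism,
based on include/linux/printk.h and include/linux/ratelimit.h (not
verbatim; details vary by kernel version):

	#define printk_ratelimited(fmt, ...)				\
	({								\
		static DEFINE_RATELIMIT_STATE(_rs,			\
					      DEFAULT_RATELIMIT_INTERVAL, \
					      DEFAULT_RATELIMIT_BURST);	\
									\
		if (__ratelimit(&_rs))					\
			printk(fmt, ##__VA_ARGS__);			\
	})

pr_info_ratelimited() and pr_warn_ratelimited() layer the
KERN_INFO/KERN_WARNING level on top of this, which is why the
conversion needs no changes to the format strings or arguments.
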