kdamond_apply_schemes() currently iterates over all targets, then over all regions, and finally calls damon_do_apply_schemes(), which iterates over all schemes. This nesting evaluates scheme-level invariants (apply interval, watermark activation status, and quota limits) in the innermost loop, once per region. When a scheme is inactive, has not yet reached its apply interval, or has already consumed its quota (quota->charged_sz >= quota->esz), the kernel still walks thousands of regions only to re-evaluate the same scheme-level conditions and bail out each time. Inline damon_do_apply_schemes() into kdamond_apply_schemes() and invert the loop ordering: iterate over schemes on the outside and targets/regions on the inside. Scheme-level gates are then checked up front, and once a scheme's quota is exhausted the remaining O(nr_targets * nr_regions) inner iterations for that scheme are skipped entirely. This reduces unnecessary branching, cache pressure, and CPU overhead in the kdamond hot path.
Signed-off-by: Josh Law --- mm/damon/core.c | 72 +++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 42 deletions(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index c884bb31c9b8..dece0da079c8 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -2112,40 +2112,6 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t, damos_update_stat(s, sz, sz_applied, sz_ops_filter_passed); } -static void damon_do_apply_schemes(struct damon_ctx *c, - struct damon_target *t, - struct damon_region *r) -{ - struct damos *s; - - damon_for_each_scheme(s, c) { - struct damos_quota *quota = &s->quota; - - if (time_before(c->passed_sample_intervals, s->next_apply_sis)) - continue; - - if (!s->wmarks.activated) - continue; - - /* Check the quota */ - if (quota->esz && quota->charged_sz >= quota->esz) - continue; - - if (damos_skip_charged_region(t, r, s, c->min_region_sz)) - continue; - - if (s->max_nr_snapshots && - s->max_nr_snapshots <= s->stat.nr_snapshots) - continue; - - if (damos_valid_target(c, r, s)) - damos_apply_scheme(c, t, r, s); - - if (damon_is_last_region(r, t)) - s->stat.nr_snapshots++; - } -} - /* * damon_feed_loop_next_input() - get next input to achieve a target score. * @last_input The last input. 
@@ -2494,17 +2460,39 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
 		return;
 
 	mutex_lock(&c->walk_control_lock);
-	damon_for_each_target(t, c) {
-		if (c->ops.target_valid && c->ops.target_valid(t) == false)
-			continue;
-
-		damon_for_each_region(r, t)
-			damon_do_apply_schemes(c, t, r);
-	}
-
 	damon_for_each_scheme(s, c) {
+		struct damos_quota *quota = &s->quota;
+
 		if (time_before(c->passed_sample_intervals, s->next_apply_sis))
 			continue;
+
+		if (!s->wmarks.activated)
+			goto next_scheme;
+
+		damon_for_each_target(t, c) {
+			if (c->ops.target_valid && c->ops.target_valid(t) == false)
+				continue;
+
+			damon_for_each_region(r, t) {
+				/* Check the quota */
+				if (quota->esz && quota->charged_sz >= quota->esz)
+					goto next_scheme;
+
+				if (s->max_nr_snapshots &&
+				    s->max_nr_snapshots <= s->stat.nr_snapshots)
+					goto next_scheme;
+
+				if (damos_skip_charged_region(t, r, s, c->min_region_sz))
+					continue;
+
+				if (damos_valid_target(c, r, s))
+					damos_apply_scheme(c, t, r, s);
+
+				if (damon_is_last_region(r, t))
+					s->stat.nr_snapshots++;
+			}
+		}
+next_scheme:
 		damos_walk_complete(c, s);
 		damos_set_next_apply_sis(s, c);
 		s->last_applied = NULL;
-- 
2.34.1

Hardware integer division is slow. The function damon_max_nr_accesses(), which is called very frequently (e.g., once per region per sample interval inside damon_update_region_access_rate), performs an integer division: attrs->aggr_interval / attrs->sample_interval. However, the struct damon_attrs already caches this exact ratio in the internal field aggr_samples (since earlier commits). We can eliminate the hardware division in the hot path by simply returning aggr_samples. This significantly reduces the CPU cycle overhead of updating the access rates for thousands of regions.
Signed-off-by: Josh Law --- include/linux/damon.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index 6bd71546f7b2..fffdb08326a2 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -960,8 +960,7 @@ static inline bool damon_target_has_pid(const struct damon_ctx *ctx) static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs) { /* {aggr,sample}_interval are unsigned long, hence could overflow */ - return min(attrs->aggr_interval / attrs->sample_interval, - (unsigned long)UINT_MAX); + return min(attrs->aggr_samples, (unsigned long)UINT_MAX); } -- 2.34.1