To report access information to DAMON, DAMON API callers should implement their DAMON operation set and register that to the DAMON core layer. It is a burden to do such implementation and registration, especially when existing kernel components want to simply report their observed access information. Add a new DAMON API function for simply reporting identified data accesses to DAMON, on the reporter's schedule. The function internally uses a mutex, so reporting kernel code should be safe to sleep. This API was also discussed at LSFMMBPF'25: https://lwn.net/Articles/1016525/ Signed-off-by: SeongJae Park --- include/linux/damon.h | 24 ++++++++++++++++++++++++ mm/damon/core.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index a67292a2f09d..1bee6e7fed1d 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -110,6 +110,22 @@ struct damon_target { bool obsolete; }; +/** + * struct damon_access_report - Represent single access report information. + * @addr: The start address of the accessed address range. + * @size: The size of the accessed address range. + * + * Any DAMON API callers that noticed access events can report the information + * to DAMON using damon_report_access(). This struct contains the reporting + * information. Refer to damon_report_access() for more details. + */ +struct damon_access_report { + unsigned long addr; + unsigned long size; +/* private: */ + unsigned long report_jiffies; /* when this report is made */ +}; + /** * enum damos_action - Represents an action of a Data Access Monitoring-based * Operation Scheme.
@@ -972,10 +988,18 @@ bool damon_is_running(struct damon_ctx *ctx); int damon_call(struct damon_ctx *ctx, struct damon_call_control *control); int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control); +void damon_report_access(struct damon_access_report *report); + int damon_set_region_biggest_system_ram_default(struct damon_target *t, unsigned long *start, unsigned long *end, unsigned long min_sz_region); +#else /* CONFIG_DAMON */ + +static inline void damon_report_access(struct damon_access_report *report) +{ +} + #endif /* CONFIG_DAMON */ #endif /* _DAMON_H */ diff --git a/mm/damon/core.c b/mm/damon/core.c index cc15d8ec9dce..7abd8c550c60 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -20,6 +20,8 @@ #define CREATE_TRACE_POINTS #include +#define DAMON_ACCESS_REPORTS_CAP 1000 + static DEFINE_MUTEX(damon_lock); static int nr_running_ctxs; static bool running_exclusive_ctxs; @@ -29,6 +31,11 @@ static struct damon_operations damon_registered_ops[NR_DAMON_OPS]; static struct kmem_cache *damon_region_cache __ro_after_init; +static DEFINE_MUTEX(damon_access_reports_lock); +static struct damon_access_report damon_access_reports[ + DAMON_ACCESS_REPORTS_CAP]; +static int damon_access_reports_len; + /* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */ static bool __damon_is_registered_ops(enum damon_ops_id id) { @@ -1271,6 +1278,8 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src) return err; } dst->ops = src->ops; + if (err) + return err; dst->addr_unit = src->addr_unit; dst->min_sz_region = src->min_sz_region; @@ -1521,6 +1530,34 @@ int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control) return 0; } +/** + * damon_report_access() - Report identified access events to DAMON. + * @report: The reporting access information. + * + * Report access events to DAMON. + * + * Context: May sleep. + * + * NOTE: we may be able to implement this as a lockless queue, and allow any + * context. 
As the overhead is unknown, and region-based DAMON logics would + * guarantee the reports would be not made that frequently, let's start with + * this simple implementation. + */ +void damon_report_access(struct damon_access_report *report) +{ + struct damon_access_report *dst; + + /* silently fail for races */ + if (!mutex_trylock(&damon_access_reports_lock)) + return; + dst = &damon_access_reports[damon_access_reports_len++]; + if (damon_access_reports_len == DAMON_ACCESS_REPORTS_CAP) + damon_access_reports_len = 0; + *dst = *report; + dst->report_jiffies = jiffies; + mutex_unlock(&damon_access_reports_lock); +} + /* * Warn and fix corrupted ->nr_accesses[_bp] for investigations and preventing * the problem being propagated. -- 2.47.3 All existing DAMON operation sets are using page table Accessed bits as their access check sampling primitive. In following commits, the operation set for physical address space will be extended for multiple access sampling primitives, specifically page fault events. Define a new DAMON core layer API data structure for controlling which primitives the given operation set should use. Signed-off-by: SeongJae Park --- include/linux/damon.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index 1bee6e7fed1d..b9359c76a6f1 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -765,6 +765,33 @@ struct damon_attrs { unsigned long aggr_samples; }; +/** + * struct damon_primitives_enabled - Enablement of access sampling primitives. + * + * @page_table: Page table Accessed bits scanning. + * @page_fault: Page faults monitoring. + * + * Read &struct damon_sample_control for more details. + */ +struct damon_primitives_enabled { + bool page_table; + bool page_fault; +}; + +/** + * struct damon_sample_control - Low level access check sampling rules. + * + * @primitives_enabled: Enablement of access check primitives. 
 + * + * DAMON collects low level access information using sampling, and aggregates + * that to make a higher level access pattern picture. It can use multiple sampling + * primitives including page table accessed bits and page fault events. This + * struct is for controlling what sampling primitives to use (enable). + */ +struct damon_sample_control { + struct damon_primitives_enabled primitives_enabled; +}; + /** * struct damon_ctx - Represents a context for each monitoring. This is the * main interface that allows users to set the attributes and get the results @@ -833,6 +860,7 @@ struct damon_ctx { struct mutex kdamond_lock; struct damon_operations ops; + struct damon_sample_control sample_control; unsigned long addr_unit; unsigned long min_sz_region; -- 2.47.3 The parameters for sampling primitives usage control are not committed with damon_commit_ctx(). Hence it doesn't work with online parameters update. Implement the functionality. Signed-off-by: SeongJae Park --- mm/damon/core.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index 7abd8c550c60..89305d42e116 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -559,6 +559,8 @@ struct damon_ctx *damon_new_ctx(void) ctx->attrs.min_nr_regions = 10; ctx->attrs.max_nr_regions = 1000; + ctx->sample_control.primitives_enabled.page_table = true; + ctx->addr_unit = 1; ctx->min_sz_region = DAMON_MIN_REGION; @@ -1242,6 +1244,23 @@ static int damon_commit_targets( return 0; } +static bool damon_primitives_enabled_invalid( + struct damon_primitives_enabled *config) +{ + return config->page_table == config->page_fault; +} + +static int damon_commit_sample_control( + struct damon_sample_control *dst, + struct damon_sample_control *src) +{ + if (damon_primitives_enabled_invalid(&src->primitives_enabled)) + return -EINVAL; + + dst->primitives_enabled = src->primitives_enabled; + return 0; +} + /** * damon_commit_ctx() - Commit parameters of a DAMON context to another.
* @dst: The commit destination DAMON context. @@ -1278,6 +1297,8 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src) return err; } dst->ops = src->ops; + err = damon_commit_sample_control(&dst->sample_control, + &src->sample_control); if (err) return err; dst->addr_unit = src->addr_unit; -- 2.47.3 Implement a DAMON core API for reporting page fault events to DAMON. It is a wrapper of damon_report_access(). This is a temporal hack. In future versions that will have no RFC tag, only damon_report_access() may be used. Signed-off-by: SeongJae Park --- include/linux/damon.h | 10 ++++++++++ mm/damon/core.c | 17 +++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index b9359c76a6f1..b8ebb2aa02c8 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -1017,6 +1017,13 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control); int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control); void damon_report_access(struct damon_access_report *report); +#ifdef CONFIG_MMU +void damon_report_page_fault(struct vm_fault *vmf, bool huge_pmd); +#else +static inline void damon_report_page_fault(struct vm_fault *vmf, bool huge_pmd) +{ +} +#endif int damon_set_region_biggest_system_ram_default(struct damon_target *t, unsigned long *start, unsigned long *end, @@ -1027,6 +1034,9 @@ int damon_set_region_biggest_system_ram_default(struct damon_target *t, static inline void damon_report_access(struct damon_access_report *report) { } +static inline void damon_report_page_fault(struct vm_fault *vmf, bool huge_pmd) +{ +} #endif /* CONFIG_DAMON */ diff --git a/mm/damon/core.c b/mm/damon/core.c index 89305d42e116..296117d5e7f7 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1579,6 +1579,23 @@ void damon_report_access(struct damon_access_report *report) mutex_unlock(&damon_access_reports_lock); } +#ifdef CONFIG_MMU +void damon_report_page_fault(struct vm_fault *vmf, bool 
huge_pmd) +{ + struct damon_access_report access_report = { + .size = 1, /* todo: set appropriately */ + }; + + if (huge_pmd) + access_report.addr = PFN_PHYS(pmd_pfn(vmf->orig_pmd)); + else + access_report.addr = PFN_PHYS(pte_pfn(vmf->orig_pte)); + /* todo: report vmf->address as virtual address */ + + damon_report_access(&access_report); +} +#endif + + /* * Warn and fix corrupted ->nr_accesses[_bp] for investigations and preventing * the problem being propagated. -- 2.47.3 Note that this is not upstreamable as-is. This is only for helping discussion of other changes of its series. DAMON is using Accessed bits of page table entries as the major source of the access information. It lacks some additional information such as which CPU was making the access. Page faults could be another source of such additional information. Implement another change_protection() flag for such use cases, namely MM_CP_DAMON. DAMON will install PAGE_NONE protections using the flag. To avoid interfering with NUMA_BALANCING, which is also using PAGE_NONE protection, pass the faults to DAMON only when NUMA_BALANCING is disabled. Again, this is not upstreamable as-is. There were comments about this on the previous version, and I was unable to take time on addressing those. As a result, this version is not addressing any of those previous comments. I'm sending this, though, to help discussions on patches of its series, except this one. Please forgive me adding this to your inbox without addressing your comments, and ignore. I will establish another discussion for this part later.
Signed-off-by: SeongJae Park --- include/linux/mm.h | 1 + mm/memory.c | 60 ++++++++++++++++++++++++++++++++++++++++++++-- mm/mprotect.c | 5 ++++ 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 553cf9f438f1..2cba5a0196da 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2848,6 +2848,7 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen); #define MM_CP_UFFD_WP_RESOLVE (1UL << 3) /* Resolve wp */ #define MM_CP_UFFD_WP_ALL (MM_CP_UFFD_WP | \ MM_CP_UFFD_WP_RESOLVE) +#define MM_CP_DAMON (1UL << 4) bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, pte_t pte); diff --git a/mm/memory.c b/mm/memory.c index 6675e87eb7dd..5dc85adb1e59 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -78,6 +78,7 @@ #include #include #include +#include #include @@ -6172,6 +6173,54 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) return VM_FAULT_FALLBACK; } +/* + * NOTE: This is only poc purpose "hack" that will not be upstreamed as is. + * More discussions between all stakeholders including maintainers of MM core, + * NUMA balancing, and DAMON should be made to make this upstreamable. + * (https://lore.kernel.org/20251128193947.80866-1-sj@kernel.org) + * + * This function is called from page fault handler, for page faults on + * P{TE,MD}-protected but vma-accessible pages. DAMON is making the fake + * protection for access sampling purpose. This function simply clear the + * protection and report this access to DAMON, by calling + * damon_report_page_fault(). + * + * The protection clear code is copied from NUMA fault handling code for PTE. + * Again, this is only poc purpose "hack" to show what information DAMON want + * from page fault events, rather than an upstream-aimed version. 
+ */ +static vm_fault_t do_damon_page(struct vm_fault *vmf, bool huge_pmd) +{ + struct vm_area_struct *vma = vmf->vma; + struct folio *folio; + pte_t pte, old_pte; + bool writable = false, ignore_writable = false; + bool pte_write_upgrade = vma_wants_manual_pte_write_upgrade(vma); + + spin_lock(vmf->ptl); + old_pte = ptep_get(vmf->pte); + if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { + pte_unmap_unlock(vmf->pte, vmf->ptl); + return 0; + } + pte = pte_modify(old_pte, vma->vm_page_prot); + writable = pte_write(pte); + if (!writable && pte_write_upgrade && + can_change_pte_writable(vma, vmf->address, pte)) + writable = true; + folio = vm_normal_folio(vma, vmf->address, pte); + if (folio && folio_test_large(folio)) + numa_rebuild_large_mapping(vmf, vma, folio, pte, + ignore_writable, pte_write_upgrade); + else + numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, + writable); + pte_unmap_unlock(vmf->pte, vmf->ptl); + + damon_report_page_fault(vmf, huge_pmd); + return 0; +} + /* * These routines also need to handle stuff like marking pages dirty * and/or accessed for architectures that don't do it in hardware (most @@ -6236,8 +6285,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) if (!pte_present(vmf->orig_pte)) return do_swap_page(vmf); - if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) + if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) { + if (sysctl_numa_balancing_mode == NUMA_BALANCING_DISABLED) + return do_damon_page(vmf, false); return do_numa_page(vmf); + } spin_lock(vmf->ptl); entry = vmf->orig_pte; @@ -6363,8 +6415,12 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, return 0; } if (pmd_trans_huge(vmf.orig_pmd)) { - if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma)) + if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma)) { + if (sysctl_numa_balancing_mode == + NUMA_BALANCING_DISABLED) + return do_damon_page(&vmf, true); return do_huge_pmd_numa_page(&vmf); + } if ((flags & 
(FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) && !pmd_write(vmf.orig_pmd)) { diff --git a/mm/mprotect.c b/mm/mprotect.c index 5c330e817129..d2c14162f93d 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -651,6 +651,11 @@ long change_protection(struct mmu_gather *tlb, WARN_ON_ONCE(cp_flags & MM_CP_PROT_NUMA); #endif +#ifdef CONFIG_ARCH_SUPPORTS_NUMA_BALANCING + if (cp_flags & MM_CP_DAMON) + newprot = PAGE_NONE; +#endif + if (is_vm_hugetlb_page(vma)) pages = hugetlb_change_protection(tlb, vma, start, end, newprot, cp_flags); -- 2.47.3 Extend DAMON operation set for the physical address space (paddr) to support page fault access check primitive. When DAMON core layer asks it to use page fault events as its access check primitive, paddr will install PROT_NONE protection to access sampling target pages, in a way similar to NUMA_HINT_FAULTS, using the non-upstreamable hack that was added by the previous commit. Signed-off-by: SeongJae Park --- mm/damon/paddr.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c index 07a8aead439e..698ca6b9dde6 100644 --- a/mm/damon/paddr.c +++ b/mm/damon/paddr.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "../internal.h" #include "ops-common.h" @@ -56,7 +57,8 @@ static void __damon_pa_prepare_access_check(struct damon_region *r, damon_pa_mkold(damon_pa_phys_addr(r->sampling_addr, addr_unit)); } -static void damon_pa_prepare_access_checks(struct damon_ctx *ctx) +/* Use page table accessed bits */ +static void damon_pa_prepare_access_checks_abit(struct damon_ctx *ctx) { struct damon_target *t; struct damon_region *r; @@ -67,6 +69,68 @@ static void damon_pa_prepare_access_checks(struct damon_ctx *ctx) } } +static bool damon_pa_change_protection_one(struct folio *folio, + struct vm_area_struct *vma, unsigned long addr, void *arg) +{ + /* todo: batch or remove tlb flushing */ + struct mmu_gather tlb; + + if (!vma_is_accessible(vma)) 
+ return true; + + tlb_gather_mmu(&tlb, vma->vm_mm); + + change_protection(&tlb, vma, addr, addr + PAGE_SIZE, MM_CP_DAMON); + + tlb_finish_mmu(&tlb); + return true; +} + +static void damon_pa_change_protection(unsigned long paddr) +{ + struct folio *folio = damon_get_folio(PHYS_PFN(paddr)); + struct rmap_walk_control rwc = { + .rmap_one = damon_pa_change_protection_one, + .anon_lock = folio_lock_anon_vma_read, + }; + bool need_lock; + + if (!folio) + return; + if (!folio_mapped(folio) || !folio_raw_mapping(folio)) + return; + + need_lock = !folio_test_anon(folio) || folio_test_ksm(folio); + if (need_lock && !folio_trylock(folio)) + return; + + rmap_walk(folio, &rwc); + + if (need_lock) + folio_unlock(folio); +} + +static void damon_pa_prepare_access_checks_faults(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + + damon_for_each_target(t, ctx) { + damon_for_each_region(r, t) { + r->sampling_addr = damon_rand(r->ar.start, r->ar.end); + damon_pa_change_protection(r->sampling_addr); + } + } +} + +static void damon_pa_prepare_access_checks(struct damon_ctx *ctx) +{ + if (ctx->sample_control.primitives_enabled.page_table) + damon_pa_prepare_access_checks_abit(ctx); + if (ctx->sample_control.primitives_enabled.page_fault) + damon_pa_prepare_access_checks_faults(ctx); +} + static bool damon_pa_young(phys_addr_t paddr, unsigned long *folio_sz) { struct folio *folio = damon_get_folio(PHYS_PFN(paddr)); -- 2.47.3 Now any DAMON API callers can report their observed access information. The DAMON core layer is just ignoring those, though. Update the core to use the reported information at building the high level access pattern snapshot. 
Signed-off-by: SeongJae Park --- include/linux/damon.h | 1 + mm/damon/core.c | 68 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index b8ebb2aa02c8..b04c2e36833a 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -83,6 +83,7 @@ struct damon_region { unsigned int age; /* private: Internal value for age calculation. */ unsigned int last_nr_accesses; + bool access_reported; }; /** diff --git a/mm/damon/core.c b/mm/damon/core.c index 296117d5e7f7..a14754a47c7f 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -137,6 +137,7 @@ struct damon_region *damon_new_region(unsigned long start, unsigned long end) region->age = 0; region->last_nr_accesses = 0; + region->access_reported = false; return region; } @@ -2745,6 +2746,68 @@ static void kdamond_init_ctx(struct damon_ctx *ctx) } } +static void kdamond_apply_access_report(struct damon_access_report *report, + struct damon_target *t, struct damon_ctx *ctx) +{ + struct damon_region *r; + + /* todo: make search faster, e.g., binary search? 
*/ + damon_for_each_region(r, t) { + if (report->addr < r->ar.start) + continue; + if (r->ar.end < report->addr + report->size) + continue; + if (!r->access_reported) + damon_update_region_access_rate(r, true, &ctx->attrs); + r->access_reported = true; + } +} + +static unsigned int kdamond_apply_zero_access_report(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + unsigned int max_nr_accesses = 0; + + damon_for_each_target(t, ctx) { + damon_for_each_region(r, t) { + if (r->access_reported) + r->access_reported = false; + else + damon_update_region_access_rate(r, false, + &ctx->attrs); + max_nr_accesses = max(max_nr_accesses, r->nr_accesses); + } + } + return max_nr_accesses; +} + +static unsigned int kdamond_check_reported_accesses(struct damon_ctx *ctx) +{ + int i; + struct damon_access_report *report; + struct damon_target *t; + + /* currently damon_access_report supports only physical address */ + if (damon_target_has_pid(ctx)) + return 0; + + mutex_lock(&damon_access_reports_lock); + for (i = 0; i < damon_access_reports_len; i++) { + report = &damon_access_reports[i]; + if (time_before(report->report_jiffies, + jiffies - + usecs_to_jiffies( + ctx->attrs.sample_interval))) + continue; + damon_for_each_target(t, ctx) + kdamond_apply_access_report(report, t, ctx); + } + mutex_unlock(&damon_access_reports_lock); + /* For nr_accesses_bp, absence of access should also be reported. 
*/ + return kdamond_apply_zero_access_report(ctx); +} + /* * The monitoring daemon that runs as a kernel thread */ @@ -2790,7 +2853,10 @@ static int kdamond_fn(void *data) kdamond_usleep(sample_interval); ctx->passed_sample_intervals++; - if (ctx->ops.check_accesses) + /* todo: make these non-exclusive */ + if (ctx->sample_control.primitives_enabled.page_fault) + max_nr_accesses = kdamond_check_reported_accesses(ctx); + else if (ctx->ops.check_accesses) max_nr_accesses = ctx->ops.check_accesses(ctx); if (ctx->passed_sample_intervals >= next_aggregation_sis) -- 2.47.3 Add a sysfs directory for letting DAMON ABI users control what access sampling primitives to use, among page table Accessed bit and page fault events. Signed-off-by: SeongJae Park --- mm/damon/sysfs.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index e2bd2d7becdd..851e8c0dc989 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -749,6 +749,41 @@ static const struct kobj_type damon_sysfs_intervals_ktype = { .default_groups = damon_sysfs_intervals_groups, }; +/* + * sample directory + */ + +struct damon_sysfs_sample { + struct kobject kobj; +}; + +static struct damon_sysfs_sample *damon_sysfs_sample_alloc(void) +{ + struct damon_sysfs_sample *sample = kmalloc( + sizeof(*sample), GFP_KERNEL); + + if (!sample) + return NULL; + sample->kobj = (struct kobject){}; + return sample; +} + +static void damon_sysfs_sample_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_sample, kobj)); +} + +static struct attribute *damon_sysfs_sample_attrs[] = { + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_sample); + +static const struct kobj_type damon_sysfs_sample_ktype = { + .release = damon_sysfs_sample_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_sample_groups, +}; + /* * monitoring_attrs directory */ @@ -757,6 +792,7 @@ struct damon_sysfs_attrs { struct kobject kobj; struct 
damon_sysfs_intervals *intervals; struct damon_sysfs_ul_range *nr_regions_range; + struct damon_sysfs_sample *sample; }; static struct damon_sysfs_attrs *damon_sysfs_attrs_alloc(void) @@ -773,6 +809,7 @@ static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs) { struct damon_sysfs_intervals *intervals; struct damon_sysfs_ul_range *nr_regions_range; + struct damon_sysfs_sample *sample; int err; intervals = damon_sysfs_intervals_alloc(5000, 100000, 60000000); @@ -801,8 +838,23 @@ static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs) if (err) goto put_nr_regions_intervals_out; attrs->nr_regions_range = nr_regions_range; + + sample = damon_sysfs_sample_alloc(); + if (!sample) { + err = -ENOMEM; + goto put_nr_regions_intervals_out; + } + err = kobject_init_and_add(&sample->kobj, + &damon_sysfs_sample_ktype, &attrs->kobj, + "sample"); + if (err) + goto put_sample_out; + attrs->sample = sample; return 0; +put_sample_out: + kobject_put(&sample->kobj); + attrs->sample = NULL; put_nr_regions_intervals_out: kobject_put(&nr_regions_range->kobj); attrs->nr_regions_range = NULL; @@ -817,6 +869,7 @@ static void damon_sysfs_attrs_rm_dirs(struct damon_sysfs_attrs *attrs) kobject_put(&attrs->nr_regions_range->kobj); damon_sysfs_intervals_rm_dirs(attrs->intervals); kobject_put(&attrs->intervals->kobj); + kobject_put(&attrs->sample->kobj); } static void damon_sysfs_attrs_release(struct kobject *kobj) -- 2.47.3 Add a sysfs directory that will be used to let DAMON ABI users control what access sampling primitives to use, among page table Accessed bit, and page fault events. 
Signed-off-by: SeongJae Park --- mm/damon/sysfs.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 851e8c0dc989..14478fd79953 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -749,12 +749,111 @@ static const struct kobj_type damon_sysfs_intervals_ktype = { .default_groups = damon_sysfs_intervals_groups, }; +/* + * access check primitives directory + */ + +struct damon_sysfs_primitives { + struct kobject kobj; + bool page_table; + bool page_fault; +}; + +static struct damon_sysfs_primitives *damon_sysfs_primitives_alloc( + bool page_table, bool page_fault) +{ + struct damon_sysfs_primitives *primitives = kmalloc( + sizeof(*primitives), GFP_KERNEL); + + if (!primitives) + return NULL; + + primitives->kobj = (struct kobject){}; + primitives->page_table = page_table; + primitives->page_fault = page_fault; + return primitives; +} + +static ssize_t page_table_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_primitives *primitives = container_of(kobj, + struct damon_sysfs_primitives, kobj); + + return sysfs_emit(buf, "%c\n", primitives->page_table ? 'Y' : 'N'); +} + +static ssize_t page_table_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_primitives *primitives = container_of(kobj, + struct damon_sysfs_primitives, kobj); + bool enable; + int err = kstrtobool(buf, &enable); + + if (err) + return err; + primitives->page_table = enable; + return count; +} + +static ssize_t page_fault_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_primitives *primitives = container_of(kobj, + struct damon_sysfs_primitives, kobj); + + return sysfs_emit(buf, "%c\n", primitives->page_fault ? 
'Y' : 'N'); +} + +static ssize_t page_fault_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_primitives *primitives = container_of(kobj, + struct damon_sysfs_primitives, kobj); + bool enable; + int err = kstrtobool(buf, &enable); + + if (err) + return err; + primitives->page_fault = enable; + return count; +} + +static void damon_sysfs_primitives_release(struct kobject *kobj) +{ + struct damon_sysfs_primitives *primitives = container_of(kobj, + struct damon_sysfs_primitives, kobj); + + kfree(primitives); +} + +static struct kobj_attribute damon_sysfs_primitives_page_table_attr = + __ATTR_RW_MODE(page_table, 0600); + +static struct kobj_attribute damon_sysfs_primitives_page_fault_attr = + __ATTR_RW_MODE(page_fault, 0600); + +static struct attribute *damon_sysfs_primitives_attrs[] = { + &damon_sysfs_primitives_page_table_attr.attr, + &damon_sysfs_primitives_page_fault_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_primitives); + +static const struct kobj_type damon_sysfs_primitives_ktype = { + .release = damon_sysfs_primitives_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_primitives_groups, +}; + /* * sample directory */ struct damon_sysfs_sample { struct kobject kobj; + struct damon_sysfs_primitives *primitives; }; static struct damon_sysfs_sample *damon_sysfs_sample_alloc(void) @@ -768,6 +867,36 @@ static struct damon_sysfs_sample *damon_sysfs_sample_alloc(void) return sample; } +static int damon_sysfs_sample_add_dirs( + struct damon_sysfs_sample *sample) +{ + struct damon_sysfs_primitives *primitives; + int err; + + primitives = damon_sysfs_primitives_alloc(true, false); + if (!primitives) + return -ENOMEM; + err = kobject_init_and_add(&primitives->kobj, + &damon_sysfs_primitives_ktype, &sample->kobj, + "primitives"); + if (err) + goto put_primitives_out; + sample->primitives = primitives; + return 0; + +put_primitives_out: + kobject_put(&primitives->kobj); + sample->primitives = 
NULL; + return err; +} + +static void damon_sysfs_sample_rm_dirs( + struct damon_sysfs_sample *sample) +{ + if (sample->primitives) + kobject_put(&sample->primitives->kobj); +} + static void damon_sysfs_sample_release(struct kobject *kobj) { kfree(container_of(kobj, struct damon_sysfs_sample, kobj)); @@ -847,6 +976,9 @@ static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs) err = kobject_init_and_add(&sample->kobj, &damon_sysfs_sample_ktype, &attrs->kobj, "sample"); + if (err) + goto put_sample_out; + err = damon_sysfs_sample_add_dirs(sample); if (err) goto put_sample_out; attrs->sample = sample; @@ -869,6 +1001,7 @@ static void damon_sysfs_attrs_rm_dirs(struct damon_sysfs_attrs *attrs) kobject_put(&attrs->nr_regions_range->kobj); damon_sysfs_intervals_rm_dirs(attrs->intervals); kobject_put(&attrs->intervals->kobj); + damon_sysfs_sample_rm_dirs(attrs->sample); kobject_put(&attrs->sample->kobj); } -- 2.47.3 The monitoring_attrs/sample/primitives/ directory is not connected with the core layer. Make the connection. 
Signed-off-by: SeongJae Park --- mm/damon/sysfs.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 14478fd79953..16d58cde03e8 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1640,6 +1640,17 @@ static inline bool damon_sysfs_kdamond_running( damon_is_running(kdamond->damon_ctx); } +static int damon_sysfs_set_sample_control( + struct damon_sample_control *control, + struct damon_sysfs_sample *sysfs_sample) +{ + control->primitives_enabled.page_table = + sysfs_sample->primitives->page_table; + control->primitives_enabled.page_fault = + sysfs_sample->primitives->page_fault; + return 0; +} + static int damon_sysfs_apply_inputs(struct damon_ctx *ctx, struct damon_sysfs_context *sys_ctx) { @@ -1654,6 +1665,10 @@ static int damon_sysfs_apply_inputs(struct damon_ctx *ctx, ctx->min_sz_region = max( DAMON_MIN_REGION / sys_ctx->addr_unit, 1); err = damon_sysfs_set_attrs(ctx, sys_ctx->attrs); + if (err) + return err; + err = damon_sysfs_set_sample_control(&ctx->sample_control, + sys_ctx->attrs->sample); if (err) return err; err = damon_sysfs_add_targets(ctx, sys_ctx->targets); -- 2.47.3 Update DAMON design document for support of page fault events based access check sampling primitives. Signed-off-by: SeongJae Park --- Documentation/mm/damon/design.rst | 33 +++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index 7fd819b8bbf7..a21973b33bc3 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -117,6 +117,7 @@ to make a reasonable trade-off. Below shows this in detail:: +.. _damon_design_pte_accessed_bit_access_check: PTE Accessed-bit Based Access Check ----------------------------------- @@ -135,6 +136,23 @@ the interference is the responsibility of sysadmins. 
However, it solves the conflict with the reclaim logic using ``PG_idle`` and ``PG_young`` page flags, as Idle page tracking does. +.. _damon_design_page_fault_access_check: + +Monitoring-purpose Page Faults Based Access Check +------------------------------------------------- + +The operation set implementation for the physical address space (``paddr``) can +do the access check using monitoring-purpose page fault events in a way similar +to NUMA balancing hinting faults. When it is being used, ``paddr`` installs +the page protection in a way similar to that of NUMA balancing hinting faults. +Then the page fault handler reports the faults incurred by the installed page +protection to the DAMON core layer. + +``paddr`` uses :ref:`accessed-bit based check +` by default, and users can change +it to use this page faults based one, using :ref:`access sampling primitives +selection `. + .. _damon_design_addr_unit: Address Unit @@ -244,6 +262,21 @@ maximum number of regions after the split. In this way, DAMON provides its best-effort quality and minimal overhead while keeping the bounds users set for their trade-off. +.. _damon_design_sampling_primitives_selection: + +Access Sampling Primitives Selection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some operations set implementations could utilize multiple access sampling +primitives. For example, the operations set implementation for physical +address space (``paddr``) supports two primitives at the moment: 1) page tables' +Accessed bits, and 2) access monitoring-purpose page fault events. In future, it +could be extended for more hardware features such as AMD IBS or CXL HMU, and +support from the operations set implementation for virtual address spaces +(``vaddr``). DAMON API callers or ABI users can select what primitives to use. + +At the moment, only exclusive use of the primitives is supported. + .. 
_damon_design_age_tracking: -- 2.47.3 Update DAMON usage document for the selective access sampling primitives activation, which is essential for using page fault events as the access check sampling primitive. Signed-off-by: SeongJae Park --- Documentation/admin-guide/mm/damon/usage.rst | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index 7da4c002cb39..897a89950351 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -65,6 +65,7 @@ comma (","). │ │ │ │ │ :ref:`monitoring_attrs `/ │ │ │ │ │ │ intervals/sample_us,aggr_us,update_us │ │ │ │ │ │ │ intervals_goal/access_bp,aggrs,min_sample_us,max_sample_us + │ │ │ │ │ │ sample/primitives/page_table,page_faults │ │ │ │ │ │ nr_regions/min,max │ │ │ │ │ :ref:`targets `/nr_targets │ │ │ │ │ │ :ref:`0 `/pid_target,obsolete_target @@ -215,9 +216,9 @@ contexts//monitoring_attrs/ ------------------------------ Files for specifying attributes of the monitoring including required quality -and efficiency of the monitoring are in ``monitoring_attrs`` directory. -Specifically, two directories, ``intervals`` and ``nr_regions`` exist in this -directory. +and efficiency of the monitoring, and types of accesses to monitor are in +``monitoring_attrs`` directory. Specifically, three directories, ``intervals`` +, ``nr_regions`` and ``sample`` exist in this directory. Under ``intervals`` directory, three files for DAMON's sampling interval (``sample_us``), aggregation interval (``aggr_us``), and update interval @@ -232,6 +233,18 @@ writing to and rading from the files. For more details about the intervals and monitoring regions range, please refer to the Design document (:doc:`/mm/damon/design`). +Under ``sample`` directory, a directory, ``primitives`` exists. 
+ +contexts//monitoring_attrs/sample/primitives/ +------------------------------------------------ + +This directory is for the access sampling primitives :ref:`selection +`. Under ``primitives`` directory, +two files, ``page_table`` and ``page_faults`` exist. By writing ``Y`` or ``N`` +to these files, users can select whether to use the :ref:`page table accessed +bit ` and :ref:`page fault events +`, respectively. + .. _damon_usage_sysfs_monitoring_intervals_goal: contexts//monitoring_attrs/intervals/intervals_goal/ -- 2.47.3 Add a field to damon_access_report that can be used to report which CPU has made the reporting access. Signed-off-by: SeongJae Park --- include/linux/damon.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index b04c2e36833a..e23a10ba7c92 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -115,6 +115,7 @@ struct damon_target { * struct damon_access_report - Represent single acces report information. * @addr: The start address of the accessed address range. * @size: The size of the accessed address range. + * @cpu: The id of the CPU that made the access. * * Any DAMON API callers that notified access events can report the information * to DAMON using damon_report_access(). This struct contains the reporting @@ -123,6 +124,7 @@ struct damon_target { struct damon_access_report { unsigned long addr; unsigned long size; + unsigned int cpu; /* private: */ unsigned long report_jiffies; /* when this report is made */ }; -- 2.47.3 The CPU that executes the page fault event reporting function is the CPU that made the access. Report the origin CPU of the access. 
Signed-off-by: SeongJae Park --- mm/damon/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index a14754a47c7f..d2f842d8afd2 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1585,6 +1585,7 @@ void damon_report_page_fault(struct vm_fault *vmf, bool huge_pmd) { struct damon_access_report access_report = { .size = 1, /* todo: set appripriately */ + .cpu = smp_processor_id(), }; if (huge_pmd) -- 2.47.3 Access information that is reported via damon_report_access() can inform DAMON not only whether there was an access, but also additional things such as the access-generated CPU. By filtering in/out specific types of the reported access based on such additional information, DAMON can support more fine-tuned monitoring, such as per-CPUs access monitoring. Implement a core API data structure for controlling such filtering. Signed-off-by: SeongJae Park --- include/linux/damon.h | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index e23a10ba7c92..78be584ce5dd 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -781,18 +781,53 @@ struct damon_primitives_enabled { bool page_fault; }; +/** + * enum damon_sample_filter_type - Type of &struct damon_sample_filter. + * + * @DAMON_FILTER_TYPE_CPUMASK: Filter by access-generated CPUs. + * + * Read &struct damon_sample_control for more details. + */ +enum damon_sample_filter_type { + DAMON_FILTER_TYPE_CPUMASK, +}; + +/** + * struct damon_sample_filter - &struct damon_access_report filter. + * + * @type: The type of this filter. + * @matching: Whether it is for condition-matching reports. + * @allow: Whether to include or exclude the @matching reports. + * @cpumask: Access-generated CPUs if @type is DAMON_FILTER_TYPE_CPUMASK. + * @list: List head for siblings. + * + * Read &struct damon_sample_control for more details.
+ */ +struct damon_sample_filter { + enum damon_sample_filter_type type; + bool matching; + bool allow; + cpumask_t cpumask; + struct list_head list; +}; + /** * struct damon_sample_control - Low level access check sampling rules. * * @primitives_enabled: Enablement of access check primitives. + * @sample_filters: List of access check sample filters. * * DAMON collect low level access information using sampling, and aggregate * that to make higher access pattern picture. It can use multiple sampling - * primitives including page table accessed bits and page fault events. This - * struct is for controlling what sampling primitives to use (enable). + * primitives including page table accessed bits and page fault events. It can + * also filter in/out specific types of sampled access events to monitor + * accesses of specific types, such as access-generated CPUs. This struct is + * for controlling what sampling primitives to use (enable), and what sampled + * access events should be filtered in/out. */ struct damon_sample_control { struct damon_primitives_enabled primitives_enabled; + struct list_head sample_filters; }; /** -- 2.47.3 Add basic manipulation helper functions for damon_sample_filter data structure. 
Signed-off-by: SeongJae Park --- include/linux/damon.h | 15 +++++++++++++++ mm/damon/core.c | 45 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index 78be584ce5dd..aff34dce7c7c 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -947,6 +947,12 @@ static inline unsigned long damon_sz_region(struct damon_region *r) #define damon_for_each_target_safe(t, next, ctx) \ list_for_each_entry_safe(t, next, &(ctx)->adaptive_targets, list) +#define damon_for_each_sample_filter(f, control) \ + list_for_each_entry(f, &(control)->sample_filters, list) + +#define damon_for_each_sample_filter_safe(f, next, control) \ + list_for_each_entry_safe(f, next, &(control)->sample_filters, list) + #define damon_for_each_scheme(s, ctx) \ list_for_each_entry(s, &(ctx)->schemes, list) @@ -1022,6 +1028,15 @@ void damon_free_target(struct damon_target *t); void damon_destroy_target(struct damon_target *t, struct damon_ctx *ctx); unsigned int damon_nr_regions(struct damon_target *t); +struct damon_sample_filter *damon_new_sample_filter( + enum damon_sample_filter_type filter_type, bool matching, + bool allow); +void damon_add_sample_filter(struct damon_sample_control *ctrl, + struct damon_sample_filter *filter); +void damon_free_sample_filter(struct damon_sample_filter *filter); +void damon_destroy_sample_filter(struct damon_sample_filter *filter, + struct damon_sample_control *ctrl); + struct damon_ctx *damon_new_ctx(void); void damon_destroy_ctx(struct damon_ctx *ctx); int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs); diff --git a/mm/damon/core.c b/mm/damon/core.c index d2f842d8afd2..4b98473eef84 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -533,6 +533,46 @@ unsigned int damon_nr_regions(struct damon_target *t) return t->nr_regions; } +struct damon_sample_filter *damon_new_sample_filter( + enum damon_sample_filter_type filter_type, bool matching, + bool allow) +{ 
+ struct damon_sample_filter *filter; + + filter = kmalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return NULL; + filter->type = filter_type; + filter->matching = matching; + filter->allow = allow; + INIT_LIST_HEAD(&filter->list); + return filter; +} + +void damon_add_sample_filter(struct damon_sample_control *ctrl, + struct damon_sample_filter *filter) +{ + list_add_tail(&filter->list, &ctrl->sample_filters); +} + +static void damon_del_sample_filter(struct damon_sample_filter *f, + struct damon_sample_control *ctrl) +{ + list_del(&f->list); +} + +void damon_free_sample_filter(struct damon_sample_filter *f) +{ + kfree(f); +} + +void damon_destroy_sample_filter(struct damon_sample_filter *f, + struct damon_sample_control *ctrl) +{ + damon_del_sample_filter(f, ctrl); + damon_free_sample_filter(f); +} + struct damon_ctx *damon_new_ctx(void) { struct damon_ctx *ctx; @@ -561,6 +601,7 @@ struct damon_ctx *damon_new_ctx(void) ctx->attrs.max_nr_regions = 1000; ctx->sample_control.primitives_enabled.page_table = true; + INIT_LIST_HEAD(&ctx->sample_control.sample_filters); ctx->addr_unit = 1; ctx->min_sz_region = DAMON_MIN_REGION; @@ -582,12 +623,16 @@ static void damon_destroy_targets(struct damon_ctx *ctx) void damon_destroy_ctx(struct damon_ctx *ctx) { struct damos *s, *next_s; + struct damon_sample_filter *f, *next_f; damon_destroy_targets(ctx); damon_for_each_scheme_safe(s, next_s, ctx) damon_destroy_scheme(s); + damon_for_each_sample_filter_safe(f, next_f, &ctx->sample_control) + damon_destroy_sample_filter(f, &ctx->sample_control); + kfree(ctx); } -- 2.47.3 The data structure for access sample results filtering is not supported on the online parameters update function. Hence the feature cannot be used with the online parameters update. Add the support. 
Signed-off-by: SeongJae Park --- mm/damon/core.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 4b98473eef84..d952a833a05e 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -573,6 +573,19 @@ void damon_destroy_sample_filter(struct damon_sample_filter *f, damon_free_sample_filter(f); } +static struct damon_sample_filter *damon_nth_sample_filter(int n, + struct damon_sample_control *ctrl) +{ + struct damon_sample_filter *f; + int i = 0; + + damon_for_each_sample_filter(f, ctrl) { + if (i++ == n) + return f; + } + return NULL; +} + struct damon_ctx *damon_new_ctx(void) { struct damon_ctx *ctx; @@ -1290,6 +1303,67 @@ static int damon_commit_targets( return 0; } +static int damon_commit_sample_filter_arg(struct damon_sample_filter *dst, + struct damon_sample_filter *src) +{ + switch (src->type) { + case DAMON_FILTER_TYPE_CPUMASK: + dst->cpumask = src->cpumask; + break; + default: + break; + } + return 0; +} + +static int damon_commit_sample_filter(struct damon_sample_filter *dst, + struct damon_sample_filter *src) +{ + int err; + + err = damon_commit_sample_filter_arg(dst, src); + if (err) + return err; + dst->matching = src->matching; + dst->allow = src->allow; + return 0; +} + +static int damon_commit_sample_filters(struct damon_sample_control *dst, + struct damon_sample_control *src) +{ + struct damon_sample_filter *dst_filter, *next, *src_filter, *new_filter; + int i = 0, j = 0, err; + + damon_for_each_sample_filter_safe(dst_filter, next, dst) { + src_filter = damon_nth_sample_filter(i++, src); + if (src_filter) { + err = damon_commit_sample_filter(dst_filter, + src_filter); + if (err) + return err; + } else { + damon_destroy_sample_filter(dst_filter, dst); + } + } + + damon_for_each_sample_filter_safe(src_filter, next, src) { + if (j++ < i) + continue; + + new_filter = damon_new_sample_filter( + src_filter->type, src_filter->matching, + 
src_filter->allow); + if (!new_filter) + return -ENOMEM; + err = damon_commit_sample_filter_arg(new_filter, src_filter); + if (err) + return err; + damon_add_sample_filter(dst, new_filter); + } + return 0; +} + static bool damon_primitives_enabled_invalid( struct damon_primitives_enabled *config) { @@ -1304,7 +1378,7 @@ static int damon_commit_sample_control( return -EINVAL; dst->primitives_enabled = src->primitives_enabled; - return 0; + return damon_commit_sample_filters(dst, src); } /** -- 2.47.3 The data structure for access sample results filtering is not really being used on the core layer. Implement the support. Signed-off-by: SeongJae Park --- mm/damon/core.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index d952a833a05e..b627fc84161c 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -586,6 +586,13 @@ static struct damon_sample_filter *damon_nth_sample_filter(int n, return NULL; } +static struct damon_sample_filter *damon_last_sample_filter_or_null( + struct damon_sample_control *ctrl) +{ + return list_last_entry_or_null(&ctrl->sample_filters, + struct damon_sample_filter, list); +} + struct damon_ctx *damon_new_ctx(void) { struct damon_ctx *ctx; @@ -2866,11 +2873,45 @@ static void kdamond_init_ctx(struct damon_ctx *ctx) } } +static bool damon_sample_filter_matching(struct damon_access_report *report, + struct damon_sample_filter *filter) +{ + bool matched = false; + + switch (filter->type) { + case DAMON_FILTER_TYPE_CPUMASK: + matched = cpumask_test_cpu(report->cpu, &filter->cpumask); + break; + default: + break; + } + return matched == filter->matching; +} + +static bool damon_sample_filter_out(struct damon_access_report *report, + struct damon_sample_control *ctrl) +{ + struct damon_sample_filter *filter; + + damon_for_each_sample_filter(filter, ctrl) { + if (damon_sample_filter_matching(report, filter) && + !filter->allow) + return true; + } + filter = 
damon_last_sample_filter_or_null(ctrl); + if (!filter) + return false; + return !filter->allow; +} + static void kdamond_apply_access_report(struct damon_access_report *report, struct damon_target *t, struct damon_ctx *ctx) { struct damon_region *r; + if (damon_sample_filter_out(report, &ctx->sample_control)) + return; + /* todo: make search faster, e.g., binary search? */ damon_for_each_region(r, t) { if (report->addr < r->ar.start) -- 2.47.3 Add a directory for letting DAMON ABI users install and uninstall DAMON sample filters. Signed-off-by: SeongJae Park --- mm/damon/sysfs.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 16d58cde03e8..4208fed2b8df 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -749,6 +749,36 @@ static const struct kobj_type damon_sysfs_intervals_ktype = { .default_groups = damon_sysfs_intervals_groups, }; +/* + * access check report filters directory + */ + +struct damon_sysfs_sample_filters { + struct kobject kobj; +}; + +static struct damon_sysfs_sample_filters * +damon_sysfs_sample_filters_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_sample_filters), GFP_KERNEL); +} + +static void damon_sysfs_sample_filters_release(struct kobject *kobj) +{ + kfree(container_of(kobj, struct damon_sysfs_sample_filters, kobj)); +} + +static struct attribute *damon_sysfs_sample_filters_attrs[] = { + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_sample_filters); + +static const struct kobj_type damon_sysfs_sample_filters_ktype = { + .release = damon_sysfs_sample_filters_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_sample_filters_groups, +}; + /* * access check primitives directory */ @@ -854,6 +884,7 @@ static const struct kobj_type damon_sysfs_primitives_ktype = { struct damon_sysfs_sample { struct kobject kobj; struct damon_sysfs_primitives *primitives; + struct damon_sysfs_sample_filters *filters; }; static struct 
damon_sysfs_sample *damon_sysfs_sample_alloc(void) @@ -871,6 +902,7 @@ static int damon_sysfs_sample_add_dirs( struct damon_sysfs_sample *sample) { struct damon_sysfs_primitives *primitives; + struct damon_sysfs_sample_filters *filters; int err; primitives = damon_sysfs_primitives_alloc(true, false); @@ -883,6 +915,19 @@ static int damon_sysfs_sample_add_dirs( goto put_primitives_out; sample->primitives = primitives; + filters = damon_sysfs_sample_filters_alloc(); + if (!filters) + return -ENOMEM; + err = kobject_init_and_add(&filters->kobj, + &damon_sysfs_sample_filters_ktype, &sample->kobj, + "filters"); + if (err) + goto put_filters_out; + sample->filters = filters; + return 0; +put_filters_out: + kobject_put(&filters->kobj); + sample->filters = NULL; put_primitives_out: kobject_put(&primitives->kobj); sample->primitives = NULL; @@ -894,6 +939,9 @@ static void damon_sysfs_sample_rm_dirs( { if (sample->primitives) kobject_put(&sample->primitives->kobj); + if (sample->filters) { + kobject_put(&sample->filters->kobj); + } } static void damon_sysfs_sample_release(struct kobject *kobj) -- 2.47.3 Implement sysfs directory for setting individual DAMON sample filters. 
Signed-off-by: SeongJae Park --- mm/damon/sysfs.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 4208fed2b8df..ba0c76c1300c 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -749,12 +749,46 @@ static const struct kobj_type damon_sysfs_intervals_ktype = { .default_groups = damon_sysfs_intervals_groups, }; +/* + * access check report filter directory + */ + +struct damon_sysfs_sample_filter { + struct kobject kobj; +}; + +static struct damon_sysfs_sample_filter *damon_sysfs_sample_filter_alloc(void) +{ + return kzalloc(sizeof(struct damon_sysfs_sample_filter), GFP_KERNEL); +} + +static void damon_sysfs_sample_filter_release(struct kobject *kobj) +{ + struct damon_sysfs_sample_filter *filter = container_of(kobj, + struct damon_sysfs_sample_filter, kobj); + + kfree(filter); +} + +static struct attribute *damon_sysfs_sample_filter_attrs[] = { + NULL, +}; +ATTRIBUTE_GROUPS(damon_sysfs_sample_filter); + +static const struct kobj_type damon_sysfs_sample_filter_ktype = { + .release = damon_sysfs_sample_filter_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = damon_sysfs_sample_filter_groups, +}; + /* * access check report filters directory */ struct damon_sysfs_sample_filters { struct kobject kobj; + struct damon_sysfs_sample_filter **filters_arr; + int nr; }; static struct damon_sysfs_sample_filters * @@ -763,12 +797,99 @@ damon_sysfs_sample_filters_alloc(void) return kzalloc(sizeof(struct damon_sysfs_sample_filters), GFP_KERNEL); } +static void damon_sysfs_sample_filters_rm_dirs( + struct damon_sysfs_sample_filters *filters) +{ + struct damon_sysfs_sample_filter **filters_arr = filters->filters_arr; + int i; + + for (i = 0; i < filters->nr; i++) + kobject_put(&filters_arr[i]->kobj); + filters->nr = 0; + kfree(filters_arr); + filters->filters_arr = NULL; +} + +static int damon_sysfs_sample_filters_add_dirs( + struct damon_sysfs_sample_filters *filters, int 
nr_filters) +{ + struct damon_sysfs_sample_filter **filters_arr, *filter; + int err, i; + + damon_sysfs_sample_filters_rm_dirs(filters); + if (!nr_filters) + return 0; + + filters_arr = kmalloc_array(nr_filters, sizeof(*filters_arr), + GFP_KERNEL | __GFP_NOWARN); + if (!filters_arr) + return -ENOMEM; + filters->filters_arr = filters_arr; + + for (i = 0; i < nr_filters; i++) { + filter = damon_sysfs_sample_filter_alloc(); + if (!filter) { + damon_sysfs_sample_filters_rm_dirs(filters); + return -ENOMEM; + } + + err = kobject_init_and_add(&filter->kobj, + &damon_sysfs_sample_filter_ktype, + &filters->kobj, "%d", i); + if (err) { + kobject_put(&filter->kobj); + damon_sysfs_sample_filters_rm_dirs(filters); + return err; + } + + filters_arr[i] = filter; + filters->nr++; + } + return 0; +} + +static ssize_t nr_filters_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_sample_filters *filters = container_of(kobj, + struct damon_sysfs_sample_filters, kobj); + + return sysfs_emit(buf, "%d\n", filters->nr); +} + +static ssize_t nr_filters_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_sample_filters *filters; + int nr, err = kstrtoint(buf, 0, &nr); + + if (err) + return err; + if (nr < 0) + return -EINVAL; + + filters = container_of(kobj, struct damon_sysfs_sample_filters, kobj); + + if (!mutex_trylock(&damon_sysfs_lock)) + return -EBUSY; + err = damon_sysfs_sample_filters_add_dirs(filters, nr); + mutex_unlock(&damon_sysfs_lock); + if (err) + return err; + + return count; +} + static void damon_sysfs_sample_filters_release(struct kobject *kobj) { kfree(container_of(kobj, struct damon_sysfs_sample_filters, kobj)); } +static struct kobj_attribute damon_sysfs_sample_filters_nr_attr = + __ATTR_RW_MODE(nr_filters, 0600); + static struct attribute *damon_sysfs_sample_filters_attrs[] = { + &damon_sysfs_sample_filters_nr_attr.attr, NULL, }; 
ATTRIBUTE_GROUPS(damon_sysfs_sample_filters); @@ -940,6 +1061,7 @@ static void damon_sysfs_sample_rm_dirs( if (sample->primitives) kobject_put(&sample->primitives->kobj); if (sample->filters) { + damon_sysfs_sample_filters_rm_dirs(sample->filters); kobject_put(&sample->filters->kobj); } } -- 2.47.3 The sample filter directory is not really having the file to set up for the properties of the representing DAMON sample filter. Implement files for setting the properties. Signed-off-by: SeongJae Park --- mm/damon/sysfs.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index ba0c76c1300c..74594e6e461c 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -755,6 +755,9 @@ static const struct kobj_type damon_sysfs_intervals_ktype = { struct damon_sysfs_sample_filter { struct kobject kobj; + enum damon_sample_filter_type type; + bool matching; + bool allow; }; static struct damon_sysfs_sample_filter *damon_sysfs_sample_filter_alloc(void) @@ -762,6 +765,92 @@ static struct damon_sysfs_sample_filter *damon_sysfs_sample_filter_alloc(void) return kzalloc(sizeof(struct damon_sysfs_sample_filter), GFP_KERNEL); } +static ssize_t type_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_sample_filter *filter = container_of(kobj, + struct damon_sysfs_sample_filter, kobj); + int i = 0; + + for (; i < ARRAY_SIZE(damon_sysfs_sample_filter_type_names); i++) { + const struct damon_sysfs_sample_filter_type_name *type_name; + + type_name = &damon_sysfs_sample_filter_type_names[i]; + if (type_name->type == filter->type) + return sysfs_emit(buf, "%s\n", type_name->name); + } + return -EINVAL; +} + +static ssize_t type_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_sample_filter *filter = container_of(kobj, + struct damon_sysfs_sample_filter, kobj); + ssize_t ret = -EINVAL; + int i = 0; + + for (; 
i < ARRAY_SIZE(damon_sysfs_sample_filter_type_names); i++) { + const struct damon_sysfs_sample_filter_type_name *type_name; + + type_name = &damon_sysfs_sample_filter_type_names[i]; + if (sysfs_streq(buf, type_name->name)) { + filter->type = type_name->type; + ret = count; + break; + } + } + return ret; +} + +static ssize_t matching_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_sample_filter *filter = container_of(kobj, + struct damon_sysfs_sample_filter, kobj); + + return sysfs_emit(buf, "%c\n", filter->matching ? 'Y' : 'N'); +} + +static ssize_t matching_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_sample_filter *filter = container_of(kobj, + struct damon_sysfs_sample_filter, kobj); + bool matching; + int err = kstrtobool(buf, &matching); + + if (err) + return err; + + filter->matching = matching; + return count; +} + +static ssize_t allow_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct damon_sysfs_sample_filter *filter = container_of(kobj, + struct damon_sysfs_sample_filter, kobj); + + return sysfs_emit(buf, "%c\n", filter->allow ? 
'Y' : 'N'); +} + +static ssize_t allow_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + struct damon_sysfs_sample_filter *filter = container_of(kobj, + struct damon_sysfs_sample_filter, kobj); + bool allow; + int err = kstrtobool(buf, &allow); + + if (err) + return err; + + filter->allow = allow; + return count; +} + static void damon_sysfs_sample_filter_release(struct kobject *kobj) { struct damon_sysfs_sample_filter *filter = container_of(kobj, @@ -770,7 +859,19 @@ static void damon_sysfs_sample_filter_release(struct kobject *kobj) kfree(filter); } +static struct kobj_attribute damon_sysfs_sample_filter_type_attr = + __ATTR_RW_MODE(type, 0600); + +static struct kobj_attribute damon_sysfs_sample_filter_matching_attr = + __ATTR_RW_MODE(matching, 0600); + +static struct kobj_attribute damon_sysfs_sample_filter_allow_attr = + __ATTR_RW_MODE(allow, 0600); + static struct attribute *damon_sysfs_sample_filter_attrs[] = { + &damon_sysfs_sample_filter_type_attr.attr, + &damon_sysfs_sample_filter_matching_attr.attr, + &damon_sysfs_sample_filter_allow_attr.attr, NULL, }; ATTRIBUTE_GROUPS(damon_sysfs_sample_filter); -- 2.47.3 The DAON sample filter directory has files for only common properties. Add a file to further specify the CPUs to filter in or out. 
Signed-off-by: SeongJae Park --- mm/damon/sysfs.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 74594e6e461c..d6b0b6513fd1 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -758,6 +758,7 @@ struct damon_sysfs_sample_filter { enum damon_sample_filter_type type; bool matching; bool allow; + cpumask_t cpumask; }; static struct damon_sysfs_sample_filter *damon_sysfs_sample_filter_alloc(void) @@ -765,6 +766,19 @@ static struct damon_sysfs_sample_filter *damon_sysfs_sample_filter_alloc(void) return kzalloc(sizeof(struct damon_sysfs_sample_filter), GFP_KERNEL); } +struct damon_sysfs_sample_filter_type_name { + enum damon_sample_filter_type type; + char *name; +}; + +static const struct damon_sysfs_sample_filter_type_name +damon_sysfs_sample_filter_type_names[] = { + { + .type = DAMON_FILTER_TYPE_CPUMASK, + .name = "cpumask", + }, +}; + static ssize_t type_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -851,6 +865,29 @@ static ssize_t allow_store(struct kobject *kobj, return count; } +static ssize_t cpumask_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_sample_filter *filter = container_of(kobj, + struct damon_sysfs_sample_filter, kobj); + + return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(&filter->cpumask)); +} + +static ssize_t cpumask_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_sample_filter *filter = container_of(kobj, + struct damon_sysfs_sample_filter, kobj); + cpumask_t cpumask; + int err = cpulist_parse(buf, &cpumask); + + if (err) + return err; + filter->cpumask = cpumask; + return count; +} + static void damon_sysfs_sample_filter_release(struct kobject *kobj) { struct damon_sysfs_sample_filter *filter = container_of(kobj, @@ -868,10 +905,14 @@ static struct kobj_attribute damon_sysfs_sample_filter_matching_attr = static struct 
kobj_attribute damon_sysfs_sample_filter_allow_attr = __ATTR_RW_MODE(allow, 0600); +static struct kobj_attribute damon_sysfs_sample_filter_cpumask_attr = + __ATTR_RW_MODE(cpumask, 0600); + static struct attribute *damon_sysfs_sample_filter_attrs[] = { &damon_sysfs_sample_filter_type_attr.attr, &damon_sysfs_sample_filter_matching_attr.attr, &damon_sysfs_sample_filter_allow_attr.attr, + &damon_sysfs_sample_filter_cpumask_attr.attr, NULL, }; ATTRIBUTE_GROUPS(damon_sysfs_sample_filter); -- 2.47.3 Only basic file operations are implemented for the DAMON sample filters sysfs directory. The user inputs are not really passed to the core layer. Make the connection. Signed-off-by: SeongJae Park --- mm/damon/sysfs.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index d6b0b6513fd1..3aac2aea6b0c 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1952,6 +1952,35 @@ static inline bool damon_sysfs_kdamond_running( damon_is_running(kdamond->damon_ctx); } +static int damon_sysfs_set_sample_filters( + struct damon_sample_control *control, + struct damon_sysfs_sample_filters *sysfs_filters) +{ + int i, err; + + for (i = 0; i < sysfs_filters->nr; i++) { + struct damon_sysfs_sample_filter *sysfs_filter = + sysfs_filters->filters_arr[i]; + struct damon_sample_filter *filter; + + filter = damon_new_sample_filter( + sysfs_filter->type, sysfs_filter->matching, + sysfs_filter->allow); + if (!filter) + return -ENOMEM; + switch (filter->type) { + case DAMON_FILTER_TYPE_CPUMASK: + filter->cpumask = sysfs_filter->cpumask; + break; + default: + break; + } + damon_add_sample_filter(control, filter); + } + return 0; +} + + static int damon_sysfs_set_sample_control( struct damon_sample_control *control, struct damon_sysfs_sample *sysfs_sample) @@ -1960,7 +1989,9 @@ static int damon_sysfs_set_sample_control( sysfs_sample->primitives->page_table; control->primitives_enabled.page_fault = 
sysfs_sample->primitives->page_fault; - return 0; + + return damon_sysfs_set_sample_filters(control, + sysfs_sample->filters); } static int damon_sysfs_apply_inputs(struct damon_ctx *ctx, -- 2.47.3 Update DAMON design document for the access sample results filtering. Signed-off-by: SeongJae Park --- Documentation/mm/damon/design.rst | 42 +++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index a21973b33bc3..9a4679de437f 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -146,12 +146,17 @@ do the access check using monitoring-purpose page fault events in a way similar to NUMA balancing hinting faults. When it is being used, ``paddr`` installs the page protection in a way similar to that of NUMA balancing hinting faults. Then the page fault handler reports the faults happend by the installed page -protection to DAMON core layer. +protection to DAMON core layer. Compared to the accessed-bit based one, this +approach gives more information to DAMON, such as the access-generated +CPU. ``paddr`` uses :ref:`accessed-bit based check ` by default, and users can change it to use this page faults based one, using :ref:`access sampling primitives -selection `. +selection `. Also, the addtional +information can be used for doing monitoring of only specific type accesses, +using :ref:`access sampling results filters +`. .. _damon_design_addr_unit: @@ -277,6 +282,39 @@ support from the operations set implementation for virtual address spaces At the moment, only exclusive use of the primitives is supported. +.. 
_damon_design_sampling_results_filters: + +Access Sampling Results Filter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Depending on the access sampling primitives that are used, the sampling results +could inform more than whether an access is made, such as the CPU or the thread +the sampled access was made from, and whether the sampled access was for +writing or reading. By filtering the results based on the additional +information, DAMON can perform more detailed access monitoring, such as +per-CPUs/threads or read/write-only monitoring. + +For such special types of monitoring, DAMON provides a feature called "sampling +results filter". The feature allows users to set an arbitrary number of +filters for the sampling results. Each of the filters specifies + +- a type of the additional information (``type``), +- whether it is for the sampling results of the type or all except the type + (``matching``), and +- whether it is to allow (include) or reject (exclude) consuming of the type of + sampling results. + +Each filter is applied in the order of the installation. Only sampling results +that are allowed by previous filters continue to be evaluated by the next +filters. If a sampling result is decided to be rejected by a filter, it is +just discarded and no later filter is applied. + +Below ``type`` of access sampling results filter are currently supported. + +- cpumask + - Whether the sampled access was generated by a cpu that is included in a + given cpumask. + .. _damon_design_age_tracking: -- 2.47.3 Update DAMON usage document for the added DAMON sysfs files for installing and setting the access sample results filtering.
Signed-off-by: SeongJae Park --- Documentation/admin-guide/mm/damon/usage.rst | 25 +++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index 897a89950351..365a4548a5ba 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -66,6 +66,8 @@ comma (","). │ │ │ │ │ │ intervals/sample_us,aggr_us,update_us │ │ │ │ │ │ │ intervals_goal/access_bp,aggrs,min_sample_us,max_sample_us │ │ │ │ │ │ sample/primitives/page_table,page_faults + │ │ │ │ │ │ sample/filters/nr_filters + │ │ │ │ │ │ │ 0/type,matching,allow,cpumask │ │ │ │ │ │ nr_regions/min,max │ │ │ │ │ :ref:`targets `/nr_targets │ │ │ │ │ │ :ref:`0 `/pid_target,obsolete_target @@ -233,7 +235,8 @@ writing to and rading from the files. For more details about the intervals and monitoring regions range, please refer to the Design document (:doc:`/mm/damon/design`). -Under ``sample`` directory, a directory, ``primitives`` exists. +Under ``sample`` directory, two directories, ``primitives`` and ``filters`` +exist. contexts//monitoring_attrs/sample/primitives/ ------------------------------------------------ @@ -245,6 +248,26 @@ to these files, users can select whether to use the :ref:`page table accessed bit ` and :ref:`page fault events `, respectively. +contexts//monitoring_attrs/sample/filters/ +--------------------------------------------- + +This directory is for control of the :ref:`access sampling results filters +`. At the beginning, this directory has +only one file, ``nr_filters``. Writing a positive integer ```` to the file +generates directories of the number, named ``0`` to ````. The generated +directories represent the sampling result filters to install to the DAMON +context, in the order of the directory names. Writing ``0`` to ``nr_filters`` +removes the directories. 
+ +Under the individual filter directory, five files, ``type``, ``matching``, +``allow``, and ``cpumask`` exist. The first three files are same to the +properties of the filter specified on the :ref:`design doc +`. + +Users can specify the cpumask of the filter by writing it to ``cpumask``. The +format for ``cpumask`` input is same to that for other cpumask inputs like that +for cgroup cpusets. + .. _damon_usage_sysfs_monitoring_intervals_goal: contexts//monitoring_attrs/intervals/intervals_goal/ -- 2.47.3 The access reporter might be able to know which thread has made the reporting access. Extend the report data structure for the information. Signed-off-by: SeongJae Park --- include/linux/damon.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index aff34dce7c7c..80332bb2b73c 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -116,6 +116,7 @@ struct damon_target { * @addr: The start address of the accessed address range. * @size: The size of the accessed address range. * @cpu: The id of the CPU that made the access. + * @tid: The task id of the task that made the access. * * Any DAMON API callers that notified access events can report the information * to DAMON using damon_report_access(). This struct contains the reporting @@ -125,6 +126,7 @@ struct damon_access_report { unsigned long addr; unsigned long size; unsigned int cpu; + pid_t tid; /* private: */ unsigned long report_jiffies; /* when this report is made */ }; -- 2.47.3 Page fault event reporting function can know which thread is making the access. Add the information to the report. 
Signed-off-by: SeongJae Park --- mm/damon/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index b627fc84161c..4971647d4b5e 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1712,6 +1712,7 @@ void damon_report_page_fault(struct vm_fault *vmf, bool huge_pmd) struct damon_access_report access_report = { .size = 1, /* todo: set appripriately */ .cpu = smp_processor_id(), + .tid = task_pid_vnr(current), }; if (huge_pmd) -- 2.47.3 Now DAMON access reports could inform which thread was making the reporting access. By filtering the reports based on the threads, DAMON can do per-threads monitoring. Extend DAMON access sample filter to do such filtering. Signed-off-by: SeongJae Park --- include/linux/damon.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index 80332bb2b73c..e3408280ea72 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -787,11 +787,13 @@ struct damon_primitives_enabled { * enum damon_sample_filter_type - Type of &struct damon_sample_filter. * * @DAMON_FILTER_TYPE_CPUMASK: Filter by access-generated CPUs. + * @DAMON_FILTER_TYPE_THREADS: Filter by access-generated threads. * * Read &struct damon_sample_control for more details. */ enum damon_sample_filter_type { DAMON_FILTER_TYPE_CPUMASK, + DAMON_FILTER_TYPE_THREADS, }; /** @@ -801,6 +803,9 @@ enum damon_sample_filter_type { * @matching: Whether it is for condition-matching reports. * @allow: Whether to include or excludie the @matching reports. * @cpumask: Access-generated CPUs if @type is DAMON_FILTER_TYPE_CPUMASK. + * @tid_arr: Array of access-generated thread ids, if @type is + * DAMON_FILTER_TYPE_THREADS. + * @nr_tids: Size of @tid_arr, if @type is DAMON_FILTER_TYPE_THREADS. * @list: List head for siblings. * * Read &struct damon_sample_control for more details. 
@@ -809,7 +814,13 @@ struct damon_sample_filter { enum damon_sample_filter_type type; bool matching; bool allow; - cpumask_t cpumask; + union { + cpumask_t cpumask; + struct { + pid_t *tid_arr; + int nr_tids; + }; + }; struct list_head list; }; @@ -823,9 +834,9 @@ struct damon_sample_filter { * that to make higher access pattern picture. It can use multiple sampling * primitives including page table accessed bits and page fault events. It can * also filter in/out specific types of sampled access events to monitor - * accesses of specific types, such as access-generated CPUs. This struct is - * for controlling what sampling primitives to use (enable), and what sampled - * access events should be filtered in/out. + * accesses of specific types, such as access-generated CPUs and threads. This + * struct is for controlling what sampling primitives to use (enable), and what + * sampled access events should be filtered in/out. */ struct damon_sample_control { struct damon_primitives_enabled primitives_enabled; -- 2.47.3 Access-generated threads based access sample filter type is not really being respected on the core layer. Implement the support for doing the filtering, and committing the information when doing the online parameters update. 
Signed-off-by: SeongJae Park --- mm/damon/core.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index 4971647d4b5e..782af39ef0c0 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -546,6 +546,10 @@ struct damon_sample_filter *damon_new_sample_filter( filter->matching = matching; filter->allow = allow; INIT_LIST_HEAD(&filter->list); + if (filter_type == DAMON_FILTER_TYPE_THREADS) { + filter->tid_arr = NULL; + filter->nr_tids = 0; + } return filter; } @@ -570,6 +574,10 @@ void damon_destroy_sample_filter(struct damon_sample_filter *f, struct damon_sample_control *ctrl) { damon_del_sample_filter(f, ctrl); + if (f->type == DAMON_FILTER_TYPE_THREADS) { + kfree(f->tid_arr); + f->nr_tids = 0; + } damon_free_sample_filter(f); } @@ -1317,6 +1325,17 @@ static int damon_commit_sample_filter_arg(struct damon_sample_filter *dst, case DAMON_FILTER_TYPE_CPUMASK: dst->cpumask = src->cpumask; break; + case DAMON_FILTER_TYPE_THREADS: + if (dst->type == DAMON_FILTER_TYPE_THREADS) + kfree(dst->tid_arr); + dst->tid_arr = kmalloc_array(src->nr_tids, + sizeof(*dst->tid_arr), GFP_KERNEL); + if (!dst->tid_arr) + return -ENOMEM; + memcpy(dst->tid_arr, src->tid_arr, sizeof(*dst->tid_arr) * + src->nr_tids); + dst->nr_tids = src->nr_tids; + break; default: break; } @@ -2878,11 +2897,20 @@ static bool damon_sample_filter_matching(struct damon_access_report *report, struct damon_sample_filter *filter) { bool matched = false; + int i; switch (filter->type) { case DAMON_FILTER_TYPE_CPUMASK: matched = cpumask_test_cpu(report->cpu, &filter->cpumask); break; + case DAMON_FILTER_TYPE_THREADS: + for (i = 0; i < filter->nr_tids; i++) { + if (report->tid != filter->tid_arr[i]) + continue; + matched = true; + break; + } + break; default: break; } -- 2.47.3 Add support of the threads based access sample filtering on DAMON sysfs interface. 
For this, add a new file for setting the threads of interest of the filter, and pass it to the core layer. Signed-off-by: SeongJae Park --- mm/damon/sysfs.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 3aac2aea6b0c..b96df2d2d17e 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -759,6 +759,7 @@ struct damon_sysfs_sample_filter { bool matching; bool allow; cpumask_t cpumask; + int *tid_arr; /* first entry is the length of the array */ }; static struct damon_sysfs_sample_filter *damon_sysfs_sample_filter_alloc(void) @@ -777,6 +778,10 @@ damon_sysfs_sample_filter_type_names[] = { .type = DAMON_FILTER_TYPE_CPUMASK, .name = "cpumask", }, + { + .type = DAMON_FILTER_TYPE_THREADS, + .name = "threads", + }, }; static ssize_t type_show(struct kobject *kobj, @@ -888,6 +893,47 @@ static ssize_t cpumask_store(struct kobject *kobj, struct kobj_attribute *attr, return count; } +static ssize_t tid_arr_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct damon_sysfs_sample_filter *sample_filter = container_of(kobj, + struct damon_sysfs_sample_filter, kobj); + char *str; + int nr_tids, *tid_arr; + int i, ret; + + if (!sample_filter->tid_arr) + return sysfs_emit(buf, "\n"); + + str = kcalloc(2048, sizeof(*str), GFP_KERNEL); + if (!str) + return -ENOMEM; + nr_tids = sample_filter->tid_arr[0]; + tid_arr = &sample_filter->tid_arr[1]; + for (i = 0; i < nr_tids; i++) { + snprintf(&str[strlen(str)], 2048 - strlen(str), "%d", + tid_arr[i]); + if (i < nr_tids - 1) + snprintf(&str[strlen(str)], 2048 - strlen(str), ","); + } + ret = sysfs_emit(buf, "%s\n", str); + kfree(str); + return ret; +} + +static ssize_t tid_arr_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct damon_sysfs_sample_filter *sample_filter = container_of(kobj, + struct damon_sysfs_sample_filter, kobj); + int err; + + err = parse_int_array(buf, 
count, &sample_filter->tid_arr); + if (err) + return err; + return count; +} + static void damon_sysfs_sample_filter_release(struct kobject *kobj) { struct damon_sysfs_sample_filter *filter = container_of(kobj, @@ -908,11 +954,15 @@ static struct kobj_attribute damon_sysfs_sample_filter_allow_attr = static struct kobj_attribute damon_sysfs_sample_filter_cpumask_attr = __ATTR_RW_MODE(cpumask, 0600); +static struct kobj_attribute damon_sysfs_sample_filter_tid_arr_attr = + __ATTR_RW_MODE(tid_arr, 0600); + static struct attribute *damon_sysfs_sample_filter_attrs[] = { &damon_sysfs_sample_filter_type_attr.attr, &damon_sysfs_sample_filter_matching_attr.attr, &damon_sysfs_sample_filter_allow_attr.attr, &damon_sysfs_sample_filter_cpumask_attr.attr, + &damon_sysfs_sample_filter_tid_arr_attr.attr, NULL, }; ATTRIBUTE_GROUPS(damon_sysfs_sample_filter); @@ -1952,6 +2002,25 @@ static inline bool damon_sysfs_kdamond_running( damon_is_running(kdamond->damon_ctx); } +static int damon_sysfs_set_threads_filter(struct damon_sample_filter *filter, + int *sysfs_tid_arr) +{ + int nr_tids, i; + pid_t *tid_arr; + + if (!sysfs_tid_arr) + return -EINVAL; + nr_tids = sysfs_tid_arr[0]; + tid_arr = kmalloc_array(nr_tids, sizeof(*tid_arr), GFP_KERNEL); + if (!tid_arr) + return -ENOMEM; + for (i = 0; i < nr_tids; i++) + tid_arr[i] = sysfs_tid_arr[i + 1]; + filter->tid_arr = tid_arr; + filter->nr_tids = nr_tids; + return 0; +} + static int damon_sysfs_set_sample_filters( struct damon_sample_control *control, struct damon_sysfs_sample_filters *sysfs_filters) @@ -1972,6 +2041,12 @@ static int damon_sysfs_set_sample_filters( case DAMON_FILTER_TYPE_CPUMASK: filter->cpumask = sysfs_filter->cpumask; break; + case DAMON_FILTER_TYPE_THREADS: + err = damon_sysfs_set_threads_filter(filter, + sysfs_filter->tid_arr); + if (err) + damon_free_sample_filter(filter); + break; default: break; } -- 2.47.3 Update DAMON design document for threads type access sample results filtering. 
Signed-off-by: SeongJae Park --- Documentation/mm/damon/design.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index 9a4679de437f..4c6d83263a7b 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -148,7 +148,7 @@ the page protection in a way similar to that of NUMA balancing hinting faults. Then the page fault handler reports the faults happend by the installed page protection to DAMON core layer. Compared to the accessed-bit based one, this approach gives more information to DAMON, such as the access-generated -CPU. +CPU/threads. ``paddr`` uses :ref:`accessed-bit based check ` by default, and users can change @@ -314,6 +314,9 @@ Below ``type`` of access sampling results filter are currently supported. - cpumask - Whether the sampled access was generated by a cpu that included in a given cpumask. +- threads + - Whether the sampled access was generated by a thread that having a thread + (task) id that exist in a given array of ids. .. _damon_design_age_tracking: -- 2.47.3 Update DAMON usage document for the threads based access sampling results filtering. Signed-off-by: SeongJae Park --- Documentation/admin-guide/mm/damon/usage.rst | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst index 365a4548a5ba..d22d80710355 100644 --- a/Documentation/admin-guide/mm/damon/usage.rst +++ b/Documentation/admin-guide/mm/damon/usage.rst @@ -67,7 +67,7 @@ comma (","). 
 │ │ │ │ │ │ │ intervals_goal/access_bp,aggrs,min_sample_us,max_sample_us │ │ │ │ │ │ sample/primitives/page_table,page_faults │ │ │ │ │ │ sample/filters/nr_filters - │ │ │ │ │ │ │ 0/type,matching,allow,cpumask + │ │ │ │ │ │ │ 0/type,matching,allow,cpumask,tid_arr │ │ │ │ │ │ nr_regions/min,max │ │ │ │ │ :ref:`targets `/nr_targets │ │ │ │ │ │ :ref:`0 `/pid_target,obsolete_target @@ -260,13 +260,14 @@ context, in the order of the directory names. Writing ``0`` to ``nr_filters`` removes the directories. Under the individual filter directory, five files, ``type``, ``matching``, -``allow``, and ``cpumask`` exist. The first three files are same to the -properties of the filter specified on the :ref:`design doc +``allow``, ``cpumask`` and ``tid_arr`` exist. The first three files are the same +as the properties of the filter specified on the :ref:`design doc `. -Users can specify the cpumask of the filter by writing it to ``cpumask``. The -format for ``cpumask`` input is same to that for other cpumask inputs like that -for cgroup cpusets. +Users can specify the cpumask and thread id array of the filter by writing +those to ``cpumask`` and ``tid_arr``. The format for ``cpumask`` input is the same +as that for other cpumask inputs like that for cgroup cpusets. For +``tid_arr``, users can write an array of thread ids, separated by a comma. .. _damon_usage_sysfs_monitoring_intervals_goal: -- 2.47.3 damon_report_access() callers could know whether the reporting access was for reading or writing. The information can be used for fine-grained monitoring, such as monitoring only read-only accesses or write-only accesses. Extend the reporting data structure so that the caller can pass the information. 
Signed-off-by: SeongJae Park --- include/linux/damon.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/damon.h b/include/linux/damon.h index e3408280ea72..9299fc91ba27 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -117,6 +117,7 @@ struct damon_target { * @size: The size of the accessed address range. * @cpu: The id of the CPU that made the access. * @tid: The task id of the task that made the access. + * @is_write: Whether the access is write. * * Any DAMON API callers that notified access events can report the information * to DAMON using damon_report_access(). This struct contains the reporting @@ -127,6 +128,7 @@ struct damon_access_report { unsigned long size; unsigned int cpu; pid_t tid; + bool is_write; /* private: */ unsigned long report_jiffies; /* when this report is made */ }; -- 2.47.3 The page fault event reporting function can know if the access was for writing or reading. Add the information to the report. Signed-off-by: SeongJae Park --- mm/damon/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index 782af39ef0c0..e2fd17b83a92 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1732,6 +1732,7 @@ void damon_report_page_fault(struct vm_fault *vmf, bool huge_pmd) .size = 1, /* todo: set appripriately */ .cpu = smp_processor_id(), .tid = task_pid_vnr(current), + .is_write = vmf->flags & FAULT_FLAG_WRITE, }; if (huge_pmd) -- 2.47.3 Now page fault events based access reports contain information about if the access was for a read or a write. Doing sampling results filtering based on the information can be useful. Introduce a new type of damon sample filter for that. 
Signed-off-by: SeongJae Park --- include/linux/damon.h | 9 ++++++--- mm/damon/core.c | 5 +++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index 9299fc91ba27..f9fd26f7eab8 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -790,12 +790,14 @@ struct damon_primitives_enabled { * * @DAMON_FILTER_TYPE_CPUMASK: Filter by access-generated CPUs. * @DAMON_FILTER_TYPE_THREADS: Filter by access-generated threads. + * @DAMON_FILTER_TYPE_WRITE: Filter by whether the access was for writing. * * Read &struct damon_sample_control for more details. */ enum damon_sample_filter_type { DAMON_FILTER_TYPE_CPUMASK, DAMON_FILTER_TYPE_THREADS, + DAMON_FILTER_TYPE_WRITE, }; /** @@ -836,9 +838,10 @@ struct damon_sample_filter { * that to make higher access pattern picture. It can use multiple sampling * primitives including page table accessed bits and page fault events. It can * also filter in/out specific types of sampled access events to monitor - * accesses of specific types, such as access-generated CPUs and threads. This - * struct is for controlling what sampling primitives to use (enable), and what - * sampled access events should be filtered in/out. + * accesses of specific types, such as access-generated CPUs, threads, and + * whether it was for reads or writes. This struct is for controlling what + * sampling primitives to use (enable), and what sampled access events should + * be filtered in/out. 
*/ struct damon_sample_control { struct damon_primitives_enabled primitives_enabled; diff --git a/mm/damon/core.c b/mm/damon/core.c index e2fd17b83a92..5f29e4dee267 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1336,6 +1336,8 @@ static int damon_commit_sample_filter_arg(struct damon_sample_filter *dst, src->nr_tids); dst->nr_tids = src->nr_tids; break; + case DAMON_FILTER_TYPE_WRITE: + break; default: break; } @@ -2912,6 +2914,9 @@ static bool damon_sample_filter_matching(struct damon_access_report *report, break; } break; + case DAMON_FILTER_TYPE_WRITE: + matched = report->is_write; + break; default: break; } -- 2.47.3 Allow users utilizing the write/read-only access sample results filtering, by adding a new supported input for specifying the filter is for that purpose. Signed-off-by: SeongJae Park --- mm/damon/sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index b96df2d2d17e..d6f6a012a0e2 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -782,6 +782,10 @@ damon_sysfs_sample_filter_type_names[] = { .type = DAMON_FILTER_TYPE_THREADS, .name = "threads", }, + { + .type = DAMON_FILTER_TYPE_WRITE, + .name = "write", + }, }; static ssize_t type_show(struct kobject *kobj, -- 2.47.3 Update DAMON design document for the write access sample results filtering. Signed-off-by: SeongJae Park --- Documentation/mm/damon/design.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst index 4c6d83263a7b..b194bf4a6519 100644 --- a/Documentation/mm/damon/design.rst +++ b/Documentation/mm/damon/design.rst @@ -148,7 +148,7 @@ the page protection in a way similar to that of NUMA balancing hinting faults. Then the page fault handler reports the faults happend by the installed page protection to DAMON core layer. 
Compared to the accessed-bit based one, this approach gives more information to DAMON, such as the access-generated -CPU/threads. +CPU/threads and whether it was for writing or reading. ``paddr`` uses :ref:`accessed-bit based check ` by default, and users can change @@ -317,6 +317,8 @@ Below ``type`` of access sampling results filter are currently supported. - threads - Whether the sampled access was generated by a thread that having a thread (task) id that exist in a given array of ids. +- write + - Whether the sampled access was for writing. .. _damon_design_age_tracking: -- 2.47.3