Currently, struct seq_file.private is used as an iterator in stack_list by stack_start|next(), for stack_print(). Create a context struct for this, in order to add another field next. No behavior change intended. P.S.: page_owner_stack_open() is expanded with separate statements for variable definition and return just in preparation for the next patch. Signed-off-by: Mauricio Faria de Oliveira --- mm/page_owner.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/mm/page_owner.c b/mm/page_owner.c index c3ca21132c2c..c1a7d7afe945 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -45,6 +45,10 @@ static struct stack failure_stack; static struct stack *stack_list; static DEFINE_SPINLOCK(stack_list_lock); +struct stack_print_ctx { + struct stack *stack; +}; + static bool page_owner_enabled __initdata; DEFINE_STATIC_KEY_FALSE(page_owner_inited); @@ -856,6 +860,7 @@ static const struct file_operations proc_page_owner_operations = { static void *stack_start(struct seq_file *m, loff_t *ppos) { struct stack *stack; + struct stack_print_ctx *ctx = m->private; if (*ppos == -1UL) return NULL; @@ -867,9 +872,9 @@ static void *stack_start(struct seq_file *m, loff_t *ppos) * value of stack_list. */ stack = smp_load_acquire(&stack_list); - m->private = stack; + ctx->stack = stack; } else { - stack = m->private; + stack = ctx->stack; } return stack; @@ -878,10 +883,11 @@ static void *stack_start(struct seq_file *m, loff_t *ppos) static void *stack_next(struct seq_file *m, void *v, loff_t *ppos) { struct stack *stack = v; + struct stack_print_ctx *ctx = m->private; stack = stack->next; *ppos = stack ? *ppos + 1 : -1UL; - m->private = stack; + ctx->stack = stack; return stack; } @@ -926,7 +932,10 @@ static const struct seq_operations page_owner_stack_op = { static int page_owner_stack_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &page_owner_stack_op, 0); + int ret = seq_open_private(file, &page_owner_stack_op, + sizeof(struct stack_print_ctx)); + + return ret; } static const struct file_operations page_owner_stack_operations = { -- 2.48.1 Add the flags field to stack_print_ctx, and define two flags for current behavior (printing stack traces and their number of base pages). The plumbing of flags is debugfs_create_file(data) -> inode.i_private -> page_owner_stack_open() -> stack_print_ctx.flags -> stack_print(). No behavior change intended. Signed-off-by: Mauricio Faria de Oliveira --- mm/page_owner.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/mm/page_owner.c b/mm/page_owner.c index c1a7d7afe945..e983ac21a4db 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -45,8 +45,12 @@ static struct stack failure_stack; static struct stack *stack_list; static DEFINE_SPINLOCK(stack_list_lock); +#define STACK_PRINT_FLAG_STACK 0x1 +#define STACK_PRINT_FLAG_PAGES 0x2 + struct stack_print_ctx { struct stack *stack; + u8 flags; }; static bool page_owner_enabled __initdata; @@ -901,20 +905,24 @@ static int stack_print(struct seq_file *m, void *v) unsigned long *entries; unsigned long nr_entries; struct stack_record *stack_record = stack->stack_record; + struct stack_print_ctx *ctx = m->private; if (!stack->stack_record) return 0; - nr_entries = stack_record->size; - entries = stack_record->entries; nr_base_pages = refcount_read(&stack_record->count) - 1; if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold) return 0; - for (i = 0; i < nr_entries; i++) - seq_printf(m, " %pS\n", (void *)entries[i]); - seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages); + if (ctx->flags & STACK_PRINT_FLAG_STACK) { + nr_entries = stack_record->size; + entries = stack_record->entries; + for (i = 0; i < nr_entries; i++) + seq_printf(m, " %pS\n", (void *)entries[i]); + } + if (ctx->flags & STACK_PRINT_FLAG_PAGES) + seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages); return 0; } @@ -935,6 +943,13 @@ static int page_owner_stack_open(struct inode *inode, struct file *file) int ret = seq_open_private(file, &page_owner_stack_op, sizeof(struct stack_print_ctx)); + if (!ret) { + struct seq_file *m = file->private_data; + struct stack_print_ctx *ctx = m->private; + + ctx->flags = (uintptr_t) inode->i_private; + } + return ret; } @@ -973,7 +988,9 @@ static int __init pageowner_init(void) debugfs_create_file("page_owner", 0400, NULL, NULL, &proc_page_owner_operations); dir = debugfs_create_dir("page_owner_stacks", NULL); - debugfs_create_file("show_stacks", 0400, dir, NULL, + debugfs_create_file("show_stacks", 0400, dir, + (void *)(STACK_PRINT_FLAG_STACK | + STACK_PRINT_FLAG_PAGES), &page_owner_stack_operations); debugfs_create_file("count_threshold", 0600, dir, NULL, &proc_page_owner_threshold); -- 2.48.1 Add the flag STACK_PRINT_FLAG_HANDLE to print a stack's handle number from stackdepot, and add the file 'show_handles' to show just handles and their number of pages. This is similar to 'show_stacks', with handles instead of stack traces. Signed-off-by: Mauricio Faria de Oliveira --- mm/page_owner.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/page_owner.c b/mm/page_owner.c index e983ac21a4db..5d488fce0e3d 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -47,6 +47,7 @@ static DEFINE_SPINLOCK(stack_list_lock); #define STACK_PRINT_FLAG_STACK 0x1 #define STACK_PRINT_FLAG_PAGES 0x2 +#define STACK_PRINT_FLAG_HANDLE 0x4 struct stack_print_ctx { struct stack *stack; @@ -921,6 +922,8 @@ static int stack_print(struct seq_file *m, void *v) for (i = 0; i < nr_entries; i++) seq_printf(m, " %pS\n", (void *)entries[i]); } + if (ctx->flags & STACK_PRINT_FLAG_HANDLE) + seq_printf(m, "handle: %d\n", stack_record->handle.handle); if (ctx->flags & STACK_PRINT_FLAG_PAGES) seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages); @@ -992,6 +995,10 @@ static int __init pageowner_init(void) (void *)(STACK_PRINT_FLAG_STACK | STACK_PRINT_FLAG_PAGES), &page_owner_stack_operations); + debugfs_create_file("show_handles", 0400, dir, + (void *)(STACK_PRINT_FLAG_HANDLE | + STACK_PRINT_FLAG_PAGES), + &page_owner_stack_operations); debugfs_create_file("count_threshold", 0600, dir, NULL, &proc_page_owner_threshold); -- 2.48.1 Add the file 'show_stacks_handles' to show just stack traces and their handles, in order to resolve stack traces and handles (i.e., to identify the stack traces for handles in previous reads from 'show_handles'). All stacks/handles must show up, regardless of their number of pages, that might have become zero or no longer make 'count_threshold', but made it in previous reads from 'show_handles' -- and need to be resolved later. P.S.: now, print the extra newline independently of the number of pages. Signed-off-by: Mauricio Faria de Oliveira --- mm/page_owner.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/page_owner.c b/mm/page_owner.c index 5d488fce0e3d..0e5c7bb3e4e8 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -913,7 +913,8 @@ static int stack_print(struct seq_file *m, void *v) nr_base_pages = refcount_read(&stack_record->count) - 1; - if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold) + if (ctx->flags & STACK_PRINT_FLAG_PAGES && + (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold)) return 0; if (ctx->flags & STACK_PRINT_FLAG_STACK) { @@ -925,7 +926,8 @@ static int stack_print(struct seq_file *m, void *v) if (ctx->flags & STACK_PRINT_FLAG_HANDLE) seq_printf(m, "handle: %d\n", stack_record->handle.handle); if (ctx->flags & STACK_PRINT_FLAG_PAGES) - seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages); + seq_printf(m, "nr_base_pages: %d\n", nr_base_pages); + seq_putc(m, '\n'); return 0; } @@ -999,6 +1001,10 @@ static int __init pageowner_init(void) (void *)(STACK_PRINT_FLAG_HANDLE | STACK_PRINT_FLAG_PAGES), &page_owner_stack_operations); + debugfs_create_file("show_stacks_handles", 0400, dir, + (void *)(STACK_PRINT_FLAG_STACK | + STACK_PRINT_FLAG_HANDLE), + &page_owner_stack_operations); debugfs_create_file("count_threshold", 0600, dir, NULL, &proc_page_owner_threshold); -- 2.48.1 Describe and provide examples for 'show_handles' and 'show_stacks_handles'. Signed-off-by: Mauricio Faria de Oliveira --- Documentation/mm/page_owner.rst | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/Documentation/mm/page_owner.rst b/Documentation/mm/page_owner.rst index 3a45a20fc05a..6b12f3b007ec 100644 --- a/Documentation/mm/page_owner.rst +++ b/Documentation/mm/page_owner.rst @@ -27,7 +27,10 @@ enabled. Other usages are more than welcome. It can also be used to show all the stacks and their current number of allocated base pages, which gives us a quick overview of where the memory is going without the need to screen through all the pages and match the -allocation and free operation. +allocation and free operation. It's also possible to show only a numeric +identifier of all the stacks (without stack traces) and their number of +allocated base pages (faster to read and parse, eg, for monitoring) that +can be matched with stacks later (show_handles and show_stacks_handles). page owner is disabled by default. So, if you'd like to use it, you need to add "page_owner=on" to your boot cmdline. If the kernel is built @@ -116,6 +119,33 @@ Usage nr_base_pages: 20824 ... + cat /sys/kernel/debug/page_owner_stacks/show_handles > handles_7000.txt + cat handles_7000.txt + handle: 42 + nr_base_pages: 20824 + ... + + cat /sys/kernel/debug/page_owner_stacks/show_stacks_handles > stacks_handles.txt + cat stacks_handles.txt + post_alloc_hook+0x177/0x1a0 + get_page_from_freelist+0xd01/0xd80 + __alloc_pages+0x39e/0x7e0 + alloc_pages_mpol+0x22e/0x490 + folio_alloc+0xd5/0x110 + filemap_alloc_folio+0x78/0x230 + page_cache_ra_order+0x287/0x6f0 + filemap_get_pages+0x517/0x1160 + filemap_read+0x304/0x9f0 + xfs_file_buffered_read+0xe6/0x1d0 [xfs] + xfs_file_read_iter+0x1f0/0x380 [xfs] + __kernel_read+0x3b9/0x730 + kernel_read_file+0x309/0x4d0 + __do_sys_finit_module+0x381/0x730 + do_syscall_64+0x8d/0x150 + entry_SYSCALL_64_after_hwframe+0x62/0x6a + handle: 42 + ... + cat /sys/kernel/debug/page_owner > page_owner_full.txt ./page_owner_sort page_owner_full.txt sorted_page_owner.txt -- 2.48.1