From: pengdonglin The current funcgraph-retval implementation suffers from two accuracy issues: 1. Void-returning functions still print a return value, creating misleading noise in the trace output. 2. For functions returning narrower types (e.g., char, short), the displayed value can be incorrect because high bits of the register may contain undefined data. This patch addresses both problems by leveraging BTF to obtain the exact return type of each traced kernel function. The key changes are: 1. Void function filtering: Functions with void return type no longer display any return value in the trace output, eliminating unnecessary clutter. 2. Type-aware value formatting: The return value is now properly truncated to match the actual width of the return type before being displayed. Additionally, the value is formatted according to its type for better human readability. Here is an output comparison: Before: # perf ftrace -G vfs_read --graph-opts retval ... 1) | touch_atime() { 1) | atime_needs_update() { 1) 0.069 us | make_vfsuid(); /* ret=0x0 */ 1) 0.067 us | make_vfsgid(); /* ret=0x0 */ 1) | current_time() { 1) 0.197 us | ktime_get_coarse_real_ts64_mg(); /* ret=0x187f886aec3ed6f5 */ 1) 0.352 us | } /* current_time ret=0x69380753 */ 1) 0.792 us | } /* atime_needs_update ret=0x0 */ 1) 0.937 us | } /* touch_atime ret=0x0 */ After: # perf ftrace -G vfs_read --graph-opts retval ... 2) | touch_atime() { 2) | atime_needs_update() { 2) 0.070 us | make_vfsuid(); /* ret=0x0 */ 2) 0.070 us | make_vfsgid(); /* ret=0x0 */ 2) | current_time() { 2) 0.162 us | ktime_get_coarse_real_ts64_mg(); 2) 0.312 us | } /* current_time ret=0x69380649(trunc) */ 2) 0.753 us | } /* atime_needs_update ret=false */ 2) 0.899 us | } /* touch_atime */ Cc: Steven Rostedt (Google) Cc: Masami Hiramatsu Cc: Xiaoqin Zhang Signed-off-by: pengdonglin --- kernel/trace/trace_functions_graph.c | 124 ++++++++++++++++++++++++--- 1 file changed, 111 insertions(+), 13 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 17c75cf2348e..46b66b1cfc16 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -15,6 +15,7 @@ #include "trace.h" #include "trace_output.h" +#include "trace_btf.h" /* When set, irq functions might be ignored */ static int ftrace_graph_skip_irqs; @@ -120,6 +121,13 @@ enum { FLAGS_FILL_END = 3 << TRACE_GRAPH_PRINT_FILL_SHIFT, }; +enum { + RETVAL_FMT_HEX = BIT(0), + RETVAL_FMT_DEC = BIT(1), + RETVAL_FMT_BOOL = BIT(2), + RETVAL_FMT_TRUNC = BIT(3), +}; + static void print_graph_duration(struct trace_array *tr, unsigned long long duration, struct trace_seq *s, u32 flags); @@ -865,6 +873,73 @@ static void print_graph_retaddr(struct trace_seq *s, struct fgraph_retaddr_ent_e #if defined(CONFIG_FUNCTION_GRAPH_RETVAL) || defined(CONFIG_FUNCTION_GRAPH_RETADDR) +static void trim_retval(unsigned long func, unsigned long *retval, bool *print_retval, + int *fmt) +{ + const struct btf_type *t; + char name[KSYM_NAME_LEN]; + struct btf *btf; + u32 v, msb; + int kind; + + if (!IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) + return; + + if (lookup_symbol_name(func, name)) + return; + + t = btf_find_func_proto(name, &btf); + if (IS_ERR_OR_NULL(t)) + return; + + t = btf_type_skip_modifiers(btf, t->type, NULL); + kind = t ? BTF_INFO_KIND(t->info) : BTF_KIND_UNKN; + switch (kind) { + case BTF_KIND_UNKN: + *print_retval = false; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + if (kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION) + *fmt = RETVAL_FMT_HEX; + else + *fmt = RETVAL_FMT_DEC; + + if (t->size > sizeof(unsigned long)) { + *fmt |= RETVAL_FMT_TRUNC; + } else { + msb = BITS_PER_BYTE * t->size - 1; + *retval &= GENMASK(msb, 0); + } + break; + case BTF_KIND_INT: + v = *(u32 *)(t + 1); + if (BTF_INT_ENCODING(v) == BTF_INT_BOOL) { + *fmt = RETVAL_FMT_BOOL; + msb = 0; + } else { + if (BTF_INT_ENCODING(v) == BTF_INT_SIGNED) + *fmt = RETVAL_FMT_DEC; + else + *fmt = RETVAL_FMT_HEX; + + if (t->size > sizeof(unsigned long)) { + *fmt |= RETVAL_FMT_TRUNC; + msb = BITS_PER_LONG - 1; + } else { + msb = BTF_INT_BITS(v) - 1; + } + } + *retval &= GENMASK(msb, 0); + break; + default: + *fmt = RETVAL_FMT_HEX; + break; + } +} + static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entry *entry, struct ftrace_graph_ret *graph_ret, void *func, u32 opt_flags, u32 trace_flags, int args_size) @@ -873,7 +948,7 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr unsigned long retval = 0; bool print_retaddr = false; bool print_retval = false; - bool hex_format = !!(opt_flags & TRACE_GRAPH_PRINT_RETVAL_HEX); + int retval_fmt = 0; #ifdef CONFIG_FUNCTION_GRAPH_RETVAL retval = graph_ret->retval; @@ -884,17 +959,35 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr print_retaddr = !!(opt_flags & TRACE_GRAPH_PRINT_RETADDR); #endif - if (print_retval && retval && !hex_format) { - /* Check if the return value matches the negative format */ - if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) && - (((u64)retval) >> 32) == 0) { - err_code = sign_extend64(retval, 31); - } else { - err_code = retval; + if (print_retval) { + int fmt = RETVAL_FMT_HEX; + + trim_retval((unsigned long)func, &retval, &print_retval, &fmt); + if (print_retval) { + if (opt_flags & TRACE_GRAPH_PRINT_RETVAL_HEX) + retval_fmt = RETVAL_FMT_HEX; + + if (retval && retval_fmt != RETVAL_FMT_HEX) { + /* Check if the return value matches the negative format */ + if (IS_ENABLED(CONFIG_64BIT) && (retval & BIT(31)) && + (((u64)retval) >> 32) == 0) { + err_code = sign_extend64(retval, 31); + } else { + err_code = retval; + } + + if (!IS_ERR_VALUE(err_code)) + err_code = 0; + } + + if (retval_fmt == RETVAL_FMT_HEX) { + retval_fmt |= (fmt & RETVAL_FMT_TRUNC); + } else { + if (err_code && fmt & RETVAL_FMT_HEX) + fmt = (fmt & ~RETVAL_FMT_HEX) | RETVAL_FMT_DEC; + retval_fmt = fmt; + } } - - if (!IS_ERR_VALUE(err_code)) - err_code = 0; } if (entry) { @@ -921,10 +1014,15 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr trace_flags, false); if (print_retval) { - if (hex_format || (err_code == 0)) + if (retval_fmt & RETVAL_FMT_HEX) trace_seq_printf(s, " ret=0x%lx", retval); + else if (retval_fmt & RETVAL_FMT_BOOL) + trace_seq_printf(s, " ret=%s", retval ? "true" : "false"); else - trace_seq_printf(s, " ret=%ld", err_code); + trace_seq_printf(s, " ret=%ld", err_code ?: retval); + + if (retval_fmt & RETVAL_FMT_TRUNC) + trace_seq_printf(s, "(trunc)"); } if (!entry || print_retval || print_retaddr) -- 2.34.1 From: pengdonglin The existing documentation for funcgraph-retval is outdated and partially incorrect, as it describes limitations that have now been resolved. Recent changes (e.g., using BTF to obtain function return types) have addressed key issues: 1. Return values are now printed only for non-void functions. 2. Values are trimmed to the correct width of the return type, avoiding garbage data from high bits. Cc: Steven Rostedt (Google) Cc: Masami Hiramatsu Cc: Xiaoqin Zhang Signed-off-by: pengdonglin --- Documentation/trace/ftrace.rst | 78 ++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index d1f313a5f4ad..03c8c433c803 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -1454,6 +1454,10 @@ Options for function_graph tracer: printed in hexadecimal format. By default, this option is off. + funcgraph-retaddr + When set, the return address will always be printed. + By default, this option is off. + sleep-time When running function graph tracer, to include the time a task schedules out in its function. @@ -2800,7 +2804,7 @@ It is default disabled. 0) 2.861 us | } /* putname() */ The return value of each traced function can be displayed after -an equal sign "=". When encountering system call failures, it +an equal sign "ret =". When encountering system call failures, it can be very helpful to quickly locate the function that first returns an error code. @@ -2810,16 +2814,16 @@ returns an error code. Example with funcgraph-retval:: 1) | cgroup_migrate() { - 1) 0.651 us | cgroup_migrate_add_task(); /* = 0xffff93fcfd346c00 */ + 1) 0.651 us | cgroup_migrate_add_task(); /* ret=0xffff93fcfd346c00 */ 1) | cgroup_migrate_execute() { 1) | cpu_cgroup_can_attach() { 1) | cgroup_taskset_first() { - 1) 0.732 us | cgroup_taskset_next(); /* = 0xffff93fc8fb20000 */ - 1) 1.232 us | } /* cgroup_taskset_first = 0xffff93fc8fb20000 */ - 1) 0.380 us | sched_rt_can_attach(); /* = 0x0 */ - 1) 2.335 us | } /* cpu_cgroup_can_attach = -22 */ - 1) 4.369 us | } /* cgroup_migrate_execute = -22 */ - 1) 7.143 us | } /* cgroup_migrate = -22 */ + 1) 0.732 us | cgroup_taskset_next(); /* ret=0xffff93fc8fb20000 */ + 1) 1.232 us | } /* cgroup_taskset_first ret=0xffff93fc8fb20000 */ + 1) 0.380 us | sched_rt_can_attach(); /* ret=0x0 */ + 1) 2.335 us | } /* cpu_cgroup_can_attach ret=-22 */ + 1) 4.369 us | } /* cgroup_migrate_execute ret=-22 */ + 1) 7.143 us | } /* cgroup_migrate ret=-22 */ The above example shows that the function cpu_cgroup_can_attach returned the error code -22 firstly, then we can read the code @@ -2836,37 +2840,41 @@ printed in hexadecimal format. Example with funcgraph-retval-hex:: 1) | cgroup_migrate() { - 1) 0.651 us | cgroup_migrate_add_task(); /* = 0xffff93fcfd346c00 */ + 1) 0.651 us | cgroup_migrate_add_task(); /* ret=0xffff93fcfd346c00 */ 1) | cgroup_migrate_execute() { 1) | cpu_cgroup_can_attach() { 1) | cgroup_taskset_first() { - 1) 0.732 us | cgroup_taskset_next(); /* = 0xffff93fc8fb20000 */ - 1) 1.232 us | } /* cgroup_taskset_first = 0xffff93fc8fb20000 */ - 1) 0.380 us | sched_rt_can_attach(); /* = 0x0 */ - 1) 2.335 us | } /* cpu_cgroup_can_attach = 0xffffffea */ - 1) 4.369 us | } /* cgroup_migrate_execute = 0xffffffea */ + 1) 0.732 us | cgroup_taskset_next(); /* ret=0xffff93fc8fb20000 */ + 1) 1.232 us | } /* cgroup_taskset_first ret=0xffff93fc8fb20000 */ + 1) 0.380 us | sched_rt_can_attach(); /* ret=0x0 */ + 1) 2.335 us | } /* cpu_cgroup_can_attach ret=0xffffffea */ + 1) 4.369 us | } /* cgroup_migrate_execute ret=0xffffffea */ 1) 7.143 us | } /* cgroup_migrate = 0xffffffea */ -At present, there are some limitations when using the funcgraph-retval -option, and these limitations will be eliminated in the future: +Note that there are some limitations when using the funcgraph-retval +option: + +- If CONFIG_DEBUG_INFO_BTF is disabled (n), a return value is printed even for + functions with a void return type. When CONFIG_DEBUG_INFO_BTF is enabled (y), + the return value is printed only for non-void functions. -- Even if the function return type is void, a return value will still - be printed, and you can just ignore it. +- If a return value occupies multiple registers, only the value in the first + register is recorded and printed. For example, on the x86 architecture, a + 64-bit return value is stored across eax (lower 32 bits) and edx (upper 32 bits), + but only the contents of eax are captured. If CONFIG_DEBUG_INFO_BTF is enabled, + the suffix "(trunc)" is appended to the printed value to indicate that the + output may be truncated because high-order register contents are omitted. -- Even if return values are stored in multiple registers, only the - value contained in the first register will be recorded and printed. - To illustrate, in the x86 architecture, eax and edx are used to store - a 64-bit return value, with the lower 32 bits saved in eax and the - upper 32 bits saved in edx. However, only the value stored in eax - will be recorded and printed. +- Under certain procedure-call standards (e.g., arm64's AAPCS64), when the return + type is smaller than a general-purpose register (GPR), the caller is responsible + for narrowing the value; the upper bits of the register may contain undefined data. + For instance, when a u8 is returned in 64-bit GPR, bits [63:8] can hold arbitrary + values, especially when larger types are truncated (explicitly or implicitly). It + is therefore advisable to inspect the code in such cases. If CONFIG_DEBUG_INFO_BTF + is enabled (y), the return value is automatically trimmed to the width of the return + type. -- In certain procedure call standards, such as arm64's AAPCS64, when a - type is smaller than a GPR, it is the responsibility of the consumer - to perform the narrowing, and the upper bits may contain UNKNOWN values. - Therefore, it is advisable to check the code for such cases. For instance, - when using a u8 in a 64-bit GPR, bits [63:8] may contain arbitrary values, - especially when larger types are truncated, whether explicitly or implicitly. - Here are some specific cases to illustrate this point: + The following examples illustrate the behavior: **Case One**: @@ -2885,7 +2893,9 @@ option, and these limitations will be eliminated in the future: RET If you pass 0x123456789abcdef to this function and want to narrow it, - it may be recorded as 0x123456789abcdef instead of 0xef. + it may be recorded as 0x123456789abcdef instead of 0xef. When + CONFIG_DEBUG_INFO_BTF is enabled, the value will be correctly truncated + to 0xef based on the size constraints of the u8 type. **Case Two**: @@ -2910,7 +2920,9 @@ option, and these limitations will be eliminated in the future: RET When passing 0x2_0000_0000 to it, the return value may be recorded as - 0x2_0000_0000 instead of 0. + 0x2_0000_0000 instead of 0. When CONFIG_DEBUG_INFO_BTF is enabled, the + value will be correctly truncated to 0 based on the size constraints of + the int type. You can put some comments on specific functions by using trace_printk() For example, if you want to put a comment inside -- 2.34.1