Currently every function is checked for its parameters to identify whether the signature changed or not. If signature indeed changed, pahole may do some adjustment for parameters for true signatures. In clang, any function with the following attribute DW_AT_calling_convention (DW_CC_nocall) indicates this function having signature changed. pahole can take advantage of this to avoid parameter checking if DW_AT_calling_convention is not DW_CC_nocall. But more importantly, DW_CC_nocall can identify signature-changed functions and parameters can be checked one-after-another to create the true signatures. Otherwise, it takes more effort to identify whether a function has signature changed or not. For example, for funciton __bpf_kfunc static void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) { ... } and bbr_main() is a callback function in .cong_control = bbr_main in 'struct tcp_congestion_ops tcp_bbr_cong_ops'. In the above bbr_main(...), parameter 'ack' and 'flag' are not used. The following are some details: 0x0a713b8d: DW_TAG_formal_parameter DW_AT_location (indexed (0x28) loclist = 0x0166d452: [0xffffffff83e77fd9, 0xffffffff83e78016): DW_OP_reg5 RDI ... DW_AT_name ("sk") DW_AT_type (0x0a6f5b2b "sock *") ... 0x0a713b98: DW_TAG_formal_parameter DW_AT_name ("ack") DW_AT_type (0x0a6f58fd "u32") ... 0x0a713ba2: DW_TAG_formal_parameter DW_AT_name ("flag") DW_AT_type (0x0a6f57d1 "int") ... 0x0a713bac: DW_TAG_formal_parameter DW_AT_location (indexed (0x29) loclist = 0x0166d4a8: [0xffffffff83e77fd9, 0xffffffff83e78016): DW_OP_reg2 RCX ... DW_AT_name ("rs") DW_AT_type (0x0a710da5 "const rate_sample *") DW_AT_decl_line (1027) Some analysis for the above dwarf can conclude that the 'ark' and 'flag' may be related to RSI and RDX, considering the last one is RCX. Basically this requires all parameters are available to collectively decide whether the true signature can be found or not. In such case, DW_CC_nocall can make things easier as parameter can be checked one after another. For a clang built bpf-next kernel, in non-LTO setup, the number of kernel functions is 69103 and the number of signature changed functions is 875, based on DW_AT_calling_convention (DW_CC_nocall) indication. Among 875 signature changed functions, after this patch, 495 functions can have proper true signatures, mostly due to simple dead argument elimination. The number of remaining functions, which cannot get the true signature, is 379. They will be addressed in the subsequent commits. In llvm23, I implemented [1] which added DW_CC_nocall for ArgumentPromotion pass. This compiler pass can add additional DW_CC_nocall cases for the following compilation: - Flag -O3 or FullLTO So once llvm23 available, we may have more DW_CC_nocall cases, hence more potential true signatures if the kernel is built with -O3 or with FullLTO (CONFIG_LTO_CLANG_FULL). [1] https://github.com/llvm/llvm-project/pull/178973 Signed-off-by: Yonghong Song --- dwarf_loader.c | 70 ++++++++++++++++++++++++++++++++++++++++++-------- dwarves.h | 1 + 2 files changed, 60 insertions(+), 11 deletions(-) diff --git a/dwarf_loader.c b/dwarf_loader.c index 16fb7be..21f8bf1 100644 --- a/dwarf_loader.c +++ b/dwarf_loader.c @@ -1190,6 +1190,10 @@ static ptrdiff_t __dwarf_getlocations(Dwarf_Attribute *attr, return ret; } +struct func_info { + bool signature_changed; +}; + /* For DW_AT_location 'attr': * - if first location is DW_OP_regXX with expected number, return the register; * otherwise save the register for later return @@ -1252,7 +1256,8 @@ out: } static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu, - struct conf_load *conf, int param_idx) + struct conf_load *conf, int param_idx, + struct func_info *info) { struct parameter *parm = tag__alloc(cu, sizeof(*parm)); @@ -1263,8 +1268,12 @@ static struct parameter *parameter__new(Dwarf_Die *die, struct cu *cu, tag__init(&parm->tag, cu, die); parm->name = attr_string(die, DW_AT_name, conf); parm->idx = param_idx; - if (param_idx >= cu->nr_register_params || param_idx < 0) + if (param_idx < 0) return parm; + if (!info->signature_changed) { + if (cu->producer_clang || param_idx >= cu->nr_register_params) + return parm; + } /* Parameters which use DW_AT_abstract_origin to point at * the original parameter definition (with no name in the DIE) * are the result of later DWARF generation during compilation @@ -1337,7 +1346,7 @@ static int formal_parameter_pack__load_params(struct formal_parameter_pack *pack continue; } - struct parameter *param = parameter__new(die, cu, conf, -1); + struct parameter *param = parameter__new(die, cu, conf, -1, NULL); if (param == NULL) return -1; @@ -1502,6 +1511,29 @@ static struct ftype *ftype__new(Dwarf_Die *die, struct cu *cu) return ftype; } +static bool function__signature_changed(struct function *func, Dwarf_Die *die) +{ + /* The inlined DW_TAG_subprogram typically has the original source type for + * abstract origin of a concrete function with address range, inlined subroutine, + * or call site. + */ + if (func->inlined) + return false; + + if (!func->abstract_origin) + return attr_numeric(die, DW_AT_calling_convention) == DW_CC_nocall; + + Dwarf_Attribute attr; + if (dwarf_attr(die, DW_AT_abstract_origin, &attr)) { + Dwarf_Die origin; + if (dwarf_formref_die(&attr, &origin)) + return attr_numeric(&origin, DW_AT_calling_convention) == DW_CC_nocall; + } + + /* This should not happen */ + return false; +} + static struct function *function__new(Dwarf_Die *die, struct cu *cu, struct conf_load *conf) { struct function *func = tag__alloc(cu, sizeof(*func)); @@ -1800,9 +1832,9 @@ static struct tag *die__create_new_parameter(Dwarf_Die *die, struct ftype *ftype, struct lexblock *lexblock, struct cu *cu, struct conf_load *conf, - int param_idx) + int param_idx, struct func_info *info) { - struct parameter *parm = parameter__new(die, cu, conf, param_idx); + struct parameter *parm = parameter__new(die, cu, conf, param_idx, info); if (parm == NULL) return NULL; @@ -1889,7 +1921,7 @@ static struct tag *die__create_new_subroutine_type(Dwarf_Die *die, tag__print_not_supported(die); continue; case DW_TAG_formal_parameter: - tag = die__create_new_parameter(die, ftype, NULL, cu, conf, -1); + tag = die__create_new_parameter(die, ftype, NULL, cu, conf, -1, NULL); break; case DW_TAG_unspecified_parameters: ftype->unspec_parms = 1; @@ -2118,7 +2150,8 @@ out_enomem: } static int die__process_function(Dwarf_Die *die, struct ftype *ftype, - struct lexblock *lexblock, struct cu *cu, struct conf_load *conf); + struct lexblock *lexblock, struct cu *cu, struct conf_load *conf, + struct func_info *info); static int die__create_new_lexblock(Dwarf_Die *die, struct cu *cu, struct lexblock *father, struct conf_load *conf) @@ -2126,7 +2159,7 @@ static int die__create_new_lexblock(Dwarf_Die *die, struct lexblock *lexblock = lexblock__new(die, cu); if (lexblock != NULL) { - if (die__process_function(die, NULL, lexblock, cu, conf) != 0) + if (die__process_function(die, NULL, lexblock, cu, conf, NULL) != 0) goto out_delete; } if (father != NULL) @@ -2246,7 +2279,8 @@ static struct tag *die__create_new_inline_expansion(Dwarf_Die *die, } static int die__process_function(Dwarf_Die *die, struct ftype *ftype, - struct lexblock *lexblock, struct cu *cu, struct conf_load *conf) + struct lexblock *lexblock, struct cu *cu, struct conf_load *conf, + struct func_info *info) { int param_idx = 0; Dwarf_Die child; @@ -2320,7 +2354,7 @@ static int die__process_function(Dwarf_Die *die, struct ftype *ftype, continue; } case DW_TAG_formal_parameter: - tag = die__create_new_parameter(die, ftype, lexblock, cu, conf, param_idx++); + tag = die__create_new_parameter(die, ftype, lexblock, cu, conf, param_idx++, info); break; case DW_TAG_variable: tag = die__create_new_variable(die, cu, conf, 0); @@ -2391,9 +2425,10 @@ out_enomem: static struct tag *die__create_new_function(Dwarf_Die *die, struct cu *cu, struct conf_load *conf) { struct function *function = function__new(die, cu, conf); + struct func_info info = { function__signature_changed(function, die) }; if (function != NULL && - die__process_function(die, &function->proto, &function->lexblock, cu, conf) != 0) { + die__process_function(die, &function->proto, &function->lexblock, cu, conf, &info) != 0) { function__delete(function, cu); function = NULL; } @@ -3045,6 +3080,17 @@ static unsigned long long dwarf_tag__orig_id(const struct tag *tag, return cu->extra_dbg_info ? dtag->id : 0; } +static bool attr_producer_clang(Dwarf_Die *die) +{ + const char *producer; + + producer = attr_string(die, DW_AT_producer, NULL); + if (!producer) + return false; + + return !!strstr(producer, "clang"); +} + struct debug_fmt_ops dwarf__ops; static int die__process(Dwarf_Die *die, struct cu *cu, struct conf_load *conf) @@ -3082,6 +3128,7 @@ static int die__process(Dwarf_Die *die, struct cu *cu, struct conf_load *conf) } cu->language = attr_numeric(die, DW_AT_language); + cu->producer_clang = attr_producer_clang(die); if (conf->early_cu_filter) cu = conf->early_cu_filter(cu); @@ -3841,6 +3888,7 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf, cu->priv = dcu; cu->dfops = &dwarf__ops; cu->language = attr_numeric(cu_die, DW_AT_language); + cu->producer_clang = attr_producer_clang(cu_die); cus__add(cus, cu); } diff --git a/dwarves.h b/dwarves.h index d7c6474..4cabab0 100644 --- a/dwarves.h +++ b/dwarves.h @@ -302,6 +302,7 @@ struct cu { uint8_t has_addr_info:1; uint8_t uses_global_strings:1; uint8_t little_endian:1; + uint8_t producer_clang:1; uint8_t nr_register_params; int register_params[ARCH_MAX_REGISTER_PARAMS]; int functions_saved; -- 2.52.0