Currently, when a function has both a weak and a strong definition across different compilation units (CUs), the BTF encoder arbitrarily selects one to generate the BTF entry. This selection depends fundamentally on the order in which pahole processes the CUs. This arbitrariness often leads to a mismatch where the generated BTF reflects the weak definition's prototype, even though the linker selected the strong definition for the final vmlinux binary. A notable example, described in [0], involves the function bpf_lsm_mmap_file(). Both weak and strong definitions exist, distinguished only by parameter names (e.g., file vs file__nullable). While the strong definition is linked into the vmlinux object, the generated BTF contains the prototype for the weak definition. This causes issues for the BPF verifier (e.g., __nullable annotation semantics) and for tools relying on accurate type information. To fix this, ensure the BTF encoder selects the function definition corresponding to the actual code linked into the binary. This is achieved by comparing the DWARF function address (DW_AT_low_pc) with the ELF symbol address (st_value). Only the DWARF entry for the strong definition will match the final resolved ELF symbol address.
[0] https://lore.kernel.org/all/aVJY9H-e83T7ivT4@google.com/ Link: https://lore.kernel.org/all/aVJY9H-e83T7ivT4@google.com/ Signed-off-by: Matt Bobrowski --- btf_encoder.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/btf_encoder.c b/btf_encoder.c index b37ee7f..0462094 100644 --- a/btf_encoder.c +++ b/btf_encoder.c @@ -79,6 +79,7 @@ struct btf_encoder_func_annot { /* state used to do later encoding of saved functions */ struct btf_encoder_func_state { + uint64_t addr; struct elf_function *elf; uint32_t type_id_off; uint16_t nr_parms; @@ -1258,6 +1259,7 @@ static int32_t btf_encoder__save_func(struct btf_encoder *encoder, struct functi if (!state) return -ENOMEM; + state->addr = function__addr(fn); state->elf = func; state->nr_parms = ftype->nr_parms + (ftype->unspec_parms ? 1 : 0); state->ret_type_id = ftype->tag.type == 0 ? 0 : encoder->type_id_off + ftype->tag.type; @@ -1477,6 +1479,29 @@ static void btf_encoder__delete_saved_funcs(struct btf_encoder *encoder) encoder->func_states.cap = 0; } +static struct btf_encoder_func_state *btf_encoder__select_canonical_state(struct btf_encoder_func_state *combined_states, + int combined_cnt) +{ + int i, j; + + /* + * The same elf_function is shared amongst combined functions, + * as per saved_functions_combine(). + */ + struct elf_function *elf = combined_states[0].elf; + + for (i = 0; i < combined_cnt; i++) { + struct btf_encoder_func_state *state = &combined_states[i]; + + for (j = 0; j < elf->sym_cnt; j++) { + if (state->addr == elf->syms[j].addr) + return state; + } + } + + return &combined_states[0]; +} + static int btf_encoder__add_saved_funcs(struct btf_encoder *encoder, bool skip_encoding_inconsistent_proto) { struct btf_encoder_func_state *saved_fns = encoder->func_states.array; @@ -1517,6 +1542,17 @@ static int btf_encoder__add_saved_funcs(struct btf_encoder *encoder, bool skip_e 0, 0); if (add_to_btf) { + /* + * We're to add the current function within + * BTF. 
Although, from all functions that have + * possibly been combined via + * saved_functions_combine(), ensure to only + * select and emit BTF for the most canonical + * function definition. + */ + if (j - i > 1) + state = btf_encoder__select_canonical_state(state, j - i); + if (is_kfunc_state(state)) err = btf_encoder__add_bpf_kfunc(encoder, state); else -- 2.52.0.351.gbe84eed79e-goog