Allow architectures to provide CFLAGS that should be added only after all other optional CFLAGS have been evaluated. This will be useful for flags that depend on other, generic ones. To allow 'LATE_CFLAGS' to make use of the $(cc-option ...) helper, assume it'll be a lazily evaluated variable. To further ensure the $(cc-option ...) compiler invocation overhead won't be per-use of $(CFLAGS), enforce its evaluation prior to extending CFLAGS. Signed-off-by: Mathias Krause --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index 9dc5d2234e2a..0ce0813bf124 100644 --- a/Makefile +++ b/Makefile @@ -95,6 +95,10 @@ CFLAGS += $(wmissing_parameter_type) CFLAGS += $(wold_style_declaration) CFLAGS += -Woverride-init -Wmissing-prototypes -Wstrict-prototypes +# Evaluate and add late cflags last -- they may depend on previous flags +LATE_CFLAGS := $(LATE_CFLAGS) +CFLAGS += $(LATE_CFLAGS) + autodepend-flags = -MMD -MP -MF $(dir $*).$(notdir $*).d LDFLAGS += -nostdlib $(no_pie) -z noexecstack -- 2.47.3 Leaf functions are problematic for backtraces as they lack the frame pointer setup epilogue. If such a function causes a fault, the original caller won't be part of the backtrace. That's problematic if, for example, memcpy() is failing because it got passed a bad pointer. The generated backtrace will look like this, providing no clue what the issue may be: STACK: @401b31 4001ad 0x0000000000401b31: memcpy at lib/string.c:136 (discriminator 3) for (i = 0; i < n; ++i) > a[i] = b[i]; 0x00000000004001ac: gdt32_end at x86/cstart64.S:127 lea __environ(%rip), %rdx > call main mov %eax, %edi By abusing profiling, we can force the compiler to emit a frame pointer setup epilogue even for leaf functions, making the above backtrace change like this: STACK: @401c21 400512 4001ad 0x0000000000401c21: memcpy at lib/string.c:136 (discriminator 3) for (i = 0; i < n; ++i) > a[i] = b[i]; 0x0000000000400511: main at x86/hypercall.c:91 (discriminator 24) > memcpy((void *)~0xbadc0de, (void *)0xdeadbeef, 42); 0x00000000004001ac: gdt32_end at x86/cstart64.S:127 lea __environ(%rip), %rdx > call main mov %eax, %edi Above backtrace includes the failing memcpy() call, making it much easier to spot the bug. Enable "fake profiling" if supported by the compiler to get better backtraces. The runtime overhead should be negligible for the gained debugability as the profiling call is actually a NOP. Signed-off-by: Mathias Krause --- One may argure that the "ifneq ($(KEEP_FRAME_POINTER),) ... endif" wrapping isn't needed, and that's true. However, it simplifies toggling that variable, if there'll ever be a need for it. x86/Makefile.common | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/x86/Makefile.common b/x86/Makefile.common index 5663a65d3df4..be18a77a779e 100644 --- a/x86/Makefile.common +++ b/x86/Makefile.common @@ -43,6 +43,17 @@ COMMON_CFLAGS += -O1 # stack.o relies on frame pointers. KEEP_FRAME_POINTER := y +ifneq ($(KEEP_FRAME_POINTER),) +# Fake profiling to force the compiler to emit a frame pointer setup also in +# leaf function (-mno-omit-leaf-frame-pointer doesn't work, unfortunately). +# +# Note: +# We need to defer the cc-option test until -fno-pic or -no-pie have been +# added to CFLAGS as -mnop-mcount needs it. The lazy evaluation of CFLAGS +# during compilation makes this do "The Right Thing." +LATE_CFLAGS += $(call cc-option, -pg -mnop-mcount, "") +endif + FLATLIBS = lib/libcflat.a ifeq ($(CONFIG_EFI),y) -- 2.47.3 Similar to x86 before, ARM64 skips the stack frame setup for leaf functions, making backtraces miss frames. Fortunately, gcc supports forcing generating stack frames for these via -mno-omit-leaf-frame-pointer. Signed-off-by: Mathias Krause --- arm/Makefile.arm64 | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arm/Makefile.arm64 b/arm/Makefile.arm64 index bf7ea2a36d3a..a40c830df20f 100644 --- a/arm/Makefile.arm64 +++ b/arm/Makefile.arm64 @@ -67,5 +67,11 @@ tests += $(TEST_DIR)/mte.$(exe) include $(SRCDIR)/$(TEST_DIR)/Makefile.common +ifneq ($(KEEP_FRAME_POINTER),) +# Force the generation of a regular stack frame even for leaf functions to make +# stack walking reliable. +LATE_CFLAGS += $(call cc-option, -mno-omit-leaf-frame-pointer, "") +endif + arch_clean: arm_clean $(RM) lib/arm64/.*.d -- 2.47.3 When backtracing starts in a leaf function, it'll cause the code to go off the rials as the stack frame for leaf functions is incomplete -- it lacks the return address, likely just causing recursive exception handling by trying to follow an invalid pointer chain. Unfortunately, -mno-omit-leaf-frame-pointer isn't supported for the ARM target as an easy fix. Make use of -mapcs-frame instead to force the generation of an APCS stack frame layout[1] that can be traversed reliably. As Clang doesn't support -mapcs-frame, make the stack walking code handle this by using the (old) more compact standard format as a fall-back. Link: https://developer.arm.com/documentation/dui0041/c/ARM-Procedure-Call-Standard/APCS-definition/The-stack-backtrace-data-structure [1] Signed-off-by: Mathias Krause --- I failed to build KUT with Clang for ARM for various reasons, the code is clearly lacking Clang support for ARM, so I doubt this fall-back will be needed / used anytime soon. arm/Makefile.arm | 8 ++++++++ lib/arm/stack.c | 18 ++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/arm/Makefile.arm b/arm/Makefile.arm index d6250b7fb686..7734e17fe583 100644 --- a/arm/Makefile.arm +++ b/arm/Makefile.arm @@ -39,4 +39,12 @@ tests = include $(SRCDIR)/$(TEST_DIR)/Makefile.common +ifneq ($(KEEP_FRAME_POINTER),) +# Force the generation of an APCS stack frame layout to be able to reliably +# walk the stack. Otherwise the compiler may omit saving LR on the stack for +# leaf functions and, unfortunately, -mno-omit-leaf-frame-pointer isn't +# supported on ARM :( +LATE_CFLAGS += $(call cc-option, -mapcs-frame -DAPCS_FRAMES, "") +endif + arch_clean: arm_clean diff --git a/lib/arm/stack.c b/lib/arm/stack.c index 66d18b47ea53..b2384d8eb4c1 100644 --- a/lib/arm/stack.c +++ b/lib/arm/stack.c @@ -8,6 +8,20 @@ #include #include +/* + * APCS stack frames are generated by code like this: + * | mov ip, sp + * | push {..., fp, ip, lr, pc} + * | sub fp, ip, #4 + */ +#ifdef APCS_FRAMES +# define FP_IDX -3 +# define LR_IDX -1 +#else +# define FP_IDX -1 +# define LR_IDX 0 +#endif + int arch_backtrace_frame(const void *frame, const void **return_addrs, int max_depth, bool current_frame) { @@ -27,10 +41,10 @@ int arch_backtrace_frame(const void *frame, const void **return_addrs, for (depth = 0; depth < max_depth; depth++) { if (!fp) break; - return_addrs[depth] = (void *)fp[0]; + return_addrs[depth] = (void *)fp[LR_IDX]; if (return_addrs[depth] == 0) break; - fp = (unsigned long *)fp[-1]; + fp = (unsigned long *)fp[FP_IDX]; } walking = 0; -- 2.47.3