Currently, jevents.py emits both the massive 2.8 MB big_c_string literal and tens of thousands of compact_pmu_event struct arrays into a single pmu-events.c compilation unit. Compiling this giant file takes ~2.2 seconds on a single CPU core during Kbuild startup. Refactor jevents.py to emit big_c_string into a dedicated pmu-events-string.c compilation unit. This allows Kbuild to compile pmu-events.o and pmu-events-string.o in parallel on two CPU cores, preserving 100% string deduplication and zero dynamic ELF relocations while cutting C compilation latency in half. Tested-by: James Clark Assisted-by: Gemini:gemini-3.1-pro-preview Signed-off-by: Ian Rogers --- tools/perf/Makefile.perf | 4 ++-- tools/perf/pmu-events/Build | 15 +++++++++++++-- tools/perf/pmu-events/jevents.py | 20 ++++++++++++++++---- 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d2e673d5e5be..98b6f9fda8d1 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -921,7 +921,7 @@ bpf-skel-clean: pmu-events-clean: ifeq ($(OUTPUT),) $(call QUIET_CLEAN, pmu-events) $(RM) \ - pmu-events/pmu-events.c \ + pmu-events/pmu-events*.c \ pmu-events/metric_test.log \ pmu-events/test-empty-pmu-events.c \ pmu-events/empty-pmu-events.log @@ -929,7 +929,7 @@ ifeq ($(OUTPUT),) -name 'extra-metricgroups.json' -delete else # When an OUTPUT directory is present, clean up the copied pmu-events/arch directory.
$(call QUIET_CLEAN, pmu-events) $(RM) -r $(OUTPUT)pmu-events/arch \ - $(OUTPUT)pmu-events/pmu-events.c \ + $(OUTPUT)pmu-events/pmu-events*.c \ $(OUTPUT)pmu-events/metric_test.log \ $(OUTPUT)pmu-events/test-empty-pmu-events.c \ $(OUTPUT)pmu-events/empty-pmu-events.log diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index dc1df2d57ddc..e957aa2fcb48 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -1,7 +1,12 @@ EMPTY_PMU_EVENTS_C = pmu-events/empty-pmu-events.c # pmu-events.c will be generated by jevents.py or copied from EMPTY_PMU_EVENTS_C PMU_EVENTS_C = $(OUTPUT)pmu-events/pmu-events.c +PMU_EVENTS_STRING_C = $(OUTPUT)pmu-events/pmu-events-string.c + pmu-events-y += pmu-events.o +ifneq ($(NO_JEVENTS),1) +pmu-events-y += pmu-events-string.o +endif # pmu-events.c file is generated in the OUTPUT directory so it needs a # separate rule to depend on it properly @@ -9,6 +14,10 @@ $(OUTPUT)pmu-events/pmu-events.o: $(PMU_EVENTS_C) $(call rule_mkdir) $(call if_changed_dep,cc_o_c) +$(OUTPUT)pmu-events/pmu-events-string.o: $(PMU_EVENTS_STRING_C) + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + # Message for $(call echo-cmd,cp), possibly remove the src file from # the destination to save space in the build log. quiet_cmd_cp = COPY $(patsubst %$<,%,$@) <- $< @@ -118,6 +127,7 @@ CUR_OUT_JSON := $(shell [ -d $(OUT_DIR) ] && find $(OUT_DIR) -type f) # Things in the OUTPUT directory but shouldn't be there as computed by # OUT_JSON and GEN_JSON. + ORPHAN_FILES := $(filter-out $(OUT_JSON) $(GEN_JSON),$(CUR_OUT_JSON)) # Message for $(call echo-cmd,mkd). There is already a mkdir message @@ -222,8 +232,9 @@ endif # Finally, the rule to build pmu-events.c using jevents.py. All test # and inputs are dependencies. 
-$(PMU_EVENTS_C): $(JEVENTS_DEPS) +$(PMU_EVENTS_STRING_C) $(PMU_EVENTS_C) &: $(JEVENTS_DEPS) $(call rule_mkdir) - $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) $(OUT_DIR) $@ + $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) \ + $(OUT_DIR) $(PMU_EVENTS_C) $(PMU_EVENTS_STRING_C) endif # ifeq ($(NO_JEVENTS),1) diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index 3a1bcdcdc685..70a45e62f5d1 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -1422,6 +1422,8 @@ such as "arm/cortex-a34".''', ) ap.add_argument( 'output_file', type=argparse.FileType('w', encoding='utf-8'), nargs='?', default=sys.stdout) + ap.add_argument( + 'output_string_file', type=argparse.FileType('w', encoding='utf-8'), nargs='?', default=None) _args = ap.parse_args() _args.output_file.write(f""" @@ -1463,10 +1465,20 @@ struct pmu_table_entry { ftw(arch_path, [], preprocess_one_file) _bcs.compute() - _args.output_file.write('static const char *const big_c_string =\n') - for s in _bcs.big_string: - _args.output_file.write(s) - _args.output_file.write(';\n\n') + if not _args.output_string_file: + _args.output_file.write('static const char *const big_c_string =\n') + for s in _bcs.big_string: + _args.output_file.write(s) + _args.output_file.write(';\n\n') + else: + _args.output_string_file.write('/* SPDX-License-Identifier: GPL-2.0 */\n') + _args.output_string_file.write('/* Autogenerated by jevents.py */\n') + _args.output_string_file.write('const char big_c_string[] =\n') + for s in _bcs.big_string: + _args.output_string_file.write(s) + _args.output_string_file.write(';\n') + _args.output_string_file.close() + _args.output_file.write('extern const char big_c_string[];\n\n') for arch in archs: arch_path = f'{_args.starting_dir}/{arch}' ftw(arch_path, [], process_one_file) -- 2.54.0.563.g4f69b47b94-goog