The verifier uses an ID mapping table (struct bpf_idmap) during state equivalence checks. Currently, reset_idmap_scratch performs a full memset on the entire map (~4.7KB) in every call to states_equal. Replace the full memset with an O(1) reset: track the number of populated entries in a new 'cnt' field, reset only that counter in reset_idmap_scratch, and bound the lookup loop in check_ids() by cnt. This ensures that reset overhead is minimal and the search loop is bounded by the number of IDs actually encountered in the current equivalence check. Benchmark results (system-wide 'perf stat' during high-concurrency 'veristat' stress test, 60s): The following results, captured using perf while running veristat in parallel across all CPU cores, show a significant reduction in instruction overhead (~9.3%) and branch executions (~11%), confirming that the O(1) reset logic significantly reduces the verifier's workload during state equivalence checks. Metric | Baseline | Patched | Delta ----------------|---------------|---------------|---------- Iterations | 5710 | 5731 | +0.37% Instructions | 1.714 T | 1.555 T | -9.28% Inst/Iter | 300.2 M | 271.3 M | -9.63% Cycles | 1.436 T | 1.335 T | -7.03% Branches | 350.4 B | 311.9 B | -10.99% Migrations | 25,977 | 23,524 | -9.44% Test Command: seq 1 2000000 | sudo perf stat -a -- \ timeout 60s xargs -P $(nproc) -I {} ./veristat access_map_in_map.bpf.o Detailed Performance Stats: Baseline: Performance counter stats for 'system wide': 6,735,538 context-switches # 3505.5 cs/sec cs_per_second 1,921,431.27 msec cpu-clock # 32.0 CPUs CPUs_utilized 25,977 cpu-migrations # 13.5 migrations/sec migrations_per_second 7,268,841 page-faults # 3783.0 faults/sec page_fault_per_second 18,662,357,052 branch-misses # 3.9 % branch_miss_rate (50.14%) 350,411,558,023 branches # 182.4 M/sec branch_frequency (66.85%) 1,435,774,261,319 cpu-cycles # 0.7 GHz cycles_frequency (66.95%) 1,714,154,229,503 instructions # 1.2 instructions insn_per_cycle (66.86%) 429,445,480,497 stalled-cycles-frontend # 0.30 frontend_cycles_idle (66.36%) 60.035899231 seconds time elapsed Patched: Performance counter stats for 'system wide': 6,662,371 context-switches # 3467.3
cs/sec cs_per_second 1,921,497.78 msec cpu-clock # 32.0 CPUs CPUs_utilized 23,524 cpu-migrations # 12.2 migrations/sec migrations_per_second 7,783,064 page-faults # 4050.5 faults/sec page_faults_per_second 18,181,655,163 branch-misses # 4.3 % branch_miss_rate (50.15%) 311,865,239,743 branches # 162.3 M/sec branch_frequency (66.86%) 1,334,859,779,821 cpu-cycles # 0.7 GHz cycles_frequency (66.96%) 1,555,086,465,845 instructions # 1.2 instructions insn_per_cycle (66.87%) 407,666,712,045 stalled-cycles-frontend # 0.31 frontend_cycles_idle (66.35%) 60.034702643 seconds time elapsed Acked-by: Eduard Zingerman Signed-off-by: Qiliang Yuan --- v3: - Remove Suggested-by tags per Eduard's feedback. - Add Eduard's Acked-by. - Credit Andrii Nakryiko for the further optimization suggestion. - Mention the limitation of system-wide profiling in commit message. v2: - Further optimize ID mapping reset (suggested by Andrii Nakryiko) by using a simple counter reset and bounding the search loop. v1: - Initial version using a watermark-based partial memset to optimize the ID mapping reset overhead. 
include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 23 ++++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 130bcbd66f60..8355b585cd18 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -692,6 +692,7 @@ struct bpf_id_pair { struct bpf_idmap { u32 tmp_id_gen; + u32 cnt; struct bpf_id_pair map[BPF_ID_MAP_SIZE]; }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3135643d5695..6ec6d70e5ce7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -18948,18 +18948,21 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap) if (old_id == 0) /* cur_id == 0 as well */ return true; - for (i = 0; i < BPF_ID_MAP_SIZE; i++) { - if (!map[i].old) { - /* Reached an empty slot; haven't seen this id before */ - map[i].old = old_id; - map[i].cur = cur_id; - return true; - } + for (i = 0; i < idmap->cnt; i++) { if (map[i].old == old_id) return map[i].cur == cur_id; if (map[i].cur == cur_id) return false; } + + /* Reached the end of known mappings; haven't seen this id before */ + if (idmap->cnt < BPF_ID_MAP_SIZE) { + map[idmap->cnt].old = old_id; + map[idmap->cnt].cur = cur_id; + idmap->cnt++; + return true; + } + /* We ran out of idmap slots, which should be impossible */ WARN_ON_ONCE(1); return false; @@ -19470,8 +19473,10 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat static void reset_idmap_scratch(struct bpf_verifier_env *env) { - env->idmap_scratch.tmp_id_gen = env->id_gen; - memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map)); + struct bpf_idmap *idmap = &env->idmap_scratch; + + idmap->tmp_id_gen = env->id_gen; + idmap->cnt = 0; } static bool states_equal(struct bpf_verifier_env *env, -- 2.39.5