The stacktrace map can easily become full, which leads to failures when
obtaining new stack IDs. Besides increasing the size of the map, another
solution is to delete a stack_id after looking it up from user space, so
extend the existing bpf_map_lookup_and_delete_elem() functionality to the
stacktrace map type.

Signed-off-by: Tao Chen
---
 include/linux/bpf.h   |  2 +-
 kernel/bpf/stackmap.c | 16 ++++++++++++++--
 kernel/bpf/syscall.c  |  8 +++++---
 3 files changed, 20 insertions(+), 6 deletions(-)

Change list:
 v1 -> v2:
 - typo s/detele/delete/. (Jiri)
 - make sure a following lookup fails with -ENOENT after deleting the stack_id. (Jiri)
 - use '&key' directly as the update value. (Jiri)
 v1: https://lore.kernel.org/bpf/20250908113622.810652-1-chen.dylane@linux.dev
 v2 -> v3:
 - rename bpf_stackmap_copy_and_delete to bpf_stackmap_extract. (Andrii)
 v2: https://lore.kernel.org/bpf/20250909163223.864120-1-chen.dylane@linux.dev
 v3 -> v4:
 - describe what the test is for in the commit message. (Leon)
 - 'val_buf' should stay on a single line. (Leon)
 v3: https://lore.kernel.org/bpf/20250920155211.1354348-1-chen.dylane@linux.dev
 v4 -> v5:
 - refactor stacktrace_map with a skeleton in patch 2. (Andrii)
 - replace CHECK*() with ASSERT_xx() and only check err from bpf_map_lookup_elem. (Andrii)
 v4: https://lore.kernel.org/bpf/20250922140317.1468691-1-chen.dylane@linux.dev
 v5 -> v6:
 - fix test_stacktrace_map_raw_tp failure. (Daniel)
 v5: https://lore.kernel.org/bpf/20250923165849.1524622-1-chen.dylane@linux.dev

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8f6e87f0f3a..9d6f7671ba1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2703,7 +2703,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
 int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 			    u64 flags);
 
-int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
+int bpf_stackmap_extract(struct bpf_map *map, void *key, void *value, bool delete);
 
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
 				 void *key, void *value, u64 map_flags);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 3615c06b7df..2e182a3ac4c 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -646,7 +646,15 @@ static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
 }
 
 /* Called from syscall */
-int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
+static int stack_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+					    void *value, u64 flags)
+{
+	return bpf_stackmap_extract(map, key, value, true);
+}
+
+/* Called from syscall */
+int bpf_stackmap_extract(struct bpf_map *map, void *key, void *value,
+			 bool delete)
 {
 	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
 	struct stack_map_bucket *bucket, *old_bucket;
@@ -663,7 +671,10 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 	memcpy(value, bucket->data, trace_len);
 	memset(value + trace_len, 0, map->value_size - trace_len);
 
-	old_bucket = xchg(&smap->buckets[id], bucket);
+	if (delete)
+		old_bucket = bucket;
+	else
+		old_bucket = xchg(&smap->buckets[id], bucket);
 	if (old_bucket)
 		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
 	return 0;
@@ -754,6 +765,7 @@ const struct bpf_map_ops stack_trace_map_ops = {
 	.map_free = stack_map_free,
 	.map_get_next_key = stack_map_get_next_key,
 	.map_lookup_elem = stack_map_lookup_elem,
+	.map_lookup_and_delete_elem = stack_map_lookup_and_delete_elem,
 	.map_update_elem = stack_map_update_elem,
 	.map_delete_elem = stack_map_delete_elem,
 	.map_check_btf = map_check_no_btf,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0fbfa8532c3..c380a2576fc 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -318,7 +318,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
 		err = bpf_percpu_cgroup_storage_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
-		err = bpf_stackmap_copy(map, key, value);
+		err = bpf_stackmap_extract(map, key, value, false);
 	} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
 		err = bpf_fd_array_map_lookup_elem(map, key, value);
 	} else if (IS_FD_HASH(map)) {
@@ -1627,7 +1627,8 @@ struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
 }
 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
 
-int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
+int __weak bpf_stackmap_extract(struct bpf_map *map, void *key, void *value,
+				bool delete)
 {
 	return -ENOTSUPP;
 }
@@ -2158,7 +2159,8 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
 		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
-		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+		   map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
 		if (!bpf_map_is_offloaded(map)) {
 			bpf_disable_instrumentation();
 			rcu_read_lock();
--
2.48.1

The loading method of the stacktrace_map test case is outdated, so refactor
it to use a skeleton; this also allows the next patch to use the global
variable feature.

Signed-off-by: Tao Chen
---
 .../selftests/bpf/prog_tests/stacktrace_map.c | 52 ++++++++-----------
 .../bpf/prog_tests/stacktrace_map_raw_tp.c    |  2 +-
 ...test_stacktrace_map.c => stacktrace_map.c} |  0
 3 files changed, 22 insertions(+), 32 deletions(-)
 rename tools/testing/selftests/bpf/progs/{test_stacktrace_map.c => stacktrace_map.c} (100%)

diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index 84a7e405e91..0a79bf1d354 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -1,46 +1,38 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include "stacktrace_map.skel.h"
 
 void test_stacktrace_map(void)
 {
+	struct stacktrace_map *skel;
 	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
-	const char *prog_name = "oncpu";
-	int err, prog_fd, stack_trace_len;
-	const char *file = "./test_stacktrace_map.bpf.o";
+	int err, stack_trace_len;
 	__u32 key, val, duration = 0;
-	struct bpf_program *prog;
-	struct bpf_object *obj;
-	struct bpf_link *link;
 
-	err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
-	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+	skel = stacktrace_map__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
 		return;
 
-	prog = bpf_object__find_program_by_name(obj, prog_name);
-	if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
-		goto close_prog;
-
-	link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
-	if (!ASSERT_OK_PTR(link, "attach_tp"))
-		goto close_prog;
-
 	/* find map fds */
-	control_map_fd = bpf_find_map(__func__, obj, "control_map");
+	control_map_fd = bpf_map__fd(skel->maps.control_map);
 	if (CHECK_FAIL(control_map_fd < 0))
-		goto disable_pmu;
+		goto out;
 
-	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
"stackid_hmap"); + stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap); if (CHECK_FAIL(stackid_hmap_fd < 0)) - goto disable_pmu; + goto out; - stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); + stackmap_fd = bpf_map__fd(skel->maps.stackmap); if (CHECK_FAIL(stackmap_fd < 0)) - goto disable_pmu; + goto out; - stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); + stack_amap_fd = bpf_map__fd(skel->maps.stack_amap); if (CHECK_FAIL(stack_amap_fd < 0)) - goto disable_pmu; + goto out; + err = stacktrace_map__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; /* give some time for bpf program run */ sleep(1); @@ -55,21 +47,19 @@ void test_stacktrace_map(void) err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto out; err = compare_map_keys(stackmap_fd, stackid_hmap_fd); if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto out; stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto out; -disable_pmu: - bpf_link__destroy(link); -close_prog: - bpf_object__close(obj); +out: + stacktrace_map__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c index e0cb4697b4b..e985d51d3d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c @@ -5,7 +5,7 @@ void test_stacktrace_map_raw_tp(void) { const char *prog_name = "oncpu"; int control_map_fd, stackid_hmap_fd, stackmap_fd; - const char *file = "./test_stacktrace_map.bpf.o"; + const char *file = "./stacktrace_map.bpf.o"; __u32 key, val, duration = 0; int err, prog_fd; struct bpf_program *prog; diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/stacktrace_map.c similarity index 100% rename from tools/testing/selftests/bpf/progs/test_stacktrace_map.c rename to tools/testing/selftests/bpf/progs/stacktrace_map.c -- 2.48.1 Add tests for stacktrace map lookup and delete: 1. use bpf_map_lookup_and_delete_elem to lookup and delete the target stack_id, 2. lookup the deleted stack_id again to double check. 
Signed-off-by: Tao Chen
---
 .../testing/selftests/bpf/prog_tests/stacktrace_map.c | 11 ++++++++++-
 tools/testing/selftests/bpf/progs/stacktrace_map.c    |  2 ++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index 0a79bf1d354..ec93918fe3c 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -7,7 +7,8 @@ void test_stacktrace_map(void)
 	struct stacktrace_map *skel;
 	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
 	int err, stack_trace_len;
-	__u32 key, val, duration = 0;
+	__u32 key, val, stack_id, duration = 0;
+	__u64 stack[PERF_MAX_STACK_DEPTH];
 
 	skel = stacktrace_map__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
@@ -60,6 +61,14 @@ void test_stacktrace_map(void)
 		  "err %d errno %d\n", err, errno))
 		goto out;
 
+	stack_id = skel->bss->stack_id;
+	err = bpf_map_lookup_and_delete_elem(stackmap_fd, &stack_id, stack);
+	if (!ASSERT_OK(err, "lookup and delete target stack_id"))
+		goto out;
+
+	err = bpf_map_lookup_elem(stackmap_fd, &stack_id, stack);
+	if (!ASSERT_EQ(err, -ENOENT, "lookup deleted stack_id"))
+		goto out;
 out:
 	stacktrace_map__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_map.c b/tools/testing/selftests/bpf/progs/stacktrace_map.c
index 47568007b66..0c77df05be7 100644
--- a/tools/testing/selftests/bpf/progs/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/stacktrace_map.c
@@ -50,6 +50,7 @@ struct sched_switch_args {
 	int next_prio;
 };
 
+__u32 stack_id;
 SEC("tracepoint/sched/sched_switch")
 int oncpu(struct sched_switch_args *ctx)
 {
@@ -64,6 +65,7 @@ int oncpu(struct sched_switch_args *ctx)
 	/* The size of stackmap and stackid_hmap should be the same */
 	key = bpf_get_stackid(ctx, &stackmap, 0);
 	if ((int)key >= 0) {
+		stack_id = key;
 		bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
 		stack_p = bpf_map_lookup_elem(&stack_amap, &key);
 		if (stack_p)
--
2.48.1
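
For reference, a minimal user-space sketch (not part of the patches) of how the
extended lookup-and-delete path is meant to be used on a BPF_MAP_TYPE_STACK_TRACE
map. The helper name drain_stack is hypothetical; stackmap_fd and stack_id are
assumed to come from an already-loaded program, as in the selftest above:

#include <errno.h>
#include <stdio.h>
#include <linux/perf_event.h>	/* PERF_MAX_STACK_DEPTH */
#include <bpf/bpf.h>

/* Hypothetical helper: 'stackmap_fd' refers to a BPF_MAP_TYPE_STACK_TRACE map
 * and 'stack_id' was produced by bpf_get_stackid() in the attached program.
 */
static int drain_stack(int stackmap_fd, __u32 stack_id)
{
	__u64 ips[PERF_MAX_STACK_DEPTH];
	int err;

	/* Copy the trace out and free its bucket in a single syscall. */
	err = bpf_map_lookup_and_delete_elem(stackmap_fd, &stack_id, ips);
	if (err)
		return err;

	/* The id is gone now; a second lookup is expected to fail with -ENOENT. */
	err = bpf_map_lookup_elem(stackmap_fd, &stack_id, ips);
	if (err != -ENOENT)
		fprintf(stderr, "stack_id %u unexpectedly still present\n", stack_id);

	return 0;
}

Draining entries this way frees the bucket in the same syscall that copies the
trace out, so a stackmap that fills up can keep producing new stack IDs without
having to be resized.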