Introduce a kernel module that will exercise lock acquisition in the NMI path, and bias toward creating contention such that NMI waiters end up being non-head waiters. Prior to the rqspinlock fix made in the commit 0d80e7f951be ("rqspinlock: Choose trylock fallback for NMI waiters"), it was possible for the queueing path of non-head waiters to get stuck in NMI, which this stress test reproduces fairly easily with just 3 CPUs. Both AA and ABBA flavors are supported, and it will serve as a test case for future fixes that address this corner case. More information about the problem in question is available in the commit cited above. When the fix is reverted, this stress test will lock up the system. To enable this test automatically through the test_progs infrastructure, add a load_module_params API to exercise both AA and ABBA cases when running the test. Note that the test runs for at most 5 seconds, and becomes a noop after that, in order to allow the system to make forward progress. In addition, CPU 0 is always kept untouched by the created threads and NMIs. The test will automatically scale to the number of available online CPUs. Note that at least 3 CPUs are necessary to run this test, hence skip the selftest in case the environment has less than 3 CPUs available. Signed-off-by: Kumar Kartikeya Dwivedi --- tools/testing/selftests/bpf/Makefile | 2 +- .../selftests/bpf/prog_tests/res_spin_lock.c | 16 ++ .../testing/selftests/bpf/test_kmods/Makefile | 2 +- .../bpf/test_kmods/bpf_test_rqspinlock.c | 208 ++++++++++++++++++ tools/testing/selftests/bpf/testing_helpers.c | 14 +- tools/testing/selftests/bpf/testing_helpers.h | 1 + 6 files changed, 239 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 0b6ee902bce5..f00587d4ede6 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -120,7 +120,7 @@ TEST_PROGS_EXTENDED := \ test_bpftool.py TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \ - bpf_test_modorder_y.ko + bpf_test_modorder_y.ko bpf_test_rqspinlock.ko TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS)) # Compile but not part of 'make run_tests' diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c index 0703e987df89..8c6c2043a432 100644 --- a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c @@ -99,3 +99,19 @@ void test_res_spin_lock_success(void) res_spin_lock__destroy(skel); return; } + +void serial_test_res_spin_lock_stress(void) +{ + if (libbpf_num_possible_cpus() < 3) { + test__skip(); + return; + } + + ASSERT_OK(load_module("bpf_test_rqspinlock.ko", false), "load module AA"); + sleep(5); + unload_module("bpf_test_rqspinlock", false); + + ASSERT_OK(load_module_params("bpf_test_rqspinlock.ko", "test_ab=1", false), "load module ABBA"); + sleep(5); + unload_module("bpf_test_rqspinlock", false); +} diff --git a/tools/testing/selftests/bpf/test_kmods/Makefile b/tools/testing/selftests/bpf/test_kmods/Makefile index d4e50c4509c9..63c4d3f6a12f 100644 --- a/tools/testing/selftests/bpf/test_kmods/Makefile +++ b/tools/testing/selftests/bpf/test_kmods/Makefile @@ -8,7 +8,7 @@ Q = @ endif MODULES = bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \ - bpf_test_modorder_y.ko + bpf_test_modorder_y.ko bpf_test_rqspinlock.ko $(foreach m,$(MODULES),$(eval obj-m += $(m:.ko=.o))) diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c new file mode 100644 index 000000000000..dc8620715ac1 --- /dev/null +++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct perf_event_attr hw_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, + .sample_period = 100000, +}; + +static rqspinlock_t lock_a; +static rqspinlock_t lock_b; + +static struct perf_event **rqsl_evts; +static int rqsl_nevts; + +static bool test_ab = false; +module_param(test_ab, bool, 0644); +MODULE_PARM_DESC(test_ab, "Test ABBA situations instead of AA situations"); + +static struct task_struct **rqsl_threads; +static int rqsl_nthreads; +static atomic_t rqsl_ready_cpus = ATOMIC_INIT(0); + +static int pause = 0; + +static bool nmi_locks_a(int cpu) +{ + return (cpu & 1) && test_ab; +} + +static int rqspinlock_worker_fn(void *arg) +{ + int cpu = smp_processor_id(); + unsigned long flags; + int ret; + + if (cpu) { + atomic_inc(&rqsl_ready_cpus); + + while (!kthread_should_stop()) { + if (READ_ONCE(pause)) { + msleep(1000); + continue; + } + if (nmi_locks_a(cpu)) + ret = raw_res_spin_lock_irqsave(&lock_b, flags); + else + ret = raw_res_spin_lock_irqsave(&lock_a, flags); + mdelay(20); + if (nmi_locks_a(cpu) && !ret) + raw_res_spin_unlock_irqrestore(&lock_b, flags); + else if (!ret) + raw_res_spin_unlock_irqrestore(&lock_a, flags); + cpu_relax(); + } + return 0; + } + + while (!kthread_should_stop()) { + int expected = rqsl_nthreads > 0 ? rqsl_nthreads - 1 : 0; + int ready = atomic_read(&rqsl_ready_cpus); + + if (ready == expected && !READ_ONCE(pause)) { + for (int i = 0; i < rqsl_nevts; i++) + perf_event_enable(rqsl_evts[i]); + pr_err("Waiting 5 secs to pause the test\n"); + msleep(1000 * 5); + WRITE_ONCE(pause, 1); + pr_err("Paused the test\n"); + } else { + msleep(1000); + cpu_relax(); + } + } + return 0; +} + +static void nmi_cb(struct perf_event *event, struct perf_sample_data *data, + struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + unsigned long flags; + int ret; + + if (!cpu || READ_ONCE(pause)) + return; + + if (nmi_locks_a(cpu)) + ret = raw_res_spin_lock_irqsave(&lock_a, flags); + else + ret = raw_res_spin_lock_irqsave(test_ab ? &lock_b : &lock_a, flags); + + mdelay(10); + + if (nmi_locks_a(cpu) && !ret) + raw_res_spin_unlock_irqrestore(&lock_a, flags); + else if (!ret) + raw_res_spin_unlock_irqrestore(test_ab ? &lock_b : &lock_a, flags); +} + +static void free_rqsl_threads(void) { + int i; + + if (rqsl_threads) { + for_each_online_cpu(i) { + if (rqsl_threads[i]) + kthread_stop(rqsl_threads[i]); + } + kfree(rqsl_threads); + } +} + +static void free_rqsl_evts(void) +{ + int i; + + if (rqsl_evts) { + for (i = 0; i < rqsl_nevts; i++) { + if (rqsl_evts[i]) + perf_event_release_kernel(rqsl_evts[i]); + } + kfree(rqsl_evts); + } +} + +static int bpf_test_rqspinlock_init(void) +{ + int i, ret; + int ncpus = num_online_cpus(); + + pr_err("Mode = %s\n", test_ab ? "ABBA" : "AA"); + + if (ncpus < 3) + return -ENOTSUPP; + + raw_res_spin_lock_init(&lock_a); + raw_res_spin_lock_init(&lock_b); + + rqsl_evts = kcalloc(ncpus - 1, sizeof(*rqsl_evts), GFP_KERNEL); + if (!rqsl_evts) + return -ENOMEM; + rqsl_nevts = ncpus - 1; + for (i = 1; i < ncpus; i++) { + struct perf_event *e; + + e = perf_event_create_kernel_counter(&hw_attr, i, NULL, nmi_cb, NULL); + if (IS_ERR(e)) { + ret = PTR_ERR(e); + goto err_perf_events; + } + rqsl_evts[i - 1] = e; + } + + rqsl_threads = kcalloc(ncpus, sizeof(*rqsl_threads), GFP_KERNEL); + if (!rqsl_threads) { + ret = -ENOMEM; + goto err_perf_alloc; + } + rqsl_nthreads = ncpus; + + for_each_online_cpu(i) { + struct task_struct *t; + + t = kthread_create(rqspinlock_worker_fn, NULL, "rqsl_w/%d", i); + if (IS_ERR(t)) { + ret = PTR_ERR(t); + goto err_threads_create; + } + kthread_bind(t, i); + rqsl_threads[i] = t; + wake_up_process(t); + } + return 0; + +err_threads_create: + free_rqsl_threads(); +err_perf_alloc: + free_rqsl_evts(); +err_perf_events: + return ret; +} + +module_init(bpf_test_rqspinlock_init); + +static void bpf_test_rqspinlock_exit(void) +{ + free_rqsl_threads(); + free_rqsl_evts(); +} + +module_exit(bpf_test_rqspinlock_exit); + +MODULE_AUTHOR("Kumar Kartikeya Dwivedi"); +MODULE_DESCRIPTION("BPF rqspinlock stress test module"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 5e9f16683be5..16eb37e5bad6 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -399,7 +399,7 @@ int unload_module(const char *name, bool verbose) return 0; } -int load_module(const char *path, bool verbose) +static int __load_module(const char *path, const char *param_values, bool verbose) { int fd; @@ -411,7 +411,7 @@ int load_module(const char *path, bool verbose) fprintf(stdout, "Can't find %s kernel module: %d\n", path, -errno); return -ENOENT; } - if (finit_module(fd, "", 0)) { + if (finit_module(fd, param_values, 0)) { fprintf(stdout, "Failed to load %s into the kernel: %d\n", path, -errno); close(fd); return -EINVAL; @@ -423,6 +423,16 @@ int load_module(const char *path, bool verbose) return 0; } +int load_module_params(const char *path, const char *param_values, bool verbose) +{ + return __load_module(path, param_values, verbose); +} + +int load_module(const char *path, bool verbose) +{ + return __load_module(path, "", verbose); +} + int unload_bpf_testmod(bool verbose) { return unload_module("bpf_testmod", verbose); diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 46d7f7089f63..eb20d3772218 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -39,6 +39,7 @@ int kern_sync_rcu(void); int finit_module(int fd, const char *param_values, int flags); int delete_module(const char *name, int flags); int load_module(const char *path, bool verbose); +int load_module_params(const char *path, const char *param_values, bool verbose); int unload_module(const char *name, bool verbose); static inline __u64 get_time_ns(void) -- 2.51.0