Sleepable BPF programs can only be attached to selected functions. For convenience, the error injection list was originally used, which contains syscalls and several other functions. When error injection is disabled (CONFIG_FUNCTION_ERROR_INJECTION=n), that list is empty and sleepable tracing programs are effectively unavailable. In such a case, at least enable sleepable programs on syscalls. For discussion why syscalls were chosen, see [1]. To detect that a function is a syscall handler, we check for arch-specific prefixes for the most common architectures. Unfortunately, the prefixes are hard-coded in arch syscall code so we need to hard-code them, too. [1] https://lore.kernel.org/bpf/CAADnVQK6qP8izg+k9yV0vdcT-+=axtFQ2fKw7D-2Ei-V6WS5Dw@mail.gmail.com/ Signed-off-by: Viktor Malik Acked-by: Kumar Kartikeya Dwivedi Acked-by: Leon Hwang --- kernel/bpf/verifier.c | 60 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7aa06f534cb2..a52e57f3eb80 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -24961,6 +24961,8 @@ static int check_attach_modify_return(unsigned long addr, const char *func_name) return -EINVAL; } +#ifdef CONFIG_FUNCTION_ERROR_INJECTION + /* list of non-sleepable functions that are otherwise on * ALLOW_ERROR_INJECTION list */ @@ -24982,6 +24984,57 @@ static int check_non_sleepable_error_inject(u32 btf_id) return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id); } +static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name) +{ + /* fentry/fexit/fmod_ret progs can be sleepable if they are + * attached to ALLOW_ERROR_INJECTION and are not in denylist. + */ + if (!check_non_sleepable_error_inject(btf_id) && + within_error_injection_list(addr)) + return 0; + + return -EINVAL; +} + +#else + +/* Unfortunately, the arch-specific prefixes are hard-coded in arch syscall code + * so we need to hard-code them, too. Ftrace has arch_syscall_match_sym_name() + * but that just compares two concrete function names. + */ +static bool has_arch_syscall_prefix(const char *func_name) +{ +#if defined(__x86_64__) + return !strncmp(func_name, "__x64_", 6); +#elif defined(__i386__) + return !strncmp(func_name, "__ia32_", 7); +#elif defined(__s390x__) + return !strncmp(func_name, "__s390x_", 8); +#elif defined(__aarch64__) + return !strncmp(func_name, "__arm64_", 8); +#elif defined(__riscv) + return !strncmp(func_name, "__riscv_", 8); +#elif defined(__powerpc__) || defined(__powerpc64__) + return !strncmp(func_name, "sys_", 4); +#elif defined(__loongarch__) + return !strncmp(func_name, "sys_", 4); +#else + return false; +#endif +} + +/* Without error injection, allow sleepable progs on syscalls. */ + +static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name) +{ + if (has_arch_syscall_prefix(func_name)) + return 0; + + return -EINVAL; +} + +#endif /* CONFIG_FUNCTION_ERROR_INJECTION */ + int bpf_check_attach_target(struct bpf_verifier_log *log, const struct bpf_prog *prog, const struct bpf_prog *tgt_prog, @@ -25261,12 +25314,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, ret = -EINVAL; switch (prog->type) { case BPF_PROG_TYPE_TRACING: - - /* fentry/fexit/fmod_ret progs can be sleepable if they are - * attached to ALLOW_ERROR_INJECTION and are not in denylist. - */ - if (!check_non_sleepable_error_inject(btf_id) && - within_error_injection_list(addr)) + if (!check_attach_sleepable(btf_id, addr, tname)) ret = 0; /* fentry/fexit/fmod_ret progs can also be sleepable if they are * in the fmodret id set with the KF_SLEEPABLE flag. -- 2.53.0 fmod_ret BPF programs can only be attached to selected functions. For convenience, the error injection list was originally used (along with functions prefixed with "security_"), which contains syscalls and several other functions. When error injection is disabled (CONFIG_FUNCTION_ERROR_INJECTION=n), that list is empty and fmod_ret programs are effectively unavailable for most of the functions. In such a case, at least enable fmod_ret programs on syscalls. Signed-off-by: Viktor Malik Acked-by: Kumar Kartikeya Dwivedi Acked-by: Leon Hwang --- kernel/bpf/verifier.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a52e57f3eb80..8e4f69918693 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -24952,15 +24952,6 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env) } #define SECURITY_PREFIX "security_" -static int check_attach_modify_return(unsigned long addr, const char *func_name) -{ - if (within_error_injection_list(addr) || - !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1)) - return 0; - - return -EINVAL; -} - #ifdef CONFIG_FUNCTION_ERROR_INJECTION /* list of non-sleepable functions that are otherwise on @@ -24996,6 +24987,15 @@ static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *fu return -EINVAL; } +static int check_attach_modify_return(unsigned long addr, const char *func_name) +{ + if (within_error_injection_list(addr) || + !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1)) + return 0; + + return -EINVAL; +} + #else /* Unfortunately, the arch-specific prefixes are hard-coded in arch syscall code @@ -25023,7 +25023,7 @@ static bool has_arch_syscall_prefix(const char *func_name) #endif } -/* Without error injection, allow sleepable progs on syscalls. */ +/* Without error injection, allow sleepable and fmod_ret progs on syscalls. */ static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name) { @@ -25033,6 +25033,15 @@ static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *fu return -EINVAL; } +static int check_attach_modify_return(unsigned long addr, const char *func_name) +{ + if (has_arch_syscall_prefix(func_name) || + !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1)) + return 0; + + return -EINVAL; +} + #endif /* CONFIG_FUNCTION_ERROR_INJECTION */ int bpf_check_attach_target(struct bpf_verifier_log *log, -- 2.53.0 Now that sleepable programs are always enabled on syscalls, let refcounted_kptr tests use syscalls rather than bpf_testmod_test_read, which is not sleepable with error injection disabled. The tests just check that the verifier can handle usage of RCU locks in sleepable programs and never actually attach. So, the attachment target doesn't matter (as long as it is sleepable) and with syscalls, the tests pass on kernels with disabled error injection. Signed-off-by: Viktor Malik Acked-by: Kumar Kartikeya Dwivedi --- tools/testing/selftests/bpf/progs/refcounted_kptr.c | 4 ++-- tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c index 1aca85d86aeb..c847398837cc 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c @@ -500,7 +500,7 @@ long rbtree_wrong_owner_remove_fail_a2(void *ctx) return 0; } -SEC("?fentry.s/bpf_testmod_test_read") +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") __success int BPF_PROG(rbtree_sleepable_rcu, struct file *file, struct kobject *kobj, @@ -534,7 +534,7 @@ int BPF_PROG(rbtree_sleepable_rcu, return 0; } -SEC("?fentry.s/bpf_testmod_test_read") +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") __success int BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock, struct file *file, struct kobject *kobj, diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index 836c8ab7b908..b2808bfcec29 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -93,7 +93,7 @@ long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) return 0; } -SEC("?fentry.s/bpf_testmod_test_read") +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") __failure __msg("function calls are not allowed while holding a lock") int BPF_PROG(rbtree_fail_sleepable_lock_across_rcu, struct file *file, struct kobject *kobj, -- 2.53.0