Add generic BPF syscall support for passing common attributes. The initial set of common attributes includes: 1. 'log_buf': User-provided buffer for storing logs. 2. 'log_size': Size of the log buffer. 3. 'log_level': Log verbosity level. 4. 'log_true_size': Actual log size reported by kernel. The common-attribute pointer and its size are passed as the 4th and 5th syscall arguments. A new command bit, 'BPF_COMMON_ATTRS' ('1 << 16'), indicates that common attributes are supplied. This commit adds syscall and uapi plumbing. Command-specific handling is added in follow-up patches. Signed-off-by: Leon Hwang --- include/linux/syscalls.h | 3 ++- include/uapi/linux/bpf.h | 8 ++++++++ kernel/bpf/syscall.c | 25 +++++++++++++++++++++---- tools/include/uapi/linux/bpf.h | 8 ++++++++ 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f5639d5ac331..50055ab73649 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -936,7 +936,8 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); -asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size); +asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size, + struct bpf_common_attr __user *attr_common, unsigned int size_common); asmlinkage long sys_execveat(int dfd, const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp, int flags); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 552bc5d9afbd..aec171ccb6ef 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -994,6 +994,7 @@ enum bpf_cmd { BPF_PROG_STREAM_READ_BY_FD, BPF_PROG_ASSOC_STRUCT_OPS, __MAX_BPF_CMD, + BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying syscall common attrs. 
*/ }; enum bpf_map_type { @@ -1500,6 +1501,13 @@ struct bpf_stack_build_id { }; }; +struct bpf_common_attr { + __aligned_u64 log_buf; + __u32 log_size; + __u32 log_level; + __u32 log_true_size; +}; + #define BPF_OBJ_NAME_LEN 16U enum { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3b1f0ba02f61..354f6f471a08 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -6211,8 +6211,10 @@ static int prog_assoc_struct_ops(union bpf_attr *attr) return ret; } -static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) +static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, + bpfptr_t uattr_common, unsigned int size_common) { + struct bpf_common_attr attr_common; union bpf_attr attr; int err; @@ -6226,6 +6228,20 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) if (copy_from_bpfptr(&attr, uattr, size) != 0) return -EFAULT; + memset(&attr_common, 0, sizeof(attr_common)); + if (cmd & BPF_COMMON_ATTRS) { + err = bpf_check_uarg_tail_zero(uattr_common, sizeof(attr_common), size_common); + if (err) + return err; + + cmd &= ~BPF_COMMON_ATTRS; + size_common = min_t(u32, size_common, sizeof(attr_common)); + if (copy_from_bpfptr(&attr_common, uattr_common, size_common) != 0) + return -EFAULT; + } else { + size_common = 0; + } + err = security_bpf(cmd, &attr, size, uattr.is_kernel); if (err < 0) return err; @@ -6361,9 +6377,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) return err; } -SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) +SYSCALL_DEFINE5(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size, + struct bpf_common_attr __user *, uattr_common, unsigned int, size_common) { - return __sys_bpf(cmd, USER_BPFPTR(uattr), size); + return __sys_bpf(cmd, USER_BPFPTR(uattr), size, USER_BPFPTR(uattr_common), size_common); } static bool syscall_prog_is_valid_access(int off, int size, @@ -6393,7 +6410,7 @@ BPF_CALL_3(bpf_sys_bpf, 
int, cmd, union bpf_attr *, attr, u32, attr_size) default: return -EINVAL; } - return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size); + return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size, KERNEL_BPFPTR(NULL), 0); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 677be9a47347..37142e6d911a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -994,6 +994,7 @@ enum bpf_cmd { BPF_PROG_STREAM_READ_BY_FD, BPF_PROG_ASSOC_STRUCT_OPS, __MAX_BPF_CMD, + BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying syscall common attrs. */ }; enum bpf_map_type { @@ -1500,6 +1501,13 @@ struct bpf_stack_build_id { }; }; +struct bpf_common_attr { + __aligned_u64 log_buf; + __u32 log_size; + __u32 log_level; + __u32 log_true_size; +}; + #define BPF_OBJ_NAME_LEN 16U enum { -- 2.54.0 To support the extended BPF syscall introduced in the previous commit, introduce the following internal APIs: * 'sys_bpf_ext()' * 'sys_bpf_ext_fd()' They wrap the raw 'syscall()' interface to support passing extended attributes. * 'probe_sys_bpf_ext()' Check whether current kernel supports the BPF syscall common attributes. Acked-by: Andrii Nakryiko Signed-off-by: Leon Hwang --- tools/lib/bpf/bpf.c | 36 +++++++++++++++++++++++++++++++++ tools/lib/bpf/features.c | 8 ++++++++ tools/lib/bpf/libbpf_internal.h | 3 +++ 3 files changed, 47 insertions(+) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 5846de364209..9d8740761b7a 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -69,6 +69,42 @@ static inline __u64 ptr_to_u64(const void *ptr) return (__u64) (unsigned long) ptr; } +static inline int sys_bpf_ext(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size, + struct bpf_common_attr *attr_common, + unsigned int size_common) +{ + cmd = attr_common ? 
(cmd | BPF_COMMON_ATTRS) : (cmd & ~BPF_COMMON_ATTRS); + return syscall(__NR_bpf, cmd, attr, size, attr_common, size_common); +} + +static inline int sys_bpf_ext_fd(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size, + struct bpf_common_attr *attr_common, + unsigned int size_common) +{ + int fd; + + fd = sys_bpf_ext(cmd, attr, size, attr_common, size_common); + return ensure_good_fd(fd); +} + +int probe_sys_bpf_ext(void) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); + union bpf_attr attr; + int fd; + + memset(&attr, 0, attr_sz); + fd = syscall(__NR_bpf, BPF_PROG_LOAD | BPF_COMMON_ATTRS, &attr, attr_sz, NULL, + sizeof(struct bpf_common_attr)); + if (fd >= 0) { + close(fd); + return -EINVAL; + } + return errno == EFAULT ? 1 : 0; +} + static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size) { diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 4f19a0d79b0c..b7e388f99d0b 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -615,6 +615,11 @@ static int probe_kern_btf_layout(int token_fd) (char *)layout, token_fd)); } +static int probe_bpf_syscall_common_attrs(int token_fd) +{ + return probe_sys_bpf_ext(); +} + typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -699,6 +704,9 @@ static struct kern_feature_desc { [FEAT_BTF_LAYOUT] = { "kernel supports BTF layout", probe_kern_btf_layout, }, + [FEAT_BPF_SYSCALL_COMMON_ATTRS] = { + "BPF syscall common attributes support", probe_bpf_syscall_common_attrs, + }, }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 3781c45b46d3..7d93c6c01d60 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -398,6 +398,8 @@ enum kern_feature_id { FEAT_UPROBE_SYSCALL, /* Kernel supports BTF layout information */ FEAT_BTF_LAYOUT, + /* Kernel 
supports BPF syscall common attributes */ + FEAT_BPF_SYSCALL_COMMON_ATTRS, __FEAT_CNT, }; @@ -768,4 +770,5 @@ int probe_fd(int fd); #define SHA256_DWORD_SIZE SHA256_DIGEST_LENGTH / sizeof(__u64) void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]); +int probe_sys_bpf_ext(void); #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ -- 2.54.0 The next commit will add support for reporting logs via extended common attributes, including 'log_true_size'. To prepare for that, refactor the 'log_true_size' reporting logic by introducing a new struct bpf_log_attr to encapsulate log-related behavior: * bpf_log_attr_init(): initialize log fields, which will support extended common attributes in the next commit. * bpf_log_attr_finalize(): handle log finalization and write back 'log_true_size' to userspace. Acked-by: Andrii Nakryiko Signed-off-by: Leon Hwang --- include/linux/bpf.h | 4 +++- include/linux/bpf_verifier.h | 12 ++++++++++++ kernel/bpf/log.c | 29 +++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 12 +++++++++--- kernel/bpf/verifier.c | 17 ++++------------- 5 files changed, 57 insertions(+), 17 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 14759972f148..9e16e91647d3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2919,7 +2919,9 @@ int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size, size_t actual_size); /* verify correctness of eBPF program */ -int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size); +struct bpf_log_attr; +int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, + struct bpf_log_attr *attr_log); #ifndef CONFIG_BPF_JIT_ALWAYS_ON void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 976e2b2f40e8..8d27ad1f9f94 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -755,6 +755,18 @@ static inline bool 
bpf_verifier_log_needed(const struct bpf_verifier_log *log) return log && log->level; } +struct bpf_log_attr { + char __user *ubuf; + u32 size; + u32 level; + u32 offsetof_true_size; + bpfptr_t uattr; +}; + +int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level, + u32 offsetof_log_true_size, bpfptr_t uattr); +int bpf_log_attr_finalize(struct bpf_log_attr *attr, struct bpf_verifier_log *log); + #define BPF_MAX_SUBPROGS 256 struct bpf_subprog_arg_info { diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 64566b86dd27..1b1efe75398b 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -825,3 +825,32 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st } print_verifier_state(env, vstate, frameno, false); } + +int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level, + u32 offsetof_log_true_size, bpfptr_t uattr) +{ + char __user *ubuf = u64_to_user_ptr(log_buf); + + memset(log, 0, sizeof(*log)); + log->ubuf = ubuf; + log->size = log_size; + log->level = log_level; + log->offsetof_true_size = offsetof_log_true_size; + log->uattr = uattr; + return 0; +} + +int bpf_log_attr_finalize(struct bpf_log_attr *attr, struct bpf_verifier_log *log) +{ + u32 log_true_size; + int err; + + err = bpf_vlog_finalize(log, &log_true_size); + + if (attr->offsetof_true_size && + copy_to_bpfptr_offset(attr->uattr, attr->offsetof_true_size, &log_true_size, + sizeof(log_true_size))) + return -EFAULT; + + return err; +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 354f6f471a08..70b78ddcdedb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2861,7 +2861,7 @@ static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog) /* last field in 'union bpf_attr' used by this command */ #define BPF_PROG_LOAD_LAST_FIELD keyring_id -static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) +static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, 
struct bpf_log_attr *attr_log) { enum bpf_prog_type type = attr->prog_type; struct bpf_prog *prog, *dst_prog = NULL; @@ -3079,7 +3079,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) goto free_prog_sec; /* run eBPF verifier */ - err = bpf_check(&prog, attr, uattr, uattr_size); + err = bpf_check(&prog, attr, uattr, attr_log); if (err < 0) goto free_used_maps; @@ -6215,6 +6215,8 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, bpfptr_t uattr_common, unsigned int size_common) { struct bpf_common_attr attr_common; + u32 offsetof_log_true_size = 0; + struct bpf_log_attr attr_log; union bpf_attr attr; int err; @@ -6266,7 +6268,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, err = map_freeze(&attr); break; case BPF_PROG_LOAD: - err = bpf_prog_load(&attr, uattr, size); + if (size >= offsetofend(union bpf_attr, log_true_size)) + offsetof_log_true_size = offsetof(union bpf_attr, log_true_size); + err = bpf_log_attr_init(&attr_log, attr.log_buf, attr.log_size, attr.log_level, + offsetof_log_true_size, uattr); + err = err ?: bpf_prog_load(&attr, uattr, &attr_log); break; case BPF_OBJ_PIN: err = bpf_obj_pin(&attr); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 11054ad89c14..0e654ef01ae0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -19294,12 +19294,12 @@ int bpf_fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return 0; } -int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) +int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, + struct bpf_log_attr *attr_log) { u64 start_time = ktime_get_ns(); struct bpf_verifier_env *env; int i, len, ret = -EINVAL, err; - u32 log_true_size; bool is_priv; BTF_TYPE_EMIT(enum bpf_features); @@ -19346,9 +19346,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 /* user could have requested verbose 
verifier output * and supplied buffer to store the verification trace */ - ret = bpf_vlog_init(&env->log, attr->log_level, - (char __user *) (unsigned long) attr->log_buf, - attr->log_size); + ret = bpf_vlog_init(&env->log, attr_log->level, attr_log->ubuf, attr_log->size); if (ret) goto err_unlock; @@ -19510,17 +19508,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 env->prog->aux->verified_insns = env->insn_processed; /* preserve original error even if log finalization is successful */ - err = bpf_vlog_finalize(&env->log, &log_true_size); + err = bpf_log_attr_finalize(attr_log, &env->log); if (err) ret = err; - if (uattr_size >= offsetofend(union bpf_attr, log_true_size) && - copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size), - &log_true_size, sizeof(log_true_size))) { - ret = -EFAULT; - goto err_release_maps; - } - if (ret) goto err_release_maps; -- 2.54.0 BPF_PROG_LOAD can now take log parameters from both union bpf_attr and struct bpf_common_attr. The merge rules are: - if both sides provide a complete log tuple (buf/size/level) and they match, use it; - if only one side provides log parameters, use that one; - if both sides provide complete tuples but they differ, return -EINVAL. 
Signed-off-by: Leon Hwang --- include/linux/bpf_verifier.h | 3 ++- kernel/bpf/log.c | 34 +++++++++++++++++++++++++++------- kernel/bpf/syscall.c | 3 ++- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 8d27ad1f9f94..8433430dedb7 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -764,7 +764,8 @@ struct bpf_log_attr { }; int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level, - u32 offsetof_log_true_size, bpfptr_t uattr); + u32 offsetof_log_true_size, bpfptr_t uattr, struct bpf_common_attr *common, + bpfptr_t uattr_common, u32 size_common); int bpf_log_attr_finalize(struct bpf_log_attr *attr, struct bpf_verifier_log *log); #define BPF_MAX_SUBPROGS 256 diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 1b1efe75398b..fd12ad5a0338 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -13,17 +13,17 @@ #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args) -static bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) +static bool bpf_verifier_log_attr_valid(u32 log_level, char __user *log_buf, u32 log_size) { /* ubuf and len_total should both be specified (or not) together */ - if (!!log->ubuf != !!log->len_total) + if (!!log_buf != !!log_size) return false; /* log buf without log_level is meaningless */ - if (log->ubuf && log->level == 0) + if (log_buf && log_level == 0) return false; - if (log->level & ~BPF_LOG_MASK) + if (log_level & ~BPF_LOG_MASK) return false; - if (log->len_total > UINT_MAX >> 2) + if (log_size > UINT_MAX >> 2) return false; return true; } @@ -36,7 +36,7 @@ int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level, log->len_total = log_size; /* log attributes have to be sane */ - if (!bpf_verifier_log_attr_valid(log)) + if (!bpf_verifier_log_attr_valid(log_level, log_buf, log_size)) return -EINVAL; return 0; @@ -827,16 +827,36 @@ void print_insn_state(struct 
bpf_verifier_env *env, const struct bpf_verifier_st } int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level, - u32 offsetof_log_true_size, bpfptr_t uattr) + u32 offsetof_log_true_size, bpfptr_t uattr, struct bpf_common_attr *common, + bpfptr_t uattr_common, u32 size_common) { + char __user *ubuf_common = u64_to_user_ptr(common->log_buf); char __user *ubuf = u64_to_user_ptr(log_buf); + if (!bpf_verifier_log_attr_valid(common->log_level, ubuf_common, common->log_size) || + !bpf_verifier_log_attr_valid(log_level, ubuf, log_size)) + return -EINVAL; + + if (ubuf && ubuf_common && (ubuf != ubuf_common || log_size != common->log_size || + log_level != common->log_level)) + return -EINVAL; + memset(log, 0, sizeof(*log)); log->ubuf = ubuf; log->size = log_size; log->level = log_level; log->offsetof_true_size = offsetof_log_true_size; log->uattr = uattr; + + if (!ubuf && ubuf_common) { + log->ubuf = ubuf_common; + log->size = common->log_size; + log->level = common->log_level; + log->uattr = uattr_common; + log->offsetof_true_size = 0; + if (size_common >= offsetofend(struct bpf_common_attr, log_true_size)) + log->offsetof_true_size = offsetof(struct bpf_common_attr, log_true_size); + } return 0; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 70b78ddcdedb..db893cae826c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -6271,7 +6271,8 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, if (size >= offsetofend(union bpf_attr, log_true_size)) offsetof_log_true_size = offsetof(union bpf_attr, log_true_size); err = bpf_log_attr_init(&attr_log, attr.log_buf, attr.log_size, attr.log_level, - offsetof_log_true_size, uattr); + offsetof_log_true_size, uattr, &attr_common, uattr_common, + size_common); err = err ?: bpf_prog_load(&attr, uattr, &attr_log); break; case BPF_OBJ_PIN: -- 2.54.0 BPF_BTF_LOAD can now take log parameters from both union bpf_attr and struct bpf_common_attr, with the same 
merge rules as BPF_PROG_LOAD: - if both sides provide a complete log tuple (buf/size/level) and they match, use it; - if only one side provides log parameters, use that one; - if both sides provide complete tuples but they differ, return -EINVAL. Acked-by: Andrii Nakryiko Signed-off-by: Leon Hwang --- include/linux/btf.h | 3 ++- kernel/bpf/btf.c | 30 +++++++----------------------- kernel/bpf/syscall.c | 11 ++++++++--- 3 files changed, 17 insertions(+), 27 deletions(-) diff --git a/include/linux/btf.h b/include/linux/btf.h index c82d0d689059..240401d9b25b 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -145,7 +145,8 @@ const char *btf_get_name(const struct btf *btf); void btf_get(struct btf *btf); void btf_put(struct btf *btf); const struct btf_header *btf_header(const struct btf *btf); -int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz); +struct bpf_log_attr; +int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_attr *attr_log); struct btf *btf_get_by_fd(int fd); int btf_get_info_by_fd(const struct btf *btf, const union bpf_attr *attr, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 77af44d8a3ad..a6bf4781943c 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5907,25 +5907,10 @@ static int btf_check_type_tags(struct btf_verifier_env *env, return 0; } -static int finalize_log(struct bpf_verifier_log *log, bpfptr_t uattr, u32 uattr_size) -{ - u32 log_true_size; - int err; - - err = bpf_vlog_finalize(log, &log_true_size); - - if (uattr_size >= offsetofend(union bpf_attr, btf_log_true_size) && - copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size), - &log_true_size, sizeof(log_true_size))) - err = -EFAULT; - - return err; -} - -static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) +static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, + struct bpf_log_attr *attr_log) { bpfptr_t btf_data = make_bpfptr(attr->btf, 
uattr.is_kernel); - char __user *log_ubuf = u64_to_user_ptr(attr->btf_log_buf); struct btf_struct_metas *struct_meta_tab; struct btf_verifier_env *env = NULL; struct btf *btf = NULL; @@ -5942,8 +5927,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat /* user could have requested verbose verifier output * and supplied buffer to store the verification trace */ - err = bpf_vlog_init(&env->log, attr->btf_log_level, - log_ubuf, attr->btf_log_size); + err = bpf_vlog_init(&env->log, attr_log->level, attr_log->ubuf, attr_log->size); if (err) goto errout_free; @@ -6008,7 +5992,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat } } - err = finalize_log(&env->log, uattr, uattr_size); + err = bpf_log_attr_finalize(attr_log, &env->log); if (err) goto errout_free; @@ -6020,7 +6004,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat btf_free_struct_meta_tab(btf); errout: /* overwrite err with -ENOSPC or -EFAULT */ - ret = finalize_log(&env->log, uattr, uattr_size); + ret = bpf_log_attr_finalize(attr_log, &env->log); if (ret) err = ret; errout_free: @@ -8189,12 +8173,12 @@ static int __btf_new_fd(struct btf *btf) return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC); } -int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) +int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_attr *attr_log) { struct btf *btf; int ret; - btf = btf_parse(attr, uattr, uattr_size); + btf = btf_parse(attr, uattr, attr_log); if (IS_ERR(btf)) return PTR_ERR(btf); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index db893cae826c..2fa05ba8f161 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5474,7 +5474,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, #define BPF_BTF_LOAD_LAST_FIELD btf_token_fd -static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) +static int 
bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_attr *attr_log) { struct bpf_token *token = NULL; @@ -5501,7 +5501,7 @@ static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_ bpf_token_put(token); - return btf_new_fd(attr, uattr, uattr_size); + return btf_new_fd(attr, uattr, attr_log); } #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd @@ -6318,7 +6318,12 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, err = bpf_raw_tracepoint_open(&attr); break; case BPF_BTF_LOAD: - err = bpf_btf_load(&attr, uattr, size); + if (size >= offsetofend(union bpf_attr, btf_log_true_size)) + offsetof_log_true_size = offsetof(union bpf_attr, btf_log_true_size); + err = bpf_log_attr_init(&attr_log, attr.btf_log_buf, attr.btf_log_size, + attr.btf_log_level, offsetof_log_true_size, uattr, + &attr_common, uattr_common, size_common); + err = err ?: bpf_btf_load(&attr, uattr, &attr_log); break; case BPF_BTF_GET_FD_BY_ID: err = bpf_btf_get_fd_by_id(&attr); -- 2.54.0 Many BPF_MAP_CREATE validation failures currently return -EINVAL without any explanation to userspace. Plumb common syscall log attributes into map_create(), create a verifier log from bpf_common_attr::log_buf/log_size/log_level, and report map-creation failure reasons through that buffer. This improves debuggability by allowing userspace to inspect why map creation failed and read back log_true_size from common attributes. 
Signed-off-by: Leon Hwang --- include/linux/bpf_verifier.h | 3 ++ kernel/bpf/log.c | 29 ++++++++++++++++ kernel/bpf/syscall.c | 66 ++++++++++++++++++++++++++++++------ 3 files changed, 88 insertions(+), 10 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 8433430dedb7..c15a4c26a43b 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -766,6 +766,9 @@ struct bpf_log_attr { int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level, u32 offsetof_log_true_size, bpfptr_t uattr, struct bpf_common_attr *common, bpfptr_t uattr_common, u32 size_common); +struct bpf_verifier_log *bpf_log_attr_create_vlog(struct bpf_log_attr *attr_log, + struct bpf_common_attr *common, bpfptr_t uattr, + u32 size); int bpf_log_attr_finalize(struct bpf_log_attr *attr, struct bpf_verifier_log *log); #define BPF_MAX_SUBPROGS 256 diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index fd12ad5a0338..62fe6ed18374 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -860,6 +860,35 @@ int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 l return 0; } +struct bpf_verifier_log *bpf_log_attr_create_vlog(struct bpf_log_attr *attr_log, + struct bpf_common_attr *common, bpfptr_t uattr, + u32 size) +{ + struct bpf_verifier_log *log; + int err; + + memset(attr_log, 0, sizeof(*attr_log)); + attr_log->uattr = uattr; + if (size >= offsetofend(struct bpf_common_attr, log_true_size)) + attr_log->offsetof_true_size = offsetof(struct bpf_common_attr, log_true_size); + + if (!size) + return NULL; + + log = kzalloc_obj(*log, GFP_KERNEL); + if (!log) + return ERR_PTR(-ENOMEM); + + err = bpf_vlog_init(log, common->log_level, u64_to_user_ptr(common->log_buf), + common->log_size); + if (err) { + kfree(log); + return ERR_PTR(err); + } + + return log; +} + int bpf_log_attr_finalize(struct bpf_log_attr *attr, struct bpf_verifier_log *log) { u32 log_true_size; diff --git a/kernel/bpf/syscall.c 
b/kernel/bpf/syscall.c index 2fa05ba8f161..6600e126fbfb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1359,7 +1359,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, #define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size /* called via syscall */ -static int map_create(union bpf_attr *attr, bpfptr_t uattr) +static int __map_create(union bpf_attr *attr, bpfptr_t uattr, struct bpf_verifier_log *log) { const struct bpf_map_ops *ops; struct bpf_token *token = NULL; @@ -1371,8 +1371,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) int err; err = CHECK_ATTR(BPF_MAP_CREATE); - if (err) + if (err) { + bpf_log(log, "Invalid attr.\n"); return -EINVAL; + } /* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it * to avoid per-map type checks tripping on unknown flag @@ -1381,17 +1383,25 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) attr->map_flags &= ~BPF_F_TOKEN_FD; if (attr->btf_vmlinux_value_type_id) { - if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || - attr->btf_key_type_id || attr->btf_value_type_id) + if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS) { + bpf_log(log, "btf_vmlinux_value_type_id can only be used with struct_ops maps.\n"); return -EINVAL; + } + if (attr->btf_key_type_id || attr->btf_value_type_id) { + bpf_log(log, "btf_vmlinux_value_type_id is mutually exclusive with btf_key_type_id and btf_value_type_id.\n"); + return -EINVAL; + } } else if (attr->btf_key_type_id && !attr->btf_value_type_id) { + bpf_log(log, "Invalid btf_value_type_id.\n"); return -EINVAL; } if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && attr->map_type != BPF_MAP_TYPE_ARENA && - attr->map_extra != 0) + attr->map_extra != 0) { + bpf_log(log, "Invalid map_extra.\n"); return -EINVAL; + } f_flags = bpf_get_file_flag(attr->map_flags); if (f_flags < 0) @@ -1399,13 +1409,17 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) if (numa_node != NUMA_NO_NODE && ((unsigned int)numa_node >= 
nr_node_ids || - !node_online(numa_node))) + !node_online(numa_node))) { + bpf_log(log, "Invalid numa_node.\n"); return -EINVAL; + } /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ map_type = attr->map_type; - if (map_type >= ARRAY_SIZE(bpf_map_types)) + if (map_type >= ARRAY_SIZE(bpf_map_types)) { + bpf_log(log, "Invalid map_type.\n"); return -EINVAL; + } map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types)); ops = bpf_map_types[map_type]; if (!ops) @@ -1423,8 +1437,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) if (token_flag) { token = bpf_token_get_from_fd(attr->map_token_fd); - if (IS_ERR(token)) + if (IS_ERR(token)) { + bpf_log(log, "Invalid map_token_fd.\n"); return PTR_ERR(token); + } /* if current token doesn't grant map creation permissions, * then we can't use this token, so ignore it and rely on @@ -1507,8 +1523,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) err = bpf_obj_name_cpy(map->name, attr->map_name, sizeof(attr->map_name)); - if (err < 0) + if (err < 0) { + bpf_log(log, "Invalid map_name.\n"); goto free_map; + } preempt_disable(); map->cookie = gen_cookie_next(&bpf_map_cookie); @@ -1531,6 +1549,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) btf = btf_get_by_fd(attr->btf_fd); if (IS_ERR(btf)) { + bpf_log(log, "Invalid btf_fd.\n"); err = PTR_ERR(btf); goto free_map; } @@ -1558,6 +1577,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) bpfptr_t uprog_hash = make_bpfptr(attr->excl_prog_hash, uattr.is_kernel); if (attr->excl_prog_hash_size != SHA256_DIGEST_SIZE) { + bpf_log(log, "Invalid excl_prog_hash_size.\n"); err = -EINVAL; goto free_map; } @@ -1573,6 +1593,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) goto free_map; } } else if (attr->excl_prog_hash_size) { + bpf_log(log, "Invalid excl_prog_hash_size.\n"); err = -EINVAL; goto free_map; } @@ -1611,6 +1632,31 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) 
return err; } +static int map_create(union bpf_attr *attr, bpfptr_t uattr, struct bpf_common_attr *attr_common, + bpfptr_t uattr_common, u32 size_common) +{ + struct bpf_verifier_log *log; + struct bpf_log_attr attr_log; + int err, ret; + + log = bpf_log_attr_create_vlog(&attr_log, attr_common, uattr_common, size_common); + if (IS_ERR(log)) + return PTR_ERR(log); + + err = __map_create(attr, uattr, log); + + /* preserve original error even if log finalization is successful */ + ret = bpf_log_attr_finalize(&attr_log, log); + if (ret) { + if (err >= 0) + close_fd(err); + err = ret; + } + + kfree(log); + return err; +} + void bpf_map_inc(struct bpf_map *map) { atomic64_inc(&map->refcnt); @@ -6250,7 +6296,7 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, switch (cmd) { case BPF_MAP_CREATE: - err = map_create(&attr, uattr); + err = map_create(&attr, uattr, &attr_common, uattr_common, size_common); break; case BPF_MAP_LOOKUP_ELEM: err = map_lookup_elem(&attr); -- 2.54.0 With the previous commit adding common attribute support for BPF_MAP_CREATE, users can now retrieve detailed error messages when map creation fails via the log_buf field. Introduce struct bpf_log_opts with the following fields: log_buf, log_size, log_level, and log_true_size. Extend bpf_map_create_opts with a new field log_opts, allowing users to capture and inspect log messages on map creation failures. 
Acked-by: Andrii Nakryiko Signed-off-by: Leon Hwang --- tools/lib/bpf/bpf.c | 16 +++++++++++++++- tools/lib/bpf/bpf.h | 17 ++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9d8740761b7a..483c02cf21d1 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -209,6 +209,9 @@ int bpf_map_create(enum bpf_map_type map_type, const struct bpf_map_create_opts *opts) { const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size); + const size_t attr_common_sz = sizeof(struct bpf_common_attr); + struct bpf_common_attr attr_common; + struct bpf_log_opts *log_opts; union bpf_attr attr; int fd; @@ -242,7 +245,18 @@ int bpf_map_create(enum bpf_map_type map_type, attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL)); attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0); - fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); /* The common-attrs path (sys_bpf_ext_fd) is taken only when the caller supplied log_opts and feat_supported() reports FEAT_BPF_SYSCALL_COMMON_ATTRS; true_size is then copied back from attr_common.log_true_size. Otherwise the plain sys_bpf_fd() call is used and true_size is set to 0. */ + log_opts = OPTS_GET(opts, log_opts, NULL); + if (log_opts && feat_supported(NULL, FEAT_BPF_SYSCALL_COMMON_ATTRS)) { + memset(&attr_common, 0, attr_common_sz); + attr_common.log_buf = ptr_to_u64(OPTS_GET(log_opts, buf, NULL)); + attr_common.log_size = OPTS_GET(log_opts, size, 0); + attr_common.log_level = OPTS_GET(log_opts, level, 0); + fd = sys_bpf_ext_fd(BPF_MAP_CREATE, &attr, attr_sz, &attr_common, attr_common_sz); + OPTS_SET(log_opts, true_size, attr_common.log_true_size); + } else { + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); + OPTS_SET(log_opts, true_size, 0); + } return libbpf_err_errno(fd); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 2c8e88ddb674..2312900a3263 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -37,6 +37,18 @@ extern "C" { LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes); /* Log options reachable through bpf_map_create_opts.log_opts: bpf_map_create() copies buf, size and level into the syscall common attributes and writes true_size back after the call. */ +struct bpf_log_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + char *buf; + __u32 size; + __u32 level; + __u32 true_size; /* out parameter set by kernel */ + + 
size_t :0; +}; +#define bpf_log_opts__last_field true_size + struct bpf_map_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -57,9 +69,12 @@ struct bpf_map_create_opts { const void *excl_prog_hash; __u32 excl_prog_hash_size; + + struct bpf_log_opts *log_opts; + size_t :0; }; -#define bpf_map_create_opts__last_field excl_prog_hash_size +#define bpf_map_create_opts__last_field log_opts LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, -- 2.54.0 Add tests to verify that the kernel reports the expected error messages and correct log_true_size when map creation fails. Signed-off-by: Leon Hwang --- .../selftests/bpf/prog_tests/map_init.c | 166 ++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c index 14a31109dd0e..5c61c8e37306 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_init.c +++ b/tools/testing/selftests/bpf/prog_tests/map_init.c @@ -212,3 +212,169 @@ void test_map_init(void) if (test__start_subtest("pcpu_lru_map_init")) test_pcpu_lru_map_init(); } + /* test_map_create(): attempts a map creation that is expected to fail, attaching a 128-byte log buffer at log level 1 through opts->log_opts. If creation unexpectedly succeeds the fd is closed and the remaining checks are skipped; otherwise the log text must equal exp_msg and true_size must equal strlen(exp_msg) + 1. */ +static void test_map_create(enum bpf_map_type map_type, const char *map_name, + struct bpf_map_create_opts *opts, const char *exp_msg) +{ + const int key_size = 4, value_size = 4, max_entries = 1; + char log_buf[128]; + int fd; + LIBBPF_OPTS(bpf_log_opts, log_opts); + + log_buf[0] = '\0'; + log_opts.buf = log_buf; + log_opts.size = sizeof(log_buf); + log_opts.level = 1; + opts->log_opts = &log_opts; + fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, opts); + if (!ASSERT_LT(fd, 0, "bpf_map_create")) { + close(fd); + return; + } + + ASSERT_STREQ(log_buf, exp_msg, "log_buf"); + ASSERT_EQ(log_opts.true_size, strlen(exp_msg) + 1, "true_size"); +} + /* Shorthand for test_map_create() with BPF_MAP_TYPE_ARRAY and the map name "test_map_create". */ +static void test_map_create_array(struct bpf_map_create_opts *opts, const char *exp_msg) +{ + test_map_create(BPF_MAP_TYPE_ARRAY, "test_map_create", opts, 
exp_msg); +} + +static void test_invalid_vmlinux_value_type_id_struct_ops(void) +{ + const char *msg = "btf_vmlinux_value_type_id can only be used with struct_ops maps.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_vmlinux_value_type_id = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_vmlinux_value_type_id_kv_type_id(void) +{ + const char *msg = "btf_vmlinux_value_type_id is mutually exclusive with btf_key_type_id and btf_value_type_id.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_vmlinux_value_type_id = 1, + .btf_key_type_id = 1, + ); + + test_map_create(BPF_MAP_TYPE_STRUCT_OPS, "test_map_create", &opts, msg); +} + +static void test_invalid_value_type_id(void) +{ + const char *msg = "Invalid btf_value_type_id.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_key_type_id = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_map_extra(void) +{ + const char *msg = "Invalid map_extra.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_extra = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_numa_node(void) +{ + const char *msg = "Invalid numa_node.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_NUMA_NODE, + .numa_node = 0xFF, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_map_type(void) +{ + const char *msg = "Invalid map_type.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts); + + test_map_create(__MAX_BPF_MAP_TYPE, "test_map_create", &opts, msg); +} + +static void test_invalid_token_fd(void) +{ + const char *msg = "Invalid map_token_fd.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_TOKEN_FD, + .token_fd = 0xFF, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_map_name(void) +{ + const char *msg = "Invalid map_name.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts); + + test_map_create(BPF_MAP_TYPE_ARRAY, "test-!@#", &opts, msg); +} + +static void test_invalid_btf_fd(void) +{ + const 
char *msg = "Invalid btf_fd.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_fd = -1, + .btf_key_type_id = 1, + .btf_value_type_id = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_excl_prog_hash_size_1(void) +{ + const char *msg = "Invalid excl_prog_hash_size.\n"; + const char *hash = "DEADCODE"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .excl_prog_hash = hash, + ); + + test_map_create_array(&opts, msg); +} + +static void test_excl_prog_hash_size_2(void) +{ + const char *msg = "Invalid excl_prog_hash_size.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .excl_prog_hash_size = 1, + ); + + test_map_create_array(&opts, msg); +} + /* Test entry point: each map-creation failure case defined above runs only when test__start_subtest() accepts its subtest name. */ +void test_map_create_failure(void) +{ + if (test__start_subtest("invalid_vmlinux_value_type_id_struct_ops")) + test_invalid_vmlinux_value_type_id_struct_ops(); + if (test__start_subtest("invalid_vmlinux_value_type_id_kv_type_id")) + test_invalid_vmlinux_value_type_id_kv_type_id(); + if (test__start_subtest("invalid_value_type_id")) + test_invalid_value_type_id(); + if (test__start_subtest("invalid_map_extra")) + test_invalid_map_extra(); + if (test__start_subtest("invalid_numa_node")) + test_invalid_numa_node(); + if (test__start_subtest("invalid_map_type")) + test_invalid_map_type(); + if (test__start_subtest("invalid_token_fd")) + test_invalid_token_fd(); + if (test__start_subtest("invalid_map_name")) + test_invalid_map_name(); + if (test__start_subtest("invalid_btf_fd")) + test_invalid_btf_fd(); + if (test__start_subtest("invalid_excl_prog_hash_size_1")) + test_excl_prog_hash_size_1(); + if (test__start_subtest("invalid_excl_prog_hash_size_2")) + test_excl_prog_hash_size_2(); +} -- 2.54.0