Add generic BPF syscall support for passing common attributes. The initial set of common attributes includes: 1. 'log_buf': User-provided buffer for storing logs. 2. 'log_size': Size of the log buffer. 3. 'log_level': Log verbosity level. 4. 'log_true_size': Actual log size reported by kernel. The common-attribute pointer and its size are passed as the 4th and 5th syscall arguments. A new command bit, 'BPF_COMMON_ATTRS' ('1 << 16'), indicates that common attributes are supplied. This commit adds syscall and uapi plumbing. Command-specific handling is added in follow-up patches. Signed-off-by: Leon Hwang --- include/linux/syscalls.h | 3 ++- include/uapi/linux/bpf.h | 8 ++++++++ kernel/bpf/syscall.c | 25 +++++++++++++++++++++---- tools/include/uapi/linux/bpf.h | 8 ++++++++ 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 02bd6ddb6278..f4848c64872f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -936,7 +936,8 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); -asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size); +asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size, + struct bpf_common_attr __user *attr_common, unsigned int size_common); asmlinkage long sys_execveat(int dfd, const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp, int flags); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 552bc5d9afbd..49eeb18ad050 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -994,6 +994,7 @@ enum bpf_cmd { BPF_PROG_STREAM_READ_BY_FD, BPF_PROG_ASSOC_STRUCT_OPS, __MAX_BPF_CMD, + BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying syscall common attrs. 
*/ }; enum bpf_map_type { @@ -1500,6 +1501,13 @@ struct bpf_stack_build_id { }; }; +struct bpf_common_attr { + __u64 log_buf; + __u32 log_size; + __u32 log_level; + __u32 log_true_size; +}; + #define BPF_OBJ_NAME_LEN 16U enum { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b73b25c63073..5bf84ea21bf5 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -6210,8 +6210,10 @@ static int prog_assoc_struct_ops(union bpf_attr *attr) return ret; } -static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) +static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, + bpfptr_t uattr_common, unsigned int size_common) { + struct bpf_common_attr attr_common; union bpf_attr attr; int err; @@ -6225,6 +6227,20 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) if (copy_from_bpfptr(&attr, uattr, size) != 0) return -EFAULT; + memset(&attr_common, 0, sizeof(attr_common)); + if (cmd & BPF_COMMON_ATTRS) { + err = bpf_check_uarg_tail_zero(uattr_common, sizeof(attr_common), size_common); + if (err) + return err; + + cmd &= ~BPF_COMMON_ATTRS; + size_common = min_t(u32, size_common, sizeof(attr_common)); + if (copy_from_bpfptr(&attr_common, uattr_common, size_common) != 0) + return -EFAULT; + } else { + size_common = 0; + } + err = security_bpf(cmd, &attr, size, uattr.is_kernel); if (err < 0) return err; @@ -6360,9 +6376,10 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) return err; } -SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) +SYSCALL_DEFINE5(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size, + struct bpf_common_attr __user *, uattr_common, unsigned int, size_common) { - return __sys_bpf(cmd, USER_BPFPTR(uattr), size); + return __sys_bpf(cmd, USER_BPFPTR(uattr), size, USER_BPFPTR(uattr_common), size_common); } static bool syscall_prog_is_valid_access(int off, int size, @@ -6392,7 +6409,7 @@ BPF_CALL_3(bpf_sys_bpf, int, 
cmd, union bpf_attr *, attr, u32, attr_size) default: return -EINVAL; } - return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size); + return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size, KERNEL_BPFPTR(NULL), 0); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 677be9a47347..16ff0968fc21 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -994,6 +994,7 @@ enum bpf_cmd { BPF_PROG_STREAM_READ_BY_FD, BPF_PROG_ASSOC_STRUCT_OPS, __MAX_BPF_CMD, + BPF_COMMON_ATTRS = 1 << 16, /* Indicate carrying syscall common attrs. */ }; enum bpf_map_type { @@ -1500,6 +1501,13 @@ struct bpf_stack_build_id { }; }; +struct bpf_common_attr { + __u64 log_buf; + __u32 log_size; + __u32 log_level; + __u32 log_true_size; +}; + #define BPF_OBJ_NAME_LEN 16U enum { -- 2.53.0 To support the extended BPF syscall introduced in the previous commit, introduce the following internal APIs: * 'sys_bpf_ext()' * 'sys_bpf_ext_fd()' They wrap the raw 'syscall()' interface to support passing common attributes. * 'probe_sys_bpf_ext()' Check whether the current kernel supports the BPF syscall common attributes. Acked-by: Andrii Nakryiko Signed-off-by: Leon Hwang --- tools/lib/bpf/bpf.c | 36 +++++++++++++++++++++++++++++++++ tools/lib/bpf/features.c | 8 ++++++++ tools/lib/bpf/libbpf_internal.h | 3 +++ 3 files changed, 47 insertions(+) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 5846de364209..9d8740761b7a 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -69,6 +69,42 @@ static inline __u64 ptr_to_u64(const void *ptr) return (__u64) (unsigned long) ptr; } +static inline int sys_bpf_ext(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size, + struct bpf_common_attr *attr_common, + unsigned int size_common) +{ + cmd = attr_common ? 
(cmd | BPF_COMMON_ATTRS) : (cmd & ~BPF_COMMON_ATTRS); + return syscall(__NR_bpf, cmd, attr, size, attr_common, size_common); +} + +static inline int sys_bpf_ext_fd(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size, + struct bpf_common_attr *attr_common, + unsigned int size_common) +{ + int fd; + + fd = sys_bpf_ext(cmd, attr, size, attr_common, size_common); + return ensure_good_fd(fd); +} + +int probe_sys_bpf_ext(void) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); + union bpf_attr attr; + int fd; + + memset(&attr, 0, attr_sz); + fd = syscall(__NR_bpf, BPF_PROG_LOAD | BPF_COMMON_ATTRS, &attr, attr_sz, NULL, + sizeof(struct bpf_common_attr)); + if (fd >= 0) { + close(fd); + return -EINVAL; + } + return errno == EFAULT ? 1 : 0; +} + static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size) { diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 4f19a0d79b0c..b7e388f99d0b 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -615,6 +615,11 @@ static int probe_kern_btf_layout(int token_fd) (char *)layout, token_fd)); } +static int probe_bpf_syscall_common_attrs(int token_fd) +{ + return probe_sys_bpf_ext(); +} + typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -699,6 +704,9 @@ static struct kern_feature_desc { [FEAT_BTF_LAYOUT] = { "kernel supports BTF layout", probe_kern_btf_layout, }, + [FEAT_BPF_SYSCALL_COMMON_ATTRS] = { + "BPF syscall common attributes support", probe_bpf_syscall_common_attrs, + }, }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index cabdaef79098..21ccdf4d9284 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -398,6 +398,8 @@ enum kern_feature_id { FEAT_UPROBE_SYSCALL, /* Kernel supports BTF layout information */ FEAT_BTF_LAYOUT, + /* Kernel 
supports BPF syscall common attributes */ + FEAT_BPF_SYSCALL_COMMON_ATTRS, __FEAT_CNT, }; @@ -767,4 +769,5 @@ int probe_fd(int fd); #define SHA256_DWORD_SIZE SHA256_DIGEST_LENGTH / sizeof(__u64) void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]); +int probe_sys_bpf_ext(void); #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ -- 2.53.0 The next commit will add support for reporting logs via extended common attributes, including 'log_true_size'. To prepare for that, refactor the 'log_true_size' reporting logic by introducing a new struct bpf_log_attr to encapsulate log-related behavior: * bpf_log_attr_init(): initialize log fields, which will support extended common attributes in the next commit. * bpf_log_attr_finalize(): handle log finalization and write back 'log_true_size' to userspace. Acked-by: Andrii Nakryiko Signed-off-by: Leon Hwang --- include/linux/bpf.h | 4 +++- include/linux/bpf_verifier.h | 12 ++++++++++++ kernel/bpf/log.c | 29 +++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 12 +++++++++--- kernel/bpf/verifier.c | 17 ++++------------- 5 files changed, 57 insertions(+), 17 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 30d35d5fe40b..76b28153224a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2912,7 +2912,9 @@ int bpf_check_uarg_tail_zero(bpfptr_t uaddr, size_t expected_size, size_t actual_size); /* verify correctness of eBPF program */ -int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size); +struct bpf_log_attr; +int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, bpfptr_t uattr, + struct bpf_log_attr *attr_log); #ifndef CONFIG_BPF_JIT_ALWAYS_ON void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 36bfd96d4563..1162b5492841 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -647,6 +647,18 @@ static inline bool 
bpf_verifier_log_needed(const struct bpf_verifier_log *log) return log && log->level; } +struct bpf_log_attr { + char __user *ubuf; + u32 size; + u32 level; + u32 offsetof_true_size; + bpfptr_t uattr; +}; + +int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level, + u32 offsetof_log_true_size, bpfptr_t uattr); +int bpf_log_attr_finalize(struct bpf_log_attr *attr, struct bpf_verifier_log *log); + #define BPF_MAX_SUBPROGS 256 struct bpf_subprog_arg_info { diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 37d72b052192..9c5c755d5972 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -865,3 +865,32 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st } print_verifier_state(env, vstate, frameno, false); } + +int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level, + u32 offsetof_log_true_size, bpfptr_t uattr) +{ + char __user *ubuf = u64_to_user_ptr(log_buf); + + memset(log, 0, sizeof(*log)); + log->ubuf = ubuf; + log->size = log_size; + log->level = log_level; + log->offsetof_true_size = offsetof_log_true_size; + log->uattr = uattr; + return 0; +} + +int bpf_log_attr_finalize(struct bpf_log_attr *attr, struct bpf_verifier_log *log) +{ + u32 log_true_size; + int err; + + err = bpf_vlog_finalize(log, &log_true_size); + + if (attr->offsetof_true_size && + copy_to_bpfptr_offset(attr->uattr, attr->offsetof_true_size, &log_true_size, + sizeof(log_true_size))) + return -EFAULT; + + return err; +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5bf84ea21bf5..b560dab9bfa8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2861,7 +2861,7 @@ static int bpf_prog_mark_insn_arrays_ready(struct bpf_prog *prog) /* last field in 'union bpf_attr' used by this command */ #define BPF_PROG_LOAD_LAST_FIELD keyring_id -static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) +static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, 
struct bpf_log_attr *attr_log) { enum bpf_prog_type type = attr->prog_type; struct bpf_prog *prog, *dst_prog = NULL; @@ -3079,7 +3079,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) goto free_prog_sec; /* run eBPF verifier */ - err = bpf_check(&prog, attr, uattr, uattr_size); + err = bpf_check(&prog, attr, uattr, attr_log); if (err < 0) goto free_used_maps; @@ -6214,6 +6214,8 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, bpfptr_t uattr_common, unsigned int size_common) { struct bpf_common_attr attr_common; + u32 offsetof_log_true_size = 0; + struct bpf_log_attr attr_log; union bpf_attr attr; int err; @@ -6265,7 +6267,11 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, err = map_freeze(&attr); break; case BPF_PROG_LOAD: - err = bpf_prog_load(&attr, uattr, size); + if (size >= offsetofend(union bpf_attr, log_true_size)) + offsetof_log_true_size = offsetof(union bpf_attr, log_true_size); + err = bpf_log_attr_init(&attr_log, attr.log_buf, attr.log_size, attr.log_level, + offsetof_log_true_size, uattr); + err = err ?: bpf_prog_load(&attr, uattr, &attr_log); break; case BPF_OBJ_PIN: err = bpf_obj_pin(&attr); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 594260c1f382..668a3628013e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -26526,12 +26526,12 @@ static int compute_scc(struct bpf_verifier_env *env) return err; } -int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) +int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, + struct bpf_log_attr *attr_log) { u64 start_time = ktime_get_ns(); struct bpf_verifier_env *env; int i, len, ret = -EINVAL, err; - u32 log_true_size; bool is_priv; BTF_TYPE_EMIT(enum bpf_features); @@ -26578,9 +26578,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 /* user could have requested verbose verifier output * and 
supplied buffer to store the verification trace */ - ret = bpf_vlog_init(&env->log, attr->log_level, - (char __user *) (unsigned long) attr->log_buf, - attr->log_size); + ret = bpf_vlog_init(&env->log, attr_log->level, attr_log->ubuf, attr_log->size); if (ret) goto err_unlock; @@ -26742,17 +26740,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 env->prog->aux->verified_insns = env->insn_processed; /* preserve original error even if log finalization is successful */ - err = bpf_vlog_finalize(&env->log, &log_true_size); + err = bpf_log_attr_finalize(attr_log, &env->log); if (err) ret = err; - if (uattr_size >= offsetofend(union bpf_attr, log_true_size) && - copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size), - &log_true_size, sizeof(log_true_size))) { - ret = -EFAULT; - goto err_release_maps; - } - if (ret) goto err_release_maps; -- 2.53.0 BPF_PROG_LOAD can now take log parameters from both union bpf_attr and struct bpf_common_attr. The merge rules are: - if both sides provide a complete log tuple (buf/size/level) and they match, use it; - if only one side provides log parameters, use that one; - if both sides provide complete tuples but they differ, return -EINVAL. log_true_size is written back to the structure whose log parameters are used; when both sides match, it is written to union bpf_attr. 
Signed-off-by: Leon Hwang --- include/linux/bpf_verifier.h | 3 ++- kernel/bpf/log.c | 34 +++++++++++++++++++++++++++------- kernel/bpf/syscall.c | 3 ++- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1162b5492841..9045dc88c5b6 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -656,7 +656,8 @@ struct bpf_log_attr { }; int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level, - u32 offsetof_log_true_size, bpfptr_t uattr); + u32 offsetof_log_true_size, bpfptr_t uattr, struct bpf_common_attr *common, + bpfptr_t uattr_common, u32 size_common); int bpf_log_attr_finalize(struct bpf_log_attr *attr, struct bpf_verifier_log *log); #define BPF_MAX_SUBPROGS 256 diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 9c5c755d5972..dc8734e27752 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -13,17 +13,17 @@ #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args) -static bool bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) +static bool bpf_verifier_log_attr_valid(u32 log_level, char __user *log_buf, u32 log_size) { /* ubuf and len_total should both be specified (or not) together */ - if (!!log->ubuf != !!log->len_total) + if (!!log_buf != !!log_size) return false; /* log buf without log_level is meaningless */ - if (log->ubuf && log->level == 0) + if (log_buf && log_level == 0) return false; - if (log->level & ~BPF_LOG_MASK) + if (log_level & ~BPF_LOG_MASK) return false; - if (log->len_total > UINT_MAX >> 2) + if (log_size > UINT_MAX >> 2) return false; return true; } @@ -36,7 +36,7 @@ int bpf_vlog_init(struct bpf_verifier_log *log, u32 log_level, log->len_total = log_size; /* log attributes have to be sane */ - if (!bpf_verifier_log_attr_valid(log)) + if (!bpf_verifier_log_attr_valid(log_level, log_buf, log_size)) return -EINVAL; return 0; @@ -867,16 +867,36 @@ void print_insn_state(struct 
bpf_verifier_env *env, const struct bpf_verifier_st } int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level, - u32 offsetof_log_true_size, bpfptr_t uattr) + u32 offsetof_log_true_size, bpfptr_t uattr, struct bpf_common_attr *common, + bpfptr_t uattr_common, u32 size_common) { + char __user *ubuf_common = u64_to_user_ptr(common->log_buf); char __user *ubuf = u64_to_user_ptr(log_buf); + if (!bpf_verifier_log_attr_valid(common->log_level, ubuf_common, common->log_size) || + !bpf_verifier_log_attr_valid(log_level, ubuf, log_size)) + return -EINVAL; + + if (ubuf && ubuf_common && (ubuf != ubuf_common || log_size != common->log_size || + log_level != common->log_level)) + return -EINVAL; + memset(log, 0, sizeof(*log)); log->ubuf = ubuf; log->size = log_size; log->level = log_level; log->offsetof_true_size = offsetof_log_true_size; log->uattr = uattr; + + if (!ubuf && ubuf_common) { + log->ubuf = ubuf_common; + log->size = common->log_size; + log->level = common->log_level; + log->uattr = uattr_common; + log->offsetof_true_size = 0; + if (size_common >= offsetofend(struct bpf_common_attr, log_true_size)) + log->offsetof_true_size = offsetof(struct bpf_common_attr, log_true_size); + } return 0; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b560dab9bfa8..1eb15a51c7dc 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -6270,7 +6270,8 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, if (size >= offsetofend(union bpf_attr, log_true_size)) offsetof_log_true_size = offsetof(union bpf_attr, log_true_size); err = bpf_log_attr_init(&attr_log, attr.log_buf, attr.log_size, attr.log_level, - offsetof_log_true_size, uattr); + offsetof_log_true_size, uattr, &attr_common, uattr_common, + size_common); err = err ?: bpf_prog_load(&attr, uattr, &attr_log); break; case BPF_OBJ_PIN: -- 2.53.0 BPF_BTF_LOAD can now take log parameters from both union bpf_attr and struct bpf_common_attr, with the same 
merge rules as BPF_PROG_LOAD: - if both sides provide a complete log tuple (buf/size/level) and they match, use it; - if only one side provides log parameters, use that one; - if both sides provide complete tuples but they differ, return -EINVAL. Acked-by: Andrii Nakryiko Signed-off-by: Leon Hwang --- include/linux/btf.h | 3 ++- kernel/bpf/btf.c | 30 +++++++----------------------- kernel/bpf/syscall.c | 11 ++++++++--- 3 files changed, 17 insertions(+), 27 deletions(-) diff --git a/include/linux/btf.h b/include/linux/btf.h index 48108471c5b1..f64d87315c5f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -145,7 +145,8 @@ const char *btf_get_name(const struct btf *btf); void btf_get(struct btf *btf); void btf_put(struct btf *btf); const struct btf_header *btf_header(const struct btf *btf); -int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz); +struct bpf_log_attr; +int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_attr *attr_log); struct btf *btf_get_by_fd(int fd); int btf_get_info_by_fd(const struct btf *btf, const union bpf_attr *attr, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index a62d78581207..9f69ef774a0b 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5914,25 +5914,10 @@ static int btf_check_type_tags(struct btf_verifier_env *env, return 0; } -static int finalize_log(struct bpf_verifier_log *log, bpfptr_t uattr, u32 uattr_size) -{ - u32 log_true_size; - int err; - - err = bpf_vlog_finalize(log, &log_true_size); - - if (uattr_size >= offsetofend(union bpf_attr, btf_log_true_size) && - copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, btf_log_true_size), - &log_true_size, sizeof(log_true_size))) - err = -EFAULT; - - return err; -} - -static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) +static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, + struct bpf_log_attr *attr_log) { bpfptr_t btf_data = make_bpfptr(attr->btf, 
uattr.is_kernel); - char __user *log_ubuf = u64_to_user_ptr(attr->btf_log_buf); struct btf_struct_metas *struct_meta_tab; struct btf_verifier_env *env = NULL; struct btf *btf = NULL; @@ -5949,8 +5934,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat /* user could have requested verbose verifier output * and supplied buffer to store the verification trace */ - err = bpf_vlog_init(&env->log, attr->btf_log_level, - log_ubuf, attr->btf_log_size); + err = bpf_vlog_init(&env->log, attr_log->level, attr_log->ubuf, attr_log->size); if (err) goto errout_free; @@ -6015,7 +5999,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat } } - err = finalize_log(&env->log, uattr, uattr_size); + err = bpf_log_attr_finalize(attr_log, &env->log); if (err) goto errout_free; @@ -6027,7 +6011,7 @@ static struct btf *btf_parse(const union bpf_attr *attr, bpfptr_t uattr, u32 uat btf_free_struct_meta_tab(btf); errout: /* overwrite err with -ENOSPC or -EFAULT */ - ret = finalize_log(&env->log, uattr, uattr_size); + ret = bpf_log_attr_finalize(attr_log, &env->log); if (ret) err = ret; errout_free: @@ -8196,12 +8180,12 @@ static int __btf_new_fd(struct btf *btf) return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC); } -int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) +int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_attr *attr_log) { struct btf *btf; int ret; - btf = btf_parse(attr, uattr, uattr_size); + btf = btf_parse(attr, uattr, attr_log); if (IS_ERR(btf)) return PTR_ERR(btf); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1eb15a51c7dc..06aea4e499a1 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5473,7 +5473,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, #define BPF_BTF_LOAD_LAST_FIELD btf_token_fd -static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) +static int 
bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_log_attr *attr_log) { struct bpf_token *token = NULL; @@ -5500,7 +5500,7 @@ static int bpf_btf_load(const union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_ bpf_token_put(token); - return btf_new_fd(attr, uattr, uattr_size); + return btf_new_fd(attr, uattr, attr_log); } #define BPF_BTF_GET_FD_BY_ID_LAST_FIELD fd_by_id_token_fd @@ -6317,7 +6317,12 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, err = bpf_raw_tracepoint_open(&attr); break; case BPF_BTF_LOAD: - err = bpf_btf_load(&attr, uattr, size); + if (size >= offsetofend(union bpf_attr, btf_log_true_size)) + offsetof_log_true_size = offsetof(union bpf_attr, btf_log_true_size); + err = bpf_log_attr_init(&attr_log, attr.btf_log_buf, attr.btf_log_size, + attr.btf_log_level, offsetof_log_true_size, uattr, + &attr_common, uattr_common, size_common); + err = err ?: bpf_btf_load(&attr, uattr, &attr_log); break; case BPF_BTF_GET_FD_BY_ID: err = bpf_btf_get_fd_by_id(&attr); -- 2.53.0 Many BPF_MAP_CREATE validation failures currently return -EINVAL without any explanation to userspace. Plumb common syscall log attributes into map_create(), create a verifier log from bpf_common_attr::log_buf/log_size/log_level, and report map-creation failure reasons through that buffer. This improves debuggability by allowing userspace to inspect why map creation failed and read back log_true_size from common attributes. 
Signed-off-by: Leon Hwang --- include/linux/bpf_verifier.h | 3 ++ kernel/bpf/log.c | 30 +++++++++++++++++ kernel/bpf/syscall.c | 65 ++++++++++++++++++++++++++++++------ 3 files changed, 88 insertions(+), 10 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 9045dc88c5b6..9f565f7fd3f9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -658,6 +658,9 @@ struct bpf_log_attr { int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 log_level, u32 offsetof_log_true_size, bpfptr_t uattr, struct bpf_common_attr *common, bpfptr_t uattr_common, u32 size_common); +struct bpf_verifier_log *bpf_log_attr_create_vlog(struct bpf_log_attr *attr_log, + struct bpf_common_attr *common, bpfptr_t uattr, + u32 size); int bpf_log_attr_finalize(struct bpf_log_attr *attr, struct bpf_verifier_log *log); #define BPF_MAX_SUBPROGS 256 diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index dc8734e27752..0c972f577b60 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -900,6 +900,36 @@ int bpf_log_attr_init(struct bpf_log_attr *log, u64 log_buf, u32 log_size, u32 l return 0; } + +struct bpf_verifier_log *bpf_log_attr_create_vlog(struct bpf_log_attr *attr_log, + struct bpf_common_attr *common, bpfptr_t uattr, + u32 size) +{ + struct bpf_verifier_log *log; + int err; + + memset(attr_log, 0, sizeof(*attr_log)); + attr_log->uattr = uattr; + if (size >= offsetofend(struct bpf_common_attr, log_true_size)) + attr_log->offsetof_true_size = offsetof(struct bpf_common_attr, log_true_size); + + if (!common->log_buf) + return NULL; + + log = kzalloc_obj(*log, GFP_KERNEL); + if (!log) + return ERR_PTR(-ENOMEM); + + err = bpf_vlog_init(log, common->log_level, u64_to_user_ptr(common->log_buf), + common->log_size); + if (err) { + kfree(log); + return ERR_PTR(err); + } + + return log; +} + int bpf_log_attr_finalize(struct bpf_log_attr *attr, struct bpf_verifier_log *log) { u32 log_true_size; diff --git 
a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 06aea4e499a1..39f4d4b00469 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1359,7 +1359,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, #define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size /* called via syscall */ -static int map_create(union bpf_attr *attr, bpfptr_t uattr) +static int __map_create(union bpf_attr *attr, bpfptr_t uattr, struct bpf_verifier_log *log) { const struct bpf_map_ops *ops; struct bpf_token *token = NULL; @@ -1371,8 +1371,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) int err; err = CHECK_ATTR(BPF_MAP_CREATE); - if (err) + if (err) { + bpf_log(log, "Invalid attr.\n"); return -EINVAL; + } /* check BPF_F_TOKEN_FD flag, remember if it's set, and then clear it * to avoid per-map type checks tripping on unknown flag @@ -1381,17 +1383,25 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) attr->map_flags &= ~BPF_F_TOKEN_FD; if (attr->btf_vmlinux_value_type_id) { - if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS || - attr->btf_key_type_id || attr->btf_value_type_id) + if (attr->map_type != BPF_MAP_TYPE_STRUCT_OPS) { + bpf_log(log, "btf_vmlinux_value_type_id can only be used with struct_ops maps.\n"); return -EINVAL; + } + if (attr->btf_key_type_id || attr->btf_value_type_id) { + bpf_log(log, "btf_vmlinux_value_type_id is mutually exclusive with btf_key_type_id and btf_value_type_id.\n"); + return -EINVAL; + } } else if (attr->btf_key_type_id && !attr->btf_value_type_id) { + bpf_log(log, "Invalid btf_value_type_id.\n"); return -EINVAL; } if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && attr->map_type != BPF_MAP_TYPE_ARENA && - attr->map_extra != 0) + attr->map_extra != 0) { + bpf_log(log, "Invalid map_extra.\n"); return -EINVAL; + } f_flags = bpf_get_file_flag(attr->map_flags); if (f_flags < 0) @@ -1399,13 +1409,17 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) if (numa_node != NUMA_NO_NODE && ((unsigned 
int)numa_node >= nr_node_ids || - !node_online(numa_node))) + !node_online(numa_node))) { + bpf_log(log, "Invalid numa_node.\n"); return -EINVAL; + } /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ map_type = attr->map_type; - if (map_type >= ARRAY_SIZE(bpf_map_types)) + if (map_type >= ARRAY_SIZE(bpf_map_types)) { + bpf_log(log, "Invalid map_type.\n"); return -EINVAL; + } map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types)); ops = bpf_map_types[map_type]; if (!ops) @@ -1423,8 +1437,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) if (token_flag) { token = bpf_token_get_from_fd(attr->map_token_fd); - if (IS_ERR(token)) + if (IS_ERR(token)) { + bpf_log(log, "Invalid map_token_fd.\n"); return PTR_ERR(token); + } /* if current token doesn't grant map creation permissions, * then we can't use this token, so ignore it and rely on @@ -1507,8 +1523,10 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) err = bpf_obj_name_cpy(map->name, attr->map_name, sizeof(attr->map_name)); - if (err < 0) + if (err < 0) { + bpf_log(log, "Invalid map_name.\n"); goto free_map; + } preempt_disable(); map->cookie = gen_cookie_next(&bpf_map_cookie); @@ -1531,6 +1549,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) btf = btf_get_by_fd(attr->btf_fd); if (IS_ERR(btf)) { + bpf_log(log, "Invalid btf_fd.\n"); err = PTR_ERR(btf); goto free_map; } @@ -1558,6 +1577,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) bpfptr_t uprog_hash = make_bpfptr(attr->excl_prog_hash, uattr.is_kernel); if (attr->excl_prog_hash_size != SHA256_DIGEST_SIZE) { + bpf_log(log, "Invalid excl_prog_hash_size.\n"); err = -EINVAL; goto free_map; } @@ -1573,6 +1593,7 @@ static int map_create(union bpf_attr *attr, bpfptr_t uattr) goto free_map; } } else if (attr->excl_prog_hash_size) { + bpf_log(log, "Invalid excl_prog_hash_size.\n"); err = -EINVAL; goto free_map; } @@ -1611,6 +1632,30 @@ static int map_create(union bpf_attr *attr, 
bpfptr_t uattr) return err; } +static int map_create(union bpf_attr *attr, bpfptr_t uattr, struct bpf_common_attr *attr_common, + bpfptr_t uattr_common, u32 size_common) +{ + struct bpf_verifier_log *log; + struct bpf_log_attr attr_log; + int err, ret; + + log = bpf_log_attr_create_vlog(&attr_log, attr_common, uattr_common, size_common); + if (IS_ERR(log)) + return PTR_ERR(log); + + err = __map_create(attr, uattr, log); + if (err >= 0) + goto free; + + ret = bpf_log_attr_finalize(&attr_log, log); + if (ret) + err = ret; + +free: + kfree(log); + return err; +} + void bpf_map_inc(struct bpf_map *map) { atomic64_inc(&map->refcnt); @@ -6249,7 +6294,7 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size, switch (cmd) { case BPF_MAP_CREATE: - err = map_create(&attr, uattr); + err = map_create(&attr, uattr, &attr_common, uattr_common, size_common); break; case BPF_MAP_LOOKUP_ELEM: err = map_lookup_elem(&attr); -- 2.53.0 With the previous commit adding common attribute support for BPF_MAP_CREATE, users can now retrieve detailed error messages through a log buffer when map creation fails. Introduce struct bpf_log_opts with the following fields: buf, size, level, and true_size (an out parameter set by the kernel). Extend bpf_map_create_opts with a new field log_opts, allowing users to capture and inspect log messages on map creation failures. 
Acked-by: Andrii Nakryiko Signed-off-by: Leon Hwang --- tools/lib/bpf/bpf.c | 16 +++++++++++++++- tools/lib/bpf/bpf.h | 17 ++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9d8740761b7a..483c02cf21d1 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -209,6 +209,9 @@ int bpf_map_create(enum bpf_map_type map_type, const struct bpf_map_create_opts *opts) { const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size); + const size_t attr_common_sz = sizeof(struct bpf_common_attr); + struct bpf_common_attr attr_common; + struct bpf_log_opts *log_opts; union bpf_attr attr; int fd; @@ -242,7 +245,18 @@ int bpf_map_create(enum bpf_map_type map_type, attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL)); attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0); - fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); + log_opts = OPTS_GET(opts, log_opts, NULL); + if (log_opts && feat_supported(NULL, FEAT_BPF_SYSCALL_COMMON_ATTRS)) { + memset(&attr_common, 0, attr_common_sz); + attr_common.log_buf = ptr_to_u64(OPTS_GET(log_opts, buf, NULL)); + attr_common.log_size = OPTS_GET(log_opts, size, 0); + attr_common.log_level = OPTS_GET(log_opts, level, 0); + fd = sys_bpf_ext_fd(BPF_MAP_CREATE, &attr, attr_sz, &attr_common, attr_common_sz); + OPTS_SET(log_opts, true_size, attr_common.log_true_size); + } else { + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); + OPTS_SET(log_opts, true_size, 0); + } return libbpf_err_errno(fd); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 2c8e88ddb674..2312900a3263 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -37,6 +37,18 @@ extern "C" { LIBBPF_API int libbpf_set_memlock_rlim(size_t memlock_bytes); +struct bpf_log_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + char *buf; + __u32 size; + __u32 level; + __u32 true_size; /* out parameter set by kernel */ + + 
size_t :0; +}; +#define bpf_log_opts__last_field true_size + struct bpf_map_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -57,9 +69,12 @@ struct bpf_map_create_opts { const void *excl_prog_hash; __u32 excl_prog_hash_size; + + struct bpf_log_opts *log_opts; + size_t :0; }; -#define bpf_map_create_opts__last_field excl_prog_hash_size +#define bpf_map_create_opts__last_field log_opts LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, -- 2.53.0 Add tests to verify that the kernel reports the expected error messages and correct log_true_size when map creation fails. Signed-off-by: Leon Hwang --- .../selftests/bpf/prog_tests/map_init.c | 166 ++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c index 14a31109dd0e..5c61c8e37306 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_init.c +++ b/tools/testing/selftests/bpf/prog_tests/map_init.c @@ -212,3 +212,169 @@ void test_map_init(void) if (test__start_subtest("pcpu_lru_map_init")) test_pcpu_lru_map_init(); } + +static void test_map_create(enum bpf_map_type map_type, const char *map_name, + struct bpf_map_create_opts *opts, const char *exp_msg) +{ + const int key_size = 4, value_size = 4, max_entries = 1; + char log_buf[128]; + int fd; + LIBBPF_OPTS(bpf_log_opts, log_opts); + + log_buf[0] = '\0'; + log_opts.buf = log_buf; + log_opts.size = sizeof(log_buf); + log_opts.level = 1; + opts->log_opts = &log_opts; + fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, opts); + if (!ASSERT_LT(fd, 0, "bpf_map_create")) { + close(fd); + return; + } + + ASSERT_STREQ(log_buf, exp_msg, "log_buf"); + ASSERT_EQ(log_opts.true_size, strlen(exp_msg) + 1, "true_size"); +} + +static void test_map_create_array(struct bpf_map_create_opts *opts, const char *exp_msg) +{ + test_map_create(BPF_MAP_TYPE_ARRAY, "test_map_create", opts, 
exp_msg); +} + +static void test_invalid_vmlinux_value_type_id_struct_ops(void) +{ + const char *msg = "btf_vmlinux_value_type_id can only be used with struct_ops maps.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_vmlinux_value_type_id = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_vmlinux_value_type_id_kv_type_id(void) +{ + const char *msg = "btf_vmlinux_value_type_id is mutually exclusive with btf_key_type_id and btf_value_type_id.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_vmlinux_value_type_id = 1, + .btf_key_type_id = 1, + ); + + test_map_create(BPF_MAP_TYPE_STRUCT_OPS, "test_map_create", &opts, msg); +} + +static void test_invalid_value_type_id(void) +{ + const char *msg = "Invalid btf_value_type_id.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_key_type_id = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_map_extra(void) +{ + const char *msg = "Invalid map_extra.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_extra = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_numa_node(void) +{ + const char *msg = "Invalid numa_node.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_NUMA_NODE, + .numa_node = 0xFF, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_map_type(void) +{ + const char *msg = "Invalid map_type.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts); + + test_map_create(__MAX_BPF_MAP_TYPE, "test_map_create", &opts, msg); +} + +static void test_invalid_token_fd(void) +{ + const char *msg = "Invalid map_token_fd.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_TOKEN_FD, + .token_fd = 0xFF, + ); + + test_map_create_array(&opts, msg); +} + +static void test_invalid_map_name(void) +{ + const char *msg = "Invalid map_name.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts); + + test_map_create(BPF_MAP_TYPE_ARRAY, "test-!@#", &opts, msg); +} + +static void test_invalid_btf_fd(void) +{ + const 
char *msg = "Invalid btf_fd.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .btf_fd = -1, + .btf_key_type_id = 1, + .btf_value_type_id = 1, + ); + + test_map_create_array(&opts, msg); +} + +static void test_excl_prog_hash_size_1(void) +{ + const char *msg = "Invalid excl_prog_hash_size.\n"; + const char *hash = "DEADCODE"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .excl_prog_hash = hash, + ); + + test_map_create_array(&opts, msg); +} + +static void test_excl_prog_hash_size_2(void) +{ + const char *msg = "Invalid excl_prog_hash_size.\n"; + LIBBPF_OPTS(bpf_map_create_opts, opts, + .excl_prog_hash_size = 1, + ); + + test_map_create_array(&opts, msg); +} + +void test_map_create_failure(void) +{ + if (test__start_subtest("invalid_vmlinux_value_type_id_struct_ops")) + test_invalid_vmlinux_value_type_id_struct_ops(); + if (test__start_subtest("invalid_vmlinux_value_type_id_kv_type_id")) + test_invalid_vmlinux_value_type_id_kv_type_id(); + if (test__start_subtest("invalid_value_type_id")) + test_invalid_value_type_id(); + if (test__start_subtest("invalid_map_extra")) + test_invalid_map_extra(); + if (test__start_subtest("invalid_numa_node")) + test_invalid_numa_node(); + if (test__start_subtest("invalid_map_type")) + test_invalid_map_type(); + if (test__start_subtest("invalid_token_fd")) + test_invalid_token_fd(); + if (test__start_subtest("invalid_map_name")) + test_invalid_map_name(); + if (test__start_subtest("invalid_btf_fd")) + test_invalid_btf_fd(); + if (test__start_subtest("invalid_excl_prog_hash_size_1")) + test_excl_prog_hash_size_1(); + if (test__start_subtest("invalid_excl_prog_hash_size_2")) + test_excl_prog_hash_size_2(); +} -- 2.53.0