This patch saves information on the verifier states, at each pruning point, into bpf_insn_aux_data, for use by the BPF oracle. The verifier is already saving states into explored_states for state pruning, but we can't reuse it for the oracle. For state pruning, we only save a subset of all states at each pruning point. Specifically, we will only save a new state if we've seen at least 8 instructions and 2 BPF_JMPs since we last saved a state. For the oracle, we will use the saved information to ensure that concrete values match at least one verifier state. If we are missing states, we will have false positives. This patch therefore saves information on verifier states at every pruning point, regardless of existing heuristics. A later patch will limit this behavior to CONFIG_BPF_ORACLE. At the moment, the oracle only saves information on the type and ranges (in case of scalars) of registers. No information is kept for stack slots. More checks can be added later. Signed-off-by: Paul Chaignon --- include/linux/bpf_verifier.h | 34 +++++++++++++++++++ kernel/bpf/Makefile | 2 +- kernel/bpf/oracle.c | 63 ++++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 11 +++---- 4 files changed, 103 insertions(+), 7 deletions(-) create mode 100644 kernel/bpf/oracle.c diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 130bcbd66f60..adaeff35aaa6 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -485,6 +485,30 @@ struct bpf_verifier_state_list { u32 in_free_list:1; }; +struct bpf_reg_oracle_state { + bool scalar; + bool ptr_not_null; + + struct tnum var_off; + s64 smin_value; + s64 smax_value; + u64 umin_value; + u64 umax_value; + s32 s32_min_value; + s32 s32_max_value; + u32 u32_min_value; + u32 u32_max_value; +}; + +struct bpf_oracle_state { + struct bpf_reg_oracle_state regs[MAX_BPF_REG - 1]; +}; + +struct bpf_oracle_state_list { + struct bpf_oracle_state state; + struct list_head node; +}; + struct 
bpf_loop_inline_state { unsigned int initialized:1; /* set to true upon first entry */ unsigned int fit_for_inline:1; /* true if callback function is the same @@ -551,6 +575,7 @@ struct bpf_insn_aux_data { }; struct bpf_iarray *jt; /* jump table for gotox or bpf_tailcall call instruction */ struct btf_struct_meta *kptr_struct_meta; + struct list_head *oracle_states; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ @@ -1060,11 +1085,18 @@ static inline bool insn_is_gotox(struct bpf_insn *insn) BPF_SRC(insn->code) == BPF_X; } +static inline struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env) +{ + return &env->insn_aux_data[env->insn_idx]; +} + const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type); const char *dynptr_type_str(enum bpf_dynptr_type type); const char *iter_type_str(const struct btf *btf, u32 btf_id); const char *iter_state_str(enum bpf_iter_state state); +bool reg_not_null(const struct bpf_reg_state *reg); + void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, u32 frameno, bool print_all); void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, @@ -1087,4 +1119,6 @@ int bpf_live_stack_query_init(struct bpf_verifier_env *env, struct bpf_verifier_ bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi); void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env); +int save_state_in_oracle(struct bpf_verifier_env *env, int insn_idx); + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 232cbc97434d..b94c9af3288a 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy) 
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o liveness.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o oracle.o log.o token.o liveness.o obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_insn_array.o diff --git a/kernel/bpf/oracle.c b/kernel/bpf/oracle.c new file mode 100644 index 000000000000..adbb153aadee --- /dev/null +++ b/kernel/bpf/oracle.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This file implements a test oracle for the verifier. When the oracle is enabled, the verifier + * saves information on variables at regular points throughout the program. This information is + * then compared at runtime with the concrete values to ensure that the verifier's information is + * correct. + */ + +#include + +static void convert_oracle_state(struct bpf_verifier_state *istate, struct bpf_oracle_state *ostate) +{ + struct bpf_func_state *frame = istate->frame[istate->curframe]; + struct bpf_reg_oracle_state *oreg; + struct bpf_reg_state *ireg; + int i; + + /* No need to check R10 with the oracle. 
*/ + for (i = 0; i < MAX_BPF_REG - 1; i++) { + ireg = &frame->regs[i]; + oreg = &ostate->regs[i]; + + oreg->scalar = ireg->type == SCALAR_VALUE; + oreg->ptr_not_null = reg_not_null(ireg); + + oreg->var_off = ireg->var_off; + oreg->smin_value = ireg->smin_value; + oreg->smax_value = ireg->smax_value; + oreg->umin_value = ireg->umin_value; + oreg->umax_value = ireg->umax_value; + oreg->s32_min_value = ireg->s32_min_value; + oreg->s32_max_value = ireg->s32_max_value; + oreg->u32_min_value = ireg->u32_min_value; + oreg->u32_max_value = ireg->u32_max_value; + } +} + +int save_state_in_oracle(struct bpf_verifier_env *env, int insn_idx) +{ + struct bpf_verifier_state *cur = env->cur_state; + struct bpf_insn_aux_data *aux = cur_aux(env); + struct bpf_oracle_state_list *new_sl; + + if (env->subprog_cnt > 1) + /* Skip the oracle if subprogs are used. */ + return 0; + + if (!aux->oracle_states) { + aux->oracle_states = kmalloc(sizeof(*aux->oracle_states), GFP_KERNEL_ACCOUNT); + if (!aux->oracle_states) + return -ENOMEM; + + INIT_LIST_HEAD(aux->oracle_states); + } + + new_sl = kzalloc(sizeof(*new_sl), GFP_KERNEL_ACCOUNT); + if (!new_sl) + return -ENOMEM; + convert_oracle_state(cur, &new_sl->state); + list_add(&new_sl->node, aux->oracle_states); + + return 0; +} diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bb7eca1025c3..2e48e5c9abae 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -394,7 +394,7 @@ static void verbose_invalid_scalar(struct bpf_verifier_env *env, verbose(env, " should have been in [%d, %d]\n", range.minval, range.maxval); } -static bool reg_not_null(const struct bpf_reg_state *reg) +bool reg_not_null(const struct bpf_reg_state *reg) { enum bpf_reg_type type; @@ -11398,11 +11398,6 @@ static int check_get_func_ip(struct bpf_verifier_env *env) return -ENOTSUPP; } -static struct bpf_insn_aux_data *cur_aux(const struct bpf_verifier_env *env) -{ - return &env->insn_aux_data[env->insn_idx]; -} - static bool loop_flag_is_zero(struct 
bpf_verifier_env *env) { struct bpf_reg_state *regs = cur_regs(env); @@ -20508,6 +20503,10 @@ static int do_check(struct bpf_verifier_env *env) state->insn_idx = env->insn_idx; if (is_prune_point(env, env->insn_idx)) { + err = save_state_in_oracle(env, env->insn_idx); + if (err < 0) + return err; + err = is_state_visited(env, env->insn_idx); if (err < 0) return err; -- 2.43.0 This commit patches the BPF bytecode with special instructions to tell the interpreter to check the oracle. These instructions need to be added wherever we saved information on verifier states, that is, at each pruning point. At the moment, it relies on a special LD_IMM64 instruction with the address of the array map holding the information from the verifier states. This needs to be changed to not expose a new BPF_PSEUDO_MAP_* constant. One option would be to choose something closer to the existing BPF_ST_NOSPEC instruction, which serves a similar internal-only purpose. This patch defines a zero immediate for our LD_IMM64 instruction. The next patch sets the immediate to our map address. 
Signed-off-by: Paul Chaignon --- include/linux/bpf_verifier.h | 4 ++++ include/uapi/linux/bpf.h | 10 +++++++++ kernel/bpf/disasm.c | 3 ++- kernel/bpf/oracle.c | 36 +++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 4 +++- kernel/bpf/verifier.c | 16 +++++++++++--- tools/bpf/bpftool/xlated_dumper.c | 3 +++ 7 files changed, 71 insertions(+), 5 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index adaeff35aaa6..e4c8457e02c1 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -1107,6 +1107,8 @@ int bpf_jmp_offset(struct bpf_insn *insn); struct bpf_iarray *bpf_insn_successors(struct bpf_verifier_env *env, u32 idx); void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask); bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx); +struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, + const struct bpf_insn *patch, u32 len); int bpf_stack_liveness_init(struct bpf_verifier_env *env); void bpf_stack_liveness_free(struct bpf_verifier_env *env); @@ -1120,5 +1122,7 @@ bool bpf_stack_slot_alive(struct bpf_verifier_env *env, u32 frameno, u32 spi); void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env); int save_state_in_oracle(struct bpf_verifier_env *env, int insn_idx); +struct bpf_prog *patch_oracle_check_insn(struct bpf_verifier_env *env, struct bpf_insn *insn, + int i, int *cnt); #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 84ced3ed2d21..ca4827933d26 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1345,6 +1345,16 @@ enum { #define BPF_PSEUDO_MAP_VALUE 2 #define BPF_PSEUDO_MAP_IDX_VALUE 6 +/* Internal only. 
+ * insn[0].dst_reg: 0 + * insn[0].src_reg: BPF_PSEUDO_MAP_ORACLE + * insn[0].imm: lower 32 bits of the oracle inner map address + * insn[1].imm: upper 32 bits of the oracle inner map address + * insn[0].off: 0 + * insn[1].off: 0 + */ +#define BPF_PSEUDO_MAP_ORACLE 7 + /* insn[0].src_reg: BPF_PSEUDO_BTF_ID * insn[0].imm: kernel btd id of VAR * insn[1].imm: 0 diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index f8a3c7eb451e..a591a0bd0284 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -323,7 +323,8 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, */ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; bool is_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD || - insn->src_reg == BPF_PSEUDO_MAP_VALUE; + insn->src_reg == BPF_PSEUDO_MAP_VALUE || + insn->src_reg == BPF_PSEUDO_MAP_ORACLE; char tmp[64]; if (is_ptr && !allow_ptr_leaks) diff --git a/kernel/bpf/oracle.c b/kernel/bpf/oracle.c index adbb153aadee..924a86c90b4e 100644 --- a/kernel/bpf/oracle.c +++ b/kernel/bpf/oracle.c @@ -61,3 +61,39 @@ int save_state_in_oracle(struct bpf_verifier_env *env, int insn_idx) return 0; } + +struct bpf_prog *patch_oracle_check_insn(struct bpf_verifier_env *env, struct bpf_insn *insn, + int i, int *cnt) +{ + struct bpf_insn ld_addrs[2] = { + BPF_LD_IMM64_RAW(0, BPF_PSEUDO_MAP_ORACLE, 0), + }; + struct bpf_insn_aux_data *aux = &env->insn_aux_data[i]; + struct list_head *head = aux->oracle_states; + struct bpf_insn *insn_buf = env->insn_buf; + struct bpf_prog *new_prog = env->prog; + int num_oracle_states; + + if (env->subprog_cnt > 1) + /* Skip the oracle if subprogs are used. 
*/ + goto noop; + + num_oracle_states = list_count_nodes(head); + if (!num_oracle_states) + goto noop; + + insn_buf[0] = ld_addrs[0]; + insn_buf[1] = ld_addrs[1]; + insn_buf[2] = *insn; + *cnt = 3; + + new_prog = bpf_patch_insn_data(env, i, insn_buf, *cnt); + if (!new_prog) + return ERR_PTR(-ENOMEM); + + return new_prog; + +noop: + *cnt = 1; + return new_prog; +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3080cc48bfc3..211912c91652 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4863,7 +4863,8 @@ static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog, for (i = 0, *off = 0; i < prog->aux->used_map_cnt; i++) { map = prog->aux->used_maps[i]; if (map == (void *)addr) { - *type = BPF_PSEUDO_MAP_FD; + if (*type != BPF_PSEUDO_MAP_ORACLE) + *type = BPF_PSEUDO_MAP_FD; goto out; } if (!map->ops->map_direct_value_meta) @@ -4925,6 +4926,7 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog, if (code != (BPF_LD | BPF_IMM | BPF_DW)) continue; + type = insns[i].src_reg; imm = ((u64)insns[i + 1].imm << 32) | (u32)insns[i].imm; map = bpf_map_from_imm(prog, imm, &off, &type); if (map) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2e48e5c9abae..4ca52c6aaa3b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -21202,7 +21202,7 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) continue; - if (insn->src_reg == BPF_PSEUDO_FUNC) + if (insn->src_reg == BPF_PSEUDO_FUNC || insn->src_reg == BPF_PSEUDO_MAP_ORACLE) continue; insn->src_reg = 0; } @@ -21296,8 +21296,8 @@ static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len) } } -static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, - const struct bpf_insn *patch, u32 len) +struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, + const struct bpf_insn *patch, u32 
len) { struct bpf_prog *new_prog; struct bpf_insn_aux_data *new_data = NULL; @@ -22639,6 +22639,16 @@ static int do_misc_fixups(struct bpf_verifier_env *env) } for (i = 0; i < insn_cnt;) { + if (is_prune_point(env, i + delta)) { + new_prog = patch_oracle_check_insn(env, insn, i + delta, &cnt); + if (IS_ERR(new_prog)) + return PTR_ERR(new_prog); + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + } + if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) { if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) || (((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) { diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 5e7cb8b36fef..08bcd0c7d72d 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -206,6 +206,9 @@ static const char *print_imm(void *private_data, else if (insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), "map[idx:%d]+%d", insn->imm, (insn + 1)->imm); + else if (insn->src_reg == BPF_PSEUDO_MAP_ORACLE) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "oracle_map[id:%d]", insn->imm); else if (insn->src_reg == BPF_PSEUDO_FUNC) snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), "subprog[%+d]", insn->imm); -- 2.43.0 This patch creates the inner oracle maps that will store the information on verifier states. Each pruning point needs an inner oracle map. They are called inner because they will all be referred to by a hashmap in a later commit, indexed by instruction indexes. They are also referred to in the oracle instructions, for easier lookup from the interpreter. For the inner maps, we can rely on array maps because at the time we create them we know how many states will need to be saved. They won't grow after program loading so they can have a static size. 
These maps are not only useful for the oracle to iterate through states, but also for debugging from userspace after we hit an oracle test warning. Userspace should however never need to update them, so let's limit permissions accordingly. The bytecode ends up looking like: 0: (bf) r2 = r10 1: (7a) *(u64 *)(r2 -40) = -44 2: (79) r6 = *(u64 *)(r2 -40) 3: (85) call bpf_user_rnd_u32#28800 4: (18) r0 = oracle_map[id:21] 6: (b7) r0 = 0 7: (95) exit with our special instruction at index 4. A subsequent patch teaches the interpreter to skip this special instruction at runtime, to avoid overwriting R0. Signed-off-by: Paul Chaignon --- include/linux/bpf.h | 3 ++ include/linux/bpf_verifier.h | 6 +++- kernel/bpf/oracle.c | 64 +++++++++++++++++++++++++++++++++++- kernel/bpf/syscall.c | 8 ++--- kernel/bpf/verifier.c | 2 +- 5 files changed, 76 insertions(+), 7 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 28d8d6b7bb1e..6bec31816485 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -617,6 +617,9 @@ void bpf_rb_root_free(const struct btf_field *field, void *rb_root, u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena); u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena); int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size); +int bpf_map_alloc_id(struct bpf_map *map); +void bpf_map_save_memcg(struct bpf_map *map); +void bpf_map_free(struct bpf_map *map); struct bpf_offload_dev; struct bpf_offloaded_map; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e4c8457e02c1..a93b5e2f4d7f 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -575,7 +575,10 @@ struct bpf_insn_aux_data { }; struct bpf_iarray *jt; /* jump table for gotox or bpf_tailcall call instruction */ struct btf_struct_meta *kptr_struct_meta; - struct list_head *oracle_states; + union { + struct list_head *oracle_states; + struct bpf_map *oracle_inner_map; + }; u64 map_key_state; /* constant (32 bit) key 
tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ @@ -1109,6 +1112,7 @@ void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask); bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx); struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, const struct bpf_insn *patch, u32 len); +int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map); int bpf_stack_liveness_init(struct bpf_verifier_env *env); void bpf_stack_liveness_free(struct bpf_verifier_env *env); diff --git a/kernel/bpf/oracle.c b/kernel/bpf/oracle.c index 924a86c90b4e..66ee840a35eb 100644 --- a/kernel/bpf/oracle.c +++ b/kernel/bpf/oracle.c @@ -62,6 +62,53 @@ int save_state_in_oracle(struct bpf_verifier_env *env, int insn_idx) return 0; } +static struct bpf_map *create_inner_oracle_map(size_t size) +{ + struct bpf_map *map; + int err; + + union bpf_attr map_attr = { + .map_type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_oracle_state), + .max_entries = size, + .map_flags = BPF_F_INNER_MAP | BPF_F_RDONLY, + .map_name = "oracle_inner", + }; + map = array_map_ops.map_alloc(&map_attr); + if (IS_ERR(map)) + return map; + + map->ops = &array_map_ops; + map->map_type = BPF_MAP_TYPE_ARRAY; + + err = bpf_obj_name_cpy(map->name, map_attr.map_name, + sizeof(map_attr.map_name)); + if (err < 0) + goto free_map; + + mutex_init(&map->freeze_mutex); + spin_lock_init(&map->owner_lock); + + err = security_bpf_map_create(map, &map_attr, NULL, false); + if (err) + goto free_map_sec; + + err = bpf_map_alloc_id(map); + if (err) + goto free_map_sec; + + bpf_map_save_memcg(map); + + return map; + +free_map_sec: + security_bpf_map_free(map); +free_map: + bpf_map_free(map); + return ERR_PTR(err); +} + struct bpf_prog *patch_oracle_check_insn(struct bpf_verifier_env *env, struct bpf_insn *insn, int i, int *cnt) { @@ -72,7 
+119,8 @@ struct bpf_prog *patch_oracle_check_insn(struct bpf_verifier_env *env, struct bp struct list_head *head = aux->oracle_states; struct bpf_insn *insn_buf = env->insn_buf; struct bpf_prog *new_prog = env->prog; - int num_oracle_states; + int num_oracle_states, err; + struct bpf_map *inner_map; if (env->subprog_cnt > 1) /* Skip the oracle if subprogs are used. */ @@ -82,6 +130,12 @@ struct bpf_prog *patch_oracle_check_insn(struct bpf_verifier_env *env, struct bp if (!num_oracle_states) goto noop; + inner_map = create_inner_oracle_map(num_oracle_states); + if (IS_ERR(inner_map)) + return (void *)inner_map; + + ld_addrs[0].imm = (u32)(u64)inner_map; + ld_addrs[1].imm = ((u64)inner_map) >> 32; insn_buf[0] = ld_addrs[0]; insn_buf[1] = ld_addrs[1]; insn_buf[2] = *insn; @@ -91,6 +145,14 @@ struct bpf_prog *patch_oracle_check_insn(struct bpf_verifier_env *env, struct bp if (!new_prog) return ERR_PTR(-ENOMEM); + /* Attach oracle inner map to new LDIMM64 instruction. */ + aux = &env->insn_aux_data[i]; + aux->oracle_inner_map = inner_map; + + err = __add_used_map(env, inner_map); + if (err < 0) + return ERR_PTR(err); + return new_prog; noop: diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 211912c91652..5d8db1fed082 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -441,7 +441,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) map->map_extra = attr->map_extra; } -static int bpf_map_alloc_id(struct bpf_map *map) +int bpf_map_alloc_id(struct bpf_map *map) { int id; @@ -480,7 +480,7 @@ void bpf_map_free_id(struct bpf_map *map) } #ifdef CONFIG_MEMCG -static void bpf_map_save_memcg(struct bpf_map *map) +void bpf_map_save_memcg(struct bpf_map *map) { /* Currently if a map is created by a process belonging to the root * memory cgroup, get_obj_cgroup_from_current() will return NULL. 
@@ -580,7 +580,7 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, } #else -static void bpf_map_save_memcg(struct bpf_map *map) +void bpf_map_save_memcg(struct bpf_map *map) { } @@ -880,7 +880,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) } } -static void bpf_map_free(struct bpf_map *map) +void bpf_map_free(struct bpf_map *map) { struct btf_record *rec = map->record; struct btf *btf = map->btf; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4ca52c6aaa3b..74fc568c1bc8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -20969,7 +20969,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, return 0; } -static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map) +int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map) { int i, err; -- 2.43.0 The previous patch created the inner oracle maps; this patch simply populates them by copying the information on verifier states from aux->oracle_states to the inner array maps. After this, aux->oracle_states isn't required anymore and can be freed. 
Signed-off-by: Paul Chaignon --- kernel/bpf/oracle.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/kernel/bpf/oracle.c b/kernel/bpf/oracle.c index 66ee840a35eb..404c641cb3f6 100644 --- a/kernel/bpf/oracle.c +++ b/kernel/bpf/oracle.c @@ -109,6 +109,33 @@ static struct bpf_map *create_inner_oracle_map(size_t size) return ERR_PTR(err); } +static int populate_oracle_inner_map(struct list_head *head, struct bpf_map *inner_map) +{ + struct bpf_oracle_state_list *sl; + struct list_head *pos, *tmp; + int i = 0; + + list_for_each_safe(pos, tmp, head) { + sl = container_of(pos, struct bpf_oracle_state_list, node); + inner_map->ops->map_update_elem(inner_map, &i, &sl->state, 0); + i++; + } + + return 0; +} + +static void free_oracle_states(struct list_head *oracle_states) +{ + struct bpf_oracle_state_list *sl; + struct list_head *pos, *tmp; + + list_for_each_safe(pos, tmp, oracle_states) { + sl = container_of(pos, struct bpf_oracle_state_list, node); + kfree(sl); + } + kvfree(oracle_states); +} + struct bpf_prog *patch_oracle_check_insn(struct bpf_verifier_env *env, struct bpf_insn *insn, int i, int *cnt) { @@ -141,6 +168,10 @@ struct bpf_prog *patch_oracle_check_insn(struct bpf_verifier_env *env, struct bp insn_buf[2] = *insn; *cnt = 3; + populate_oracle_inner_map(head, inner_map); + free_oracle_states(aux->oracle_states); + aux->oracle_states = NULL; + new_prog = bpf_patch_insn_data(env, i, insn_buf, *cnt); if (!new_prog) return ERR_PTR(-ENOMEM); -- 2.43.0 This creates and populates the main oracle map for our BPF program. The map is populated with the inner oracle map created and populated in previous patches. This main oracle map is a hashmap of maps with pruning point indexes as keys and inner oracle maps as values. Map flag BPF_F_INNER_MAP is required because our inner oracle maps won't all hold the same number of states. 
Signed-off-by: Paul Chaignon --- include/linux/bpf.h | 3 + include/linux/bpf_verifier.h | 2 + kernel/bpf/hashtab.c | 6 +- kernel/bpf/oracle.c | 130 +++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 5 ++ 5 files changed, 143 insertions(+), 3 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6bec31816485..58cba1b48f80 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2794,6 +2794,9 @@ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); +long htab_map_update_elem_in_place(struct bpf_map *map, void *key, + void *value, u64 map_flags, + bool percpu, bool onallcpus); int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); int bpf_get_file_flag(int flags); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index a93b5e2f4d7f..cffbd0552b43 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -846,6 +846,7 @@ struct bpf_verifier_env { u32 longest_mark_read_walk; u32 free_list_size; u32 explored_states_size; + u32 num_prune_points; u32 num_backedges; bpfptr_t fd_array; @@ -1128,5 +1129,6 @@ void bpf_reset_live_stack_callchain(struct bpf_verifier_env *env); int save_state_in_oracle(struct bpf_verifier_env *env, int insn_idx); struct bpf_prog *patch_oracle_check_insn(struct bpf_verifier_env *env, struct bpf_insn *insn, int i, int *cnt); +int create_and_populate_oracle_map(struct bpf_verifier_env *env); #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index c8a9b27f8663..0cf286ff0084 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1249,9 +1249,9 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value return ret; } -static long 
htab_map_update_elem_in_place(struct bpf_map *map, void *key, - void *value, u64 map_flags, - bool percpu, bool onallcpus) +long htab_map_update_elem_in_place(struct bpf_map *map, void *key, + void *value, u64 map_flags, + bool percpu, bool onallcpus) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new, *l_old; diff --git a/kernel/bpf/oracle.c b/kernel/bpf/oracle.c index 404c641cb3f6..44b86e6ef3b2 100644 --- a/kernel/bpf/oracle.c +++ b/kernel/bpf/oracle.c @@ -190,3 +190,133 @@ struct bpf_prog *patch_oracle_check_insn(struct bpf_verifier_env *env, struct bp *cnt = 1; return new_prog; } + +static int populate_oracle_map(struct bpf_verifier_env *env, struct bpf_map *oracle_map) +{ + struct bpf_insn_aux_data *aux; + int i, err; + + /* Oracle checks are always before pruning points, so they cannot be the last + * instruction. + */ + for (i = 0; i < env->prog->len - 1; i++) { + aux = &env->insn_aux_data[i]; + if (!aux->oracle_inner_map || !aux->oracle_inner_map->max_entries) + continue; + + bpf_map_inc(aux->oracle_inner_map); + + rcu_read_lock(); + err = htab_map_update_elem_in_place(oracle_map, &i, &aux->oracle_inner_map, + BPF_NOEXIST, false, false); + rcu_read_unlock(); + if (err) { + bpf_map_put(aux->oracle_inner_map); + return err; + } + } + + return 0; +} + +static struct bpf_map *alloc_oracle_inner_map_meta(void) +{ + struct bpf_array *inner_array_meta; + struct bpf_map *inner_map_meta; + + inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER); + if (!inner_map_meta) + return ERR_PTR(-ENOMEM); + + inner_map_meta->map_type = BPF_MAP_TYPE_ARRAY; + inner_map_meta->key_size = sizeof(__u32); + inner_map_meta->value_size = sizeof(struct bpf_oracle_state); + inner_map_meta->map_flags = BPF_F_INNER_MAP; + inner_map_meta->max_entries = 1; + + inner_map_meta->ops = &array_map_ops; + + inner_array_meta = container_of(inner_map_meta, struct bpf_array, map); + inner_array_meta->index_mask = 0; + inner_array_meta->elem_size = 
round_up(inner_map_meta->value_size, 8); + inner_map_meta->bypass_spec_v1 = true; + + return inner_map_meta; +} + +static struct bpf_map *create_oracle_map(struct bpf_verifier_env *env) +{ + struct bpf_map *map = NULL, *inner_map; + int err; + + union bpf_attr map_attr = { + .map_type = BPF_MAP_TYPE_HASH_OF_MAPS, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = env->num_prune_points, + .map_flags = BPF_F_RDONLY, + .map_name = "oracle_map", + }; + /* We don't want to use htab_of_maps_map_ops here because it expects map_attr.inner_map_fd + * to be set to the fd of inner_map_meta, which we don't have. Instead we can allocate and + * set inner_map_meta ourselves. + */ + map = htab_map_ops.map_alloc(&map_attr); + if (IS_ERR(map)) + return map; + + map->ops = &htab_of_maps_map_ops; + map->map_type = BPF_MAP_TYPE_HASH_OF_MAPS; + + inner_map = alloc_oracle_inner_map_meta(); + if (IS_ERR(inner_map)) { + err = PTR_ERR(inner_map); + goto free_map; + } + map->inner_map_meta = inner_map; + + err = bpf_obj_name_cpy(map->name, map_attr.map_name, + sizeof(map_attr.map_name)); + if (err < 0) + goto free_map; + + mutex_init(&map->freeze_mutex); + spin_lock_init(&map->owner_lock); + + err = security_bpf_map_create(map, &map_attr, NULL, false); + if (err) + goto free_map_sec; + + err = bpf_map_alloc_id(map); + if (err) + goto free_map_sec; + + bpf_map_save_memcg(map); + + return map; + +free_map_sec: + security_bpf_map_free(map); +free_map: + bpf_map_free(map); + return ERR_PTR(err); +} + +int create_and_populate_oracle_map(struct bpf_verifier_env *env) +{ + struct bpf_map *oracle_map; + int err; + + if (env->num_prune_points == 0 || env->subprog_cnt > 1) + return 0; + + oracle_map = create_oracle_map(env); + if (IS_ERR(oracle_map)) + return PTR_ERR(oracle_map); + + err = __add_used_map(env, oracle_map); + if (err < 0) + return err; + + return populate_oracle_map(env, oracle_map); +} diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 
74fc568c1bc8..9b39bc2ca7f1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -17560,6 +17560,8 @@ enum { static void mark_prune_point(struct bpf_verifier_env *env, int idx) { + if (!env->insn_aux_data[idx].prune_point) + env->num_prune_points++; env->insn_aux_data[idx].prune_point = true; } @@ -25301,6 +25303,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret == 0) ret = do_misc_fixups(env); + if (ret == 0) + ret = create_and_populate_oracle_map(env); + /* do 32-bit optimization after insn patching has done so those patched * insns could be handled correctly. */ -- 2.43.0 If we run into our special BPF_PSEUDO_MAP_ORACLE instruction in the interpreter, we need to run the oracle test to check for inconsistencies between concrete values and verifier states. This patch implements that check and throws a kernel warning if any inconsistency is found. The kernel warning message looks as follows, if only R6 was found not to match some states: oracle caught invalid states in oracle_map[id:21]: r6=0xffffffffffffffd4 Signed-off-by: Paul Chaignon --- include/linux/bpf_verifier.h | 1 + include/linux/tnum.h | 3 ++ kernel/bpf/core.c | 12 +++++-- kernel/bpf/oracle.c | 65 ++++++++++++++++++++++++++++++++++++ kernel/bpf/tnum.c | 5 +++ 5 files changed, 84 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index cffbd0552b43..6a53087cdd1d 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -1130,5 +1130,6 @@ int save_state_in_oracle(struct bpf_verifier_env *env, int insn_idx); struct bpf_prog *patch_oracle_check_insn(struct bpf_verifier_env *env, struct bpf_insn *insn, int i, int *cnt); int create_and_populate_oracle_map(struct bpf_verifier_env *env); +void oracle_test(struct bpf_map *oracle_states, u64 *regs); #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/tnum.h b/include/linux/tnum.h index c52b862dad45..e028869371ca 100644 --- 
a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -54,6 +54,9 @@ struct tnum tnum_mul(struct tnum a, struct tnum b); /* Return true if the known bits of both tnums have the same value */ bool tnum_overlap(struct tnum a, struct tnum b); +/* Return true if tnum a matches value b. */ +bool tnum_match(struct tnum a, u64 b); + /* Return a tnum representing numbers satisfying both @a and @b */ struct tnum tnum_intersect(struct tnum a, struct tnum b); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 67226145a4db..fe251f1ff703 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1848,10 +1848,18 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) ALU64_MOV_K: DST = IMM; CONT; - LD_IMM_DW: - DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32; + LD_IMM_DW: { + u64 address = (u64)(u32)insn[0].imm | ((u64)(u32)insn[1].imm) << 32; + + if (insn[0].src_reg == BPF_PSEUDO_MAP_ORACLE) { + oracle_test((struct bpf_map *)address, regs); + insn++; + CONT; + } + DST = address; insn++; CONT; + } ALU_ARSH_X: DST = (u64) (u32) (((s32) DST) >> (SRC & 31)); CONT; diff --git a/kernel/bpf/oracle.c b/kernel/bpf/oracle.c index 44b86e6ef3b2..ce330853b53f 100644 --- a/kernel/bpf/oracle.c +++ b/kernel/bpf/oracle.c @@ -8,6 +8,8 @@ #include +#define REGS_FMT_BUF_LEN 221 + static void convert_oracle_state(struct bpf_verifier_state *istate, struct bpf_oracle_state *ostate) { struct bpf_func_state *frame = istate->frame[istate->curframe]; @@ -320,3 +322,66 @@ int create_and_populate_oracle_map(struct bpf_verifier_env *env) return populate_oracle_map(env, oracle_map); } + +static bool oracle_test_reg(struct bpf_reg_oracle_state *exp, u64 reg) +{ + if (exp->scalar) { + if (reg < exp->umin_value || reg > exp->umax_value || + (s64)reg < exp->smin_value || (s64)reg > exp->smax_value || + (u32)reg < exp->u32_min_value || (u32)reg > exp->u32_max_value || + (s32)reg < exp->s32_min_value || (s32)reg > exp->s32_max_value || + !tnum_match(exp->var_off, reg)) + return 
true; + } else if (exp->ptr_not_null && !reg) { + return true; + } + return false; +} + +static bool oracle_test_state(struct bpf_oracle_state *state, u64 *regs, u32 *non_match_regs) +{ + int i; + + for (i = 0; i < MAX_BPF_REG - 1; i++) { + if (oracle_test_reg(&state->regs[i], regs[i])) { + *non_match_regs |= 1 << i; + return true; + } + } + + return false; +} + +static void format_non_match_regs(u32 non_match_regs, u64 *regs, char *buf) +{ + int i, delta = 0; + + for (i = 0; i < MAX_BPF_REG - 1; i++) { + if (non_match_regs & (1 << i)) { + delta += snprintf(buf + delta, REGS_FMT_BUF_LEN - delta, "r%d=%#llx ", + i, regs[i]); + } + } +} + +void oracle_test(struct bpf_map *oracle_states, u64 *regs) +{ + struct bpf_oracle_state *state; + u32 non_match_regs = 0; + char regs_fmt[REGS_FMT_BUF_LEN]; + bool expected = false; + int i; + + for (i = 0; i < oracle_states->max_entries; i++) { + state = oracle_states->ops->map_lookup_elem(oracle_states, &i); + if (!oracle_test_state(state, regs, &non_match_regs)) { + expected = true; + break; + } + } + if (!expected) { + format_non_match_regs(non_match_regs, regs, regs_fmt); + BPF_WARN_ONCE(1, "oracle caught invalid states in oracle_map[id:%d]: %s\n", + oracle_states->id, regs_fmt); + } +} diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index f8e70e9c3998..afe7adf6a6f5 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c @@ -167,6 +167,11 @@ bool tnum_overlap(struct tnum a, struct tnum b) return (a.value & mu) == (b.value & mu); } +bool tnum_match(struct tnum a, u64 b) +{ + return (a.value & ~a.mask) == (b & ~a.mask); +} + /* Note that if a and b disagree - i.e. one has a 'known 1' where the other has * a 'known 0' - this will return a 'known 1' for that bit. */ -- 2.43.0 This patch puts all BPF oracle logic behind a new BPF_ORACLE kernel config. At the moment, this config requires CONFIG_BPF_JIT_ALWAYS_ON to be disabled as the oracle only runs in the interpreter. 
Signed-off-by: Paul Chaignon --- kernel/bpf/Kconfig | 14 ++++++++++++++ kernel/bpf/Makefile | 3 ++- kernel/bpf/core.c | 2 ++ kernel/bpf/verifier.c | 6 ++++++ 4 files changed, 24 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig index eb3de35734f0..390db0bcca3d 100644 --- a/kernel/bpf/Kconfig +++ b/kernel/bpf/Kconfig @@ -101,4 +101,18 @@ config BPF_LSM If you are unsure how to answer this question, answer N. +config BPF_ORACLE + bool "Enable BPF test oracle" + depends on BPF_SYSCALL + depends on DEBUG_KERNEL + depends on !BPF_JIT_ALWAYS_ON + default n + help + Enable the BPF test oracle to compare concrete runtime values of + registers with their verification-time bounds. This will throw a kernel + warning if the runtime values don't match the expected bounds from the + verifier. + + If you are unsure how to answer this question, answer N. + endmenu # "BPF subsystem" diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index b94c9af3288a..647ff7cb86b9 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif CFLAGS_core.o += -Wno-override-init $(cflags-nogcse-yy) -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o oracle.o log.o token.o liveness.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o log.o token.o liveness.o obj-$(CONFIG_BPF_SYSCALL) += bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o bpf_insn_array.o @@ -56,6 +56,7 @@ obj-$(CONFIG_BPF_SYSCALL) += kmem_cache_iter.o ifeq ($(CONFIG_DMA_SHARED_BUFFER),y) obj-$(CONFIG_BPF_SYSCALL) += dmabuf_iter.o endif +obj-$(CONFIG_BPF_ORACLE) += oracle.o CFLAGS_REMOVE_percpu_freelist.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_bpf_lru_list.o = $(CC_FLAGS_FTRACE) 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index fe251f1ff703..f89fdde66348 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1851,11 +1851,13 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) LD_IMM_DW: { u64 address = (u64)(u32)insn[0].imm | ((u64)(u32)insn[1].imm) << 32; +#ifdef CONFIG_BPF_ORACLE if (insn[0].src_reg == BPF_PSEUDO_MAP_ORACLE) { oracle_test((struct bpf_map *)address, regs); insn++; CONT; } +#endif DST = address; insn++; CONT; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9b39bc2ca7f1..32c0146b9add 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -20505,9 +20505,11 @@ static int do_check(struct bpf_verifier_env *env) state->insn_idx = env->insn_idx; if (is_prune_point(env, env->insn_idx)) { +#ifdef CONFIG_BPF_ORACLE err = save_state_in_oracle(env, env->insn_idx); if (err < 0) return err; +#endif err = is_state_visited(env, env->insn_idx); if (err < 0) @@ -22641,6 +22643,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) } for (i = 0; i < insn_cnt;) { +#ifdef CONFIG_BPF_ORACLE if (is_prune_point(env, i + delta)) { new_prog = patch_oracle_check_insn(env, insn, i + delta, &cnt); if (IS_ERR(new_prog)) @@ -22650,6 +22653,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env) env->prog = prog = new_prog; insn = new_prog->insnsi + i + delta; } +#endif if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) { if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) || @@ -25303,8 +25307,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret == 0) ret = do_misc_fixups(env); +#ifdef CONFIG_BPF_ORACLE if (ret == 0) ret = create_and_populate_oracle_map(env); +#endif /* do 32-bit optimization after insn patching has done so those patched * insns could be handled correctly. -- 2.43.0 This patch introduces minimum support in bpftool to dump and format the contents of inner oracle maps. 
This new "bpftool oracle dump" command is only meant to help demo and debug previous commits and is at the very least missing support for JSON output. The current output looks like: # ./bpftool oracle dump id 22 State 0: R0=scalar(u64=[0; 18446744073709551615], s64=[-9223372036854775808; 9223372036854775807], u32=[0; 4294967295], s32=[-2147483648; 2147483647], var_off=(0; 0xffffffffffffffff) R6=scalar(u64=[4294967252; 4294967252], s64=[4294967252; 4294967252], u32=[4294967252; 4294967252], s32=[-44; -44], var_off=(0xffffffd4; 0) Found 1 state Signed-off-by: Paul Chaignon --- tools/bpf/bpftool/main.c | 3 +- tools/bpf/bpftool/main.h | 1 + tools/bpf/bpftool/oracle.c | 154 +++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 10 +++ 4 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 tools/bpf/bpftool/oracle.c diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index a829a6a49037..c4101d7ae965 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -64,7 +64,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter | token }\n" + " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter | token | oracle }\n" " " HELP_SPEC_OPTIONS " |\n" " {-V|--version} }\n" "", @@ -81,6 +81,7 @@ static const struct cmd commands[] = { { "batch", do_batch }, { "prog", do_prog }, { "map", do_map }, + { "oracle", do_oracle }, { "link", do_link }, { "cgroup", do_cgroup }, { "perf", do_perf }, diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 1130299cede0..9ee613d351a4 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -166,6 +166,7 @@ int do_btf(int argc, char **argv); /* non-bootstrap only commands */ int do_prog(int argc, char **arg) __weak; int do_map(int argc, char **arg) __weak; +int do_oracle(int argc, char **arg) 
__weak; int do_link(int argc, char **arg) __weak; int do_event_pipe(int argc, char **argv) __weak; int do_cgroup(int argc, char **arg) __weak; diff --git a/tools/bpf/bpftool/oracle.c b/tools/bpf/bpftool/oracle.c new file mode 100644 index 000000000000..c0a518ff5ee2 --- /dev/null +++ b/tools/bpf/bpftool/oracle.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +#include "main.h" + +struct tnum { + __u64 value; + __u64 mask; +}; + +struct bpf_reg_oracle_state { + bool scalar; + bool ptr_not_null; + + struct tnum var_off; + __s64 smin_value; + __s64 smax_value; + __u64 umin_value; + __u64 umax_value; + __s32 s32_min_value; + __s32 s32_max_value; + __u32 u32_min_value; + __u32 u32_max_value; +}; + +struct bpf_oracle_state { + struct bpf_reg_oracle_state regs[MAX_BPF_REG - 1]; +}; + +static void print_register_state(int i, struct bpf_reg_oracle_state *reg) +{ + if (!reg->scalar && !reg->ptr_not_null) + return; + + printf("R%d=", i); + if (reg->scalar) { + printf("scalar(u64=[%llu; %llu], s64=[%lld; %lld], u32=[%u; %u], s32=[%d; %d]", + reg->umin_value, reg->umax_value, reg->smin_value, reg->smax_value, + reg->u32_min_value, reg->u32_max_value, reg->s32_min_value, + reg->s32_max_value); + printf(", var_off=(%#llx; %#llx)", reg->var_off.value, reg->var_off.mask); + } else if (reg->ptr_not_null) { + printf("ptr"); + } else { + printf("unknown"); + } + printf("\n"); +} + +static int +oracle_map_dump(int fd, struct bpf_map_info *info, bool show_header) +{ + struct bpf_oracle_state value = {}; + unsigned int num_elems = 0; + __u32 key, *prev_key = NULL; + int err, i; + + while (true) { + err = bpf_map_get_next_key(fd, prev_key, &key); + if (err) { + if (errno == ENOENT) + err = 0; + break; + } + if (bpf_map_lookup_elem(fd, &key, &value)) { + printf(""); + continue; + } + printf("State %u:\n", key); + for (i = 0; i < MAX_BPF_REG - 1; i++) + print_register_state(i, &value.regs[i]); + printf("\n"); + num_elems++; + prev_key = &key; + } + + 
printf("Found %u state%s\n", num_elems, + num_elems != 1 ? "s" : ""); + + close(fd); + return err; +} + +static int do_dump(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + int nb_fds, i, err; + int *fds = NULL; + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = map_parse_fds(&argc, &argv, &fds, BPF_F_RDONLY); + if (nb_fds < 1) + goto exit_free; + + for (i = 0; i < nb_fds; i++) { + if (bpf_map_get_info_by_fd(fds[i], &info, &len)) { + p_err("can't get map info: %s", strerror(errno)); + break; + } + if (info.type != BPF_MAP_TYPE_ARRAY || info.key_size != sizeof(__u32) || + info.value_size != sizeof(struct bpf_oracle_state)) { + p_err("not an oracle map"); + break; + } + err = oracle_map_dump(fds[i], &info, nb_fds > 1); + if (i != nb_fds - 1) + printf("\n"); + + if (err) + break; + close(fds[i]); + } + + for (; i < nb_fds; i++) + close(fds[i]); +exit_free: + free(fds); + return 0; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %1$s %2$s dump MAP\n" + " %1$s %2$s help\n" + "\n" + " " HELP_SPEC_MAP "\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-f|--bpffs} | {-n|--nomount} }\n" + "", + bin_name, argv[-2]); + + return 0; +} + +static const struct cmd cmds[] = { + { "help", do_help }, + { "dump", do_dump }, + { 0 } +}; + +int do_oracle(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6b92b0847ec2..f19dba37ea7d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1345,6 +1345,16 @@ enum { #define BPF_PSEUDO_MAP_VALUE 2 #define BPF_PSEUDO_MAP_IDX_VALUE 6 +/* Internal only. 
+ * insn[0].dst_reg: 0 + * insn[0].src_reg: BPF_PSEUDO_MAP_ORACLE + * insn[0].imm: lower 32 bits of the oracle map address + * insn[1].imm: upper 32 bits of the oracle map address + * insn[0].off: 0 + * insn[1].off: 0 + */ +#define BPF_PSEUDO_MAP_ORACLE 7 + /* insn[0].src_reg: BPF_PSEUDO_BTF_ID * insn[0].imm: kernel btd id of VAR * insn[1].imm: 0 -- 2.43.0