From: Kaitao Cheng

If a user holds a reference to a node in the middle of a list, bpf_list_del
lets them remove that node directly, instead of being limited to deleting
from the head or the tail.

bpf_list_del takes an additional bpf_list_head *head parameter because the
verifier needs it to check that the lock protecting the list is held.

This kfunc is typically paired with bpf_refcount. Since bpf_list_del
acquires a reference to the removed node, after calling it the caller
generally needs to drop the node's reference twice (the newly acquired one
and the one already held) to prevent reference count leaks.
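For illustration, a minimal usage sketch (not part of this patch; it assumes
the node_data type, 'head' and 'lock' from the refcounted_kptr selftests,
and that 'n' holds an owning reference to a node still linked in the list,
e.g. obtained via bpf_rbtree_remove):

  struct bpf_list_node *l;

  bpf_spin_lock(&lock);
  l = bpf_list_del(&head, &n->l);
  bpf_spin_unlock(&lock);
  /* first drop: the owning reference we already held */
  bpf_obj_drop(n);
  /* second drop: the reference acquired by bpf_list_del */
  if (l)
          bpf_obj_drop(container_of(l, struct node_data, l));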
Signed-off-by: Kaitao Cheng
---
 kernel/bpf/helpers.c  | 27 +++++++++++++++++++++++++++
 kernel/bpf/verifier.c |  6 +++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 6eb6c82ed2ee..cc1a096a1f64 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2459,6 +2459,32 @@ __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
         return __bpf_list_del(head, true);
 }
 
+__bpf_kfunc struct bpf_list_node *bpf_list_del(struct bpf_list_head *head,
+                                               struct bpf_list_node *node)
+{
+        struct bpf_list_node_kern *knode = (struct bpf_list_node_kern *)node;
+        struct list_head *h = (void *)head;
+
+        /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
+         * called on its fields, so init here
+         */
+        if (unlikely(!h->next)) {
+                INIT_LIST_HEAD(h);
+                return NULL;
+        }
+
+        if (unlikely(!knode))
+                return NULL;
+
+        if (WARN_ON_ONCE(READ_ONCE(knode->owner) != head))
+                return NULL;
+
+        list_del_init(&knode->list_head);
+        WRITE_ONCE(knode->owner, NULL);
+
+        return node;
+}
+
 __bpf_kfunc struct bpf_list_node *bpf_list_front(struct bpf_list_head *head)
 {
         struct list_head *h = (struct list_head *)head;
@@ -4545,6 +4571,7 @@ BTF_ID_FLAGS(func, bpf_list_push_front_impl)
 BTF_ID_FLAGS(func, bpf_list_push_back_impl)
 BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_list_del, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 67c09b43a497..c9557d3fb8dd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12461,6 +12461,7 @@ enum special_kfunc_type {
         KF_bpf_list_push_back_impl,
         KF_bpf_list_pop_front,
         KF_bpf_list_pop_back,
+        KF_bpf_list_del,
         KF_bpf_list_front,
         KF_bpf_list_back,
         KF_bpf_cast_to_kern_ctx,
@@ -12521,6 +12522,7 @@ BTF_ID(func, bpf_list_push_front_impl)
 BTF_ID(func, bpf_list_push_back_impl)
 BTF_ID(func, bpf_list_pop_front)
 BTF_ID(func, bpf_list_pop_back)
+BTF_ID(func, bpf_list_del)
 BTF_ID(func, bpf_list_front)
 BTF_ID(func, bpf_list_back)
 BTF_ID(func, bpf_cast_to_kern_ctx)
@@ -12996,6 +12998,7 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
                btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
                btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
                btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
+               btf_id == special_kfunc_list[KF_bpf_list_del] ||
                btf_id == special_kfunc_list[KF_bpf_list_front] ||
                btf_id == special_kfunc_list[KF_bpf_list_back];
 }
@@ -13118,7 +13121,8 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
         switch (node_field_type) {
         case BPF_LIST_NODE:
                 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
-                       kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
+                       kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
+                       kfunc_btf_id == special_kfunc_list[KF_bpf_list_del]);
                 break;
         case BPF_RB_NODE:
                 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
--
2.50.1 (Apple Git-155)

From: Kaitao Cheng

Add a new kfunc bpf_list_add_impl(head, new, prev, meta, off) that inserts
'new' after 'prev' in the BPF linked list with head 'head'. 'prev' must
already be in that list; 'new' must not be in any list. The new node must
be an owning reference (e.g. from bpf_obj_new); the kfunc consumes that
reference, and the node becomes non-owning once inserted.

The additional bpf_list_head *head parameter is required because the
verifier needs it to check that the lock protecting the list is held.

Returns 0 on success, and -EINVAL if 'prev' is not in a list or 'new' is
already in a list (including duplicate insertion). On failure, the kernel
drops the passed-in node.
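For illustration, a minimal usage sketch (not part of this patch; it uses
the bpf_list_add() convenience wrapper added to bpf_experimental.h later in
this series, and assumes 'prev' points to a node_data object already in the
list protected by 'lock'):

  struct node_data *n_new;

  n_new = bpf_obj_new(typeof(*n_new));
  if (!n_new)
          return -1;

  bpf_spin_lock(&lock);
  /* consumes the reference to n_new; on failure the kernel drops it */
  if (bpf_list_add(&head, &n_new->l, &prev->l)) {
          bpf_spin_unlock(&lock);
          return -2;
  }
  bpf_spin_unlock(&lock);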
Signed-off-by: Kaitao Cheng
---
 kernel/bpf/helpers.c  | 34 ++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c | 17 +++++++++++++----
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index cc1a096a1f64..740b53024283 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2505,6 +2505,39 @@ __bpf_kfunc struct bpf_list_node *bpf_list_back(struct bpf_list_head *head)
         return (struct bpf_list_node *)h->prev;
 }
 
+__bpf_kfunc int bpf_list_add_impl(struct bpf_list_head *head,
+                                  struct bpf_list_node *new,
+                                  struct bpf_list_node *prev,
+                                  void *meta__ign, u64 off)
+{
+        struct bpf_list_node_kern *kn = (void *)new, *kp = (void *)prev;
+        struct btf_struct_meta *meta = meta__ign;
+        struct list_head *n = &kn->list_head, *p = &kp->list_head;
+        struct list_head *h = (void *)head;
+
+        /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
+         * called on its fields, so init here
+         */
+        if (unlikely(!h->next)) {
+                INIT_LIST_HEAD(h);
+                goto fail;
+        }
+
+        if (WARN_ON_ONCE(READ_ONCE(kp->owner) != head))
+                goto fail;
+
+        if (cmpxchg(&kn->owner, NULL, BPF_PTR_POISON))
+                goto fail;
+
+        list_add(n, p);
+        WRITE_ONCE(kn->owner, head);
+        return 0;
+
+fail:
+        __bpf_obj_drop_impl((void *)n - off, meta ? meta->record : NULL, false);
+        return -EINVAL;
+}
+
 __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
                                                   struct bpf_rb_node *node)
 {
@@ -4574,6 +4607,7 @@ BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_del, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_list_add_impl)
 BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c9557d3fb8dd..e458cf3b1dd1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12464,6 +12464,7 @@ enum special_kfunc_type {
         KF_bpf_list_del,
         KF_bpf_list_front,
         KF_bpf_list_back,
+        KF_bpf_list_add_impl,
         KF_bpf_cast_to_kern_ctx,
         KF_bpf_rdonly_cast,
         KF_bpf_rcu_read_lock,
@@ -12525,6 +12526,7 @@ BTF_ID(func, bpf_list_pop_back)
 BTF_ID(func, bpf_list_del)
 BTF_ID(func, bpf_list_front)
 BTF_ID(func, bpf_list_back)
+BTF_ID(func, bpf_list_add_impl)
 BTF_ID(func, bpf_cast_to_kern_ctx)
 BTF_ID(func, bpf_rdonly_cast)
 BTF_ID(func, bpf_rcu_read_lock)
@@ -13000,7 +13002,8 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
                btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
                btf_id == special_kfunc_list[KF_bpf_list_del] ||
                btf_id == special_kfunc_list[KF_bpf_list_front] ||
-               btf_id == special_kfunc_list[KF_bpf_list_back];
+               btf_id == special_kfunc_list[KF_bpf_list_back] ||
+               btf_id == special_kfunc_list[KF_bpf_list_add_impl];
 }
 
 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
@@ -13122,7 +13125,8 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
         case BPF_LIST_NODE:
                 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
                        kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
-                       kfunc_btf_id == special_kfunc_list[KF_bpf_list_del]);
+                       kfunc_btf_id == special_kfunc_list[KF_bpf_list_del] ||
+                       kfunc_btf_id == special_kfunc_list[KF_bpf_list_add_impl]);
                 break;
         case BPF_RB_NODE:
                 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
@@ -14264,6 +14268,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 
         if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
             meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
+            meta.func_id == special_kfunc_list[KF_bpf_list_add_impl] ||
             meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
                 release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
                 insn_aux->insert_off = regs[BPF_REG_2].off;
@@ -23230,13 +23235,17 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                 *cnt = 3;
         } else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
                    desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
+                   desc->func_id == special_kfunc_list[KF_bpf_list_add_impl] ||
                    desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
                 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
                 int struct_meta_reg = BPF_REG_3;
                 int node_offset_reg = BPF_REG_4;
 
-                /* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
-                if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
+                /* list/rbtree_add_impl have an extra arg (prev/less),
+                 * so args-to-fixup are in different regs.
+                 */
+                if (desc->func_id == special_kfunc_list[KF_bpf_list_add_impl] ||
+                    desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
                         struct_meta_reg = BPF_REG_4;
                         node_offset_reg = BPF_REG_5;
                 }
--
2.50.1 (Apple Git-155)

From: Kaitao Cheng

Add two kfuncs for BPF linked list queries:

- bpf_list_node_is_edge(head, node, is_first): true if node is the first
  (when is_first is true) or the last (when is_first is false) node in the
  list.
- bpf_list_empty(head): true if the list has no entries.

Previously, implementing these checks required calling
bpf_list_pop_front/back to retrieve the first or last node, comparing it
against the node in question, and then pushing it back into the list with
bpf_list_push_front/back, which was very inefficient. With
bpf_list_node_is_edge/bpf_list_empty, we can directly check whether a node
is the first or last, or whether the list is empty, without having to
retrieve any node first.
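For illustration, a minimal usage sketch (not part of this patch; 'n' is
assumed to reference a node in the list protected by 'lock', as in the
refcounted_kptr selftests):

  bool first;

  bpf_spin_lock(&lock);
  if (bpf_list_empty(&head)) {
          bpf_spin_unlock(&lock);
          return 0;
  }
  /* no pop/push round-trip needed to check the node's position */
  first = bpf_list_node_is_edge(&head, &n->l, true);
  bpf_spin_unlock(&lock);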
Signed-off-by: Kaitao Cheng
---
 kernel/bpf/helpers.c  | 37 +++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c | 11 +++++++++--
 2 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 740b53024283..3d22b6080185 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2538,6 +2538,41 @@ __bpf_kfunc int bpf_list_add_impl(struct bpf_list_head *head,
         return -EINVAL;
 }
 
+__bpf_kfunc bool bpf_list_node_is_edge(struct bpf_list_head *head,
+                                       struct bpf_list_node *node, bool is_first)
+{
+        struct list_head *h = (struct list_head *)head, *edge;
+        struct bpf_list_node_kern *n = (struct bpf_list_node_kern *)node;
+
+        /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
+         * called on its fields, so init here
+         */
+        if (unlikely(!h->next))
+                INIT_LIST_HEAD(h);
+
+        if (list_empty(h))
+                return false;
+
+        if (READ_ONCE(n->owner) != head)
+                return false;
+
+        edge = is_first ? h->next : h->prev;
+        return edge == &n->list_head;
+}
+
+__bpf_kfunc bool bpf_list_empty(struct bpf_list_head *head)
+{
+        struct list_head *h = (struct list_head *)head;
+
+        /* If list_head was 0-initialized by map, bpf_obj_init_field wasn't
+         * called on its fields, so init here
+         */
+        if (unlikely(!h->next))
+                INIT_LIST_HEAD(h);
+
+        return list_empty(h);
+}
+
 __bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
                                                   struct bpf_rb_node *node)
 {
@@ -4608,6 +4643,8 @@ BTF_ID_FLAGS(func, bpf_list_del, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_add_impl)
+BTF_ID_FLAGS(func, bpf_list_node_is_edge)
+BTF_ID_FLAGS(func, bpf_list_empty)
 BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e458cf3b1dd1..d133d18aa0cc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12465,6 +12465,8 @@ enum special_kfunc_type {
         KF_bpf_list_front,
         KF_bpf_list_back,
         KF_bpf_list_add_impl,
+        KF_bpf_list_node_is_edge,
+        KF_bpf_list_empty,
         KF_bpf_cast_to_kern_ctx,
         KF_bpf_rdonly_cast,
         KF_bpf_rcu_read_lock,
@@ -12527,6 +12529,8 @@ BTF_ID(func, bpf_list_del)
 BTF_ID(func, bpf_list_front)
 BTF_ID(func, bpf_list_back)
 BTF_ID(func, bpf_list_add_impl)
+BTF_ID(func, bpf_list_node_is_edge)
+BTF_ID(func, bpf_list_empty)
 BTF_ID(func, bpf_cast_to_kern_ctx)
 BTF_ID(func, bpf_rdonly_cast)
 BTF_ID(func, bpf_rcu_read_lock)
@@ -13003,7 +13007,9 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
                btf_id == special_kfunc_list[KF_bpf_list_del] ||
                btf_id == special_kfunc_list[KF_bpf_list_front] ||
                btf_id == special_kfunc_list[KF_bpf_list_back] ||
-               btf_id == special_kfunc_list[KF_bpf_list_add_impl];
+               btf_id == special_kfunc_list[KF_bpf_list_add_impl] ||
+               btf_id == special_kfunc_list[KF_bpf_list_node_is_edge] ||
+               btf_id == special_kfunc_list[KF_bpf_list_empty];
 }
 
 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
@@ -13126,7 +13132,8 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
                 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
                        kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
                        kfunc_btf_id == special_kfunc_list[KF_bpf_list_del] ||
-                       kfunc_btf_id == special_kfunc_list[KF_bpf_list_add_impl]);
+                       kfunc_btf_id == special_kfunc_list[KF_bpf_list_add_impl] ||
+                       kfunc_btf_id == special_kfunc_list[KF_bpf_list_node_is_edge]);
                 break;
         case BPF_RB_NODE:
                 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
--
2.50.1 (Apple Git-155)

From: Kaitao Cheng

Extend the refcounted_kptr tests: add a node to both an rbtree and a list,
remove the node from the rbtree to obtain a node pointer, add a second node
after the first one in the list, and finally remove both nodes from the
list with bpf_list_del. The test checks that the list is non-empty after
the insert, checks the first and last nodes after bpf_list_add, and checks
that the list is empty after both nodes have been removed.

To verify that bpf_list_del/add are properly gated on the lock, the tests
also expect the verifier to reject calls to bpf_list_del/add made without
holding the spin_lock.
Signed-off-by: Kaitao Cheng
---
 .../testing/selftests/bpf/bpf_experimental.h  |  39 ++++
 .../selftests/bpf/progs/refcounted_kptr.c     | 182 ++++++++++++++++++
 2 files changed, 221 insertions(+)

diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 4b7210c318dd..d5f42ed69166 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -99,6 +99,45 @@ extern struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ks
  */
 extern struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym;
 
+/* Description
+ *      Remove 'node' from the BPF linked list with head 'head'.
+ *      The node must be in the list. Caller receives ownership of the
+ *      removed node and must release it with bpf_obj_drop.
+ * Returns
+ *      Pointer to the removed bpf_list_node, or NULL if 'node' is NULL
+ *      or not in the list.
+ */
+extern struct bpf_list_node *bpf_list_del(struct bpf_list_head *head,
+                                          struct bpf_list_node *node) __ksym;
+
+/* Description
+ *      Insert 'new' after 'prev' in the BPF linked list with head 'head'.
+ *      The bpf_spin_lock protecting the list must be held. 'prev' must already
+ *      be in that list; 'new' must not be in any list. The 'meta' and 'off'
+ *      parameters are rewritten by the verifier, no need for BPF programs to
+ *      set them.
+ * Returns
+ *      0 on success, -EINVAL if head is NULL, prev is not in the list with head,
+ *      or new is already in a list.
+ */
+extern int bpf_list_add_impl(struct bpf_list_head *head, struct bpf_list_node *new,
+                             struct bpf_list_node *prev, void *meta, __u64 off) __ksym;
+
+/* Convenience macro to wrap over bpf_list_add_impl */
+#define bpf_list_add(head, new, prev) bpf_list_add_impl(head, new, prev, NULL, 0)
+
+/* Description
+ *      Return true if 'node' is the first (when 'is_first' is true) or the last
+ *      (when 'is_first' is false) node in the list with head 'head'.
+ */
+extern bool bpf_list_node_is_edge(struct bpf_list_head *head,
+                                  struct bpf_list_node *node, bool is_first) __ksym;
+
+/* Description
+ *      Return true if the list with head 'head' has no entries.
+ */
+extern bool bpf_list_empty(struct bpf_list_head *head) __ksym;
+
 /* Description
  *      Remove 'node' from rbtree with root 'root'
  * Returns
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
index 1aca85d86aeb..aca201f9fb56 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
@@ -367,6 +367,188 @@ long insert_rbtree_and_stash__del_tree_##rem_tree(void *ctx)    \
 INSERT_STASH_READ(true, "insert_stash_read: remove from tree");
 INSERT_STASH_READ(false, "insert_stash_read: don't remove from tree");
 
+/* Insert one node in tree and list, remove it from tree, add a second node
+ * after it with bpf_list_add, check bpf_list_node_is_edge/empty, then
+ * remove both nodes from list via bpf_list_del.
+ */
+SEC("tc")
+__description("list_add_del_and_check: test bpf_list_add/del/is_first/is_last/empty")
+__success __retval(0)
+long list_add_del_and_check(void *ctx)
+{
+        long err = 0;
+        struct bpf_rb_node *rb;
+        struct bpf_list_node *l, *l_1;
+        struct node_data *n, *n_1, *m_1;
+
+        err = __insert_in_tree_and_list(&head, &root, &lock);
+        if (err)
+                return err;
+
+        bpf_spin_lock(&lock);
+        if (bpf_list_empty(&head)) {
+                bpf_spin_unlock(&lock);
+                return -7;
+        }
+
+        rb = bpf_rbtree_first(&root);
+        if (!rb) {
+                bpf_spin_unlock(&lock);
+                return -4;
+        }
+
+        rb = bpf_rbtree_remove(&root, rb);
+        bpf_spin_unlock(&lock);
+        if (!rb)
+                return -5;
+
+        n = container_of(rb, struct node_data, r);
+        n_1 = bpf_obj_new(typeof(*n_1));
+        if (!n_1) {
+                bpf_obj_drop(n);
+                return -1;
+        }
+        m_1 = bpf_refcount_acquire(n_1);
+        if (!m_1) {
+                bpf_obj_drop(n);
+                bpf_obj_drop(n_1);
+                return -1;
+        }
+
+        bpf_spin_lock(&lock);
+        if (bpf_list_add(&head, &n_1->l, &n->l)) {
+                bpf_spin_unlock(&lock);
+                bpf_obj_drop(n);
+                bpf_obj_drop(m_1);
+                return -8;
+        }
+
+        if (!bpf_list_node_is_edge(&head, &n->l, true) ||
+            !bpf_list_node_is_edge(&head, &m_1->l, false)) {
+                bpf_spin_unlock(&lock);
+                bpf_obj_drop(n);
+                bpf_obj_drop(m_1);
+                return -9;
+        }
+
+        l = bpf_list_del(&head, &n->l);
+        l_1 = bpf_list_del(&head, &m_1->l);
+        bpf_spin_unlock(&lock);
+        bpf_obj_drop(n);
+        bpf_obj_drop(m_1);
+
+        if (l)
+                bpf_obj_drop(container_of(l, struct node_data, l));
+        else
+                err = -6;
+
+        if (l_1)
+                bpf_obj_drop(container_of(l_1, struct node_data, l));
+        else
+                err = -6;
+
+        bpf_spin_lock(&lock);
+        if (!bpf_list_empty(&head))
+                err = -7;
+        bpf_spin_unlock(&lock);
+        return err;
+}
+
+SEC("?tc")
+__failure __msg("bpf_spin_lock at off=32 must be held for bpf_list_head")
+long list_del_without_lock_fail(void *ctx)
+{
+        struct bpf_rb_node *rb;
+        struct bpf_list_node *l;
+        struct node_data *n;
+
+        bpf_spin_lock(&lock);
+        rb = bpf_rbtree_first(&root);
+        if (!rb) {
+                bpf_spin_unlock(&lock);
+                return -4;
+        }
+
+        rb = bpf_rbtree_remove(&root, rb);
+        bpf_spin_unlock(&lock);
+        if (!rb)
+                return -5;
+
+        n = container_of(rb, struct node_data, r);
+        l = bpf_list_del(&head, &n->l);
+        bpf_obj_drop(n);
+        if (!l)
+                return -6;
+
+        bpf_obj_drop(container_of(l, struct node_data, l));
+        return 0;
+}
+
+SEC("?tc")
+__failure __msg("bpf_spin_lock at off=32 must be held for bpf_list_head")
+long list_add_without_lock_fail(void *ctx)
+{
+        long err = 0;
+        struct bpf_rb_node *rb;
+        struct bpf_list_node *l, *l_1;
+        struct node_data *n, *n_1, *m_1;
+
+        err = __insert_in_tree_and_list(&head, &root, &lock);
+        if (err)
+                return err;
+
+        bpf_spin_lock(&lock);
+        rb = bpf_rbtree_first(&root);
+        if (!rb) {
+                bpf_spin_unlock(&lock);
+                return -4;
+        }
+
+        rb = bpf_rbtree_remove(&root, rb);
+        bpf_spin_unlock(&lock);
+        if (!rb)
+                return -5;
+
+        n = container_of(rb, struct node_data, r);
+        n_1 = bpf_obj_new(typeof(*n_1));
+        if (!n_1) {
+                bpf_obj_drop(n);
+                return -1;
+        }
+        m_1 = bpf_refcount_acquire(n_1);
+        if (!m_1) {
+                bpf_obj_drop(n);
+                bpf_obj_drop(n_1);
+                return -1;
+        }
+
+        /* Intentionally no lock: verifier should reject bpf_list_add without lock */
+        if (bpf_list_add(&head, &n_1->l, &n->l)) {
+                bpf_obj_drop(n);
+                bpf_obj_drop(m_1);
+                return -8;
+        }
+
+        bpf_spin_lock(&lock);
+        l = bpf_list_del(&head, &n->l);
+        l_1 = bpf_list_del(&head, &m_1->l);
+        bpf_spin_unlock(&lock);
+        bpf_obj_drop(n);
+        bpf_obj_drop(m_1);
+
+        if (l)
+                bpf_obj_drop(container_of(l, struct node_data, l));
+        else
+                err = -6;
+
+        if (l_1)
+                bpf_obj_drop(container_of(l_1, struct node_data, l));
+        else
+                err = -6;
+
+        return err;
+}
+
 SEC("tc")
 __success
 long rbtree_refcounted_node_ref_escapes(void *ctx)
--
2.50.1 (Apple Git-155)
From: Kaitao Cheng

Replace the per-kfunc btf_id comparison chains in the list, rbtree,
res_spin_lock and graph-node checks with static kfunc tables queried via
btf_id_in_kfunc_table(), for easier maintenance.

Signed-off-by: Kaitao Cheng
---
 kernel/bpf/verifier.c | 93 +++++++++++++++++++++++++++++--------------
 1 file changed, 64 insertions(+), 29 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d133d18aa0cc..25961cf83a67 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12600,6 +12600,60 @@ BTF_ID(func, bpf_session_is_return)
 BTF_ID(func, bpf_stream_vprintk)
 BTF_ID(func, bpf_stream_print_stack)
 
+static const enum special_kfunc_type bpf_list_api_kfuncs[] = {
+        KF_bpf_list_push_front_impl,
+        KF_bpf_list_push_back_impl,
+        KF_bpf_list_pop_front,
+        KF_bpf_list_pop_back,
+        KF_bpf_list_del,
+        KF_bpf_list_front,
+        KF_bpf_list_back,
+        KF_bpf_list_add_impl,
+        KF_bpf_list_node_is_edge,
+        KF_bpf_list_empty,
+};
+
+/* Kfuncs that take a list node argument (bpf_list_node *). */
+static const enum special_kfunc_type bpf_list_node_api_kfuncs[] = {
+        KF_bpf_list_push_front_impl,
+        KF_bpf_list_push_back_impl,
+        KF_bpf_list_del,
+        KF_bpf_list_add_impl,
+        KF_bpf_list_node_is_edge,
+};
+
+/* Kfuncs that take an rbtree node argument (bpf_rb_node *). */
+static const enum special_kfunc_type bpf_rbtree_node_api_kfuncs[] = {
+        KF_bpf_rbtree_remove,
+        KF_bpf_rbtree_add_impl,
+        KF_bpf_rbtree_left,
+        KF_bpf_rbtree_right,
+};
+
+static const enum special_kfunc_type bpf_rbtree_api_kfuncs[] = {
+        KF_bpf_rbtree_add_impl,
+        KF_bpf_rbtree_remove,
+        KF_bpf_rbtree_first,
+        KF_bpf_rbtree_root,
+        KF_bpf_rbtree_left,
+        KF_bpf_rbtree_right,
+};
+
+static const enum special_kfunc_type bpf_res_spin_lock_kfuncs[] = {
+        KF_bpf_res_spin_lock,
+        KF_bpf_res_spin_unlock,
+        KF_bpf_res_spin_lock_irqsave,
+        KF_bpf_res_spin_unlock_irqrestore,
+};
+
+static bool btf_id_in_kfunc_table(u32 btf_id, const enum special_kfunc_type *kfuncs, int n)
+{
+        for (int i = 0; i < n; i++)
+                if (btf_id == special_kfunc_list[kfuncs[i]])
+                        return true;
+        return false;
+}
+
 static bool is_task_work_add_kfunc(u32 func_id)
 {
         return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] ||
@@ -13000,26 +13054,14 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_
 
 static bool is_bpf_list_api_kfunc(u32 btf_id)
 {
-        return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
-               btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
-               btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
-               btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
-               btf_id == special_kfunc_list[KF_bpf_list_del] ||
-               btf_id == special_kfunc_list[KF_bpf_list_front] ||
-               btf_id == special_kfunc_list[KF_bpf_list_back] ||
-               btf_id == special_kfunc_list[KF_bpf_list_add_impl] ||
-               btf_id == special_kfunc_list[KF_bpf_list_node_is_edge] ||
-               btf_id == special_kfunc_list[KF_bpf_list_empty];
+        return btf_id_in_kfunc_table(btf_id, bpf_list_api_kfuncs,
+                                     ARRAY_SIZE(bpf_list_api_kfuncs));
 }
 
 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
 {
-        return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
-               btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
-               btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
-               btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
-               btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
-               btf_id == special_kfunc_list[KF_bpf_rbtree_right];
+        return btf_id_in_kfunc_table(btf_id, bpf_rbtree_api_kfuncs,
+                                     ARRAY_SIZE(bpf_rbtree_api_kfuncs));
 }
 
 static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
@@ -13037,10 +13079,8 @@ static bool is_bpf_graph_api_kfunc(u32 btf_id)
 
 static bool is_bpf_res_spin_lock_kfunc(u32 btf_id)
 {
-        return btf_id == special_kfunc_list[KF_bpf_res_spin_lock] ||
-               btf_id == special_kfunc_list[KF_bpf_res_spin_unlock] ||
-               btf_id == special_kfunc_list[KF_bpf_res_spin_lock_irqsave] ||
-               btf_id == special_kfunc_list[KF_bpf_res_spin_unlock_irqrestore];
+        return btf_id_in_kfunc_table(btf_id, bpf_res_spin_lock_kfuncs,
+                                     ARRAY_SIZE(bpf_res_spin_lock_kfuncs));
 }
 
 static bool is_bpf_arena_kfunc(u32 btf_id)
@@ -13129,17 +13169,12 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
 
         switch (node_field_type) {
         case BPF_LIST_NODE:
-                ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
-                       kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
-                       kfunc_btf_id == special_kfunc_list[KF_bpf_list_del] ||
-                       kfunc_btf_id == special_kfunc_list[KF_bpf_list_add_impl] ||
-                       kfunc_btf_id == special_kfunc_list[KF_bpf_list_node_is_edge]);
+                ret = btf_id_in_kfunc_table(kfunc_btf_id, bpf_list_node_api_kfuncs,
+                                            ARRAY_SIZE(bpf_list_node_api_kfuncs));
                 break;
         case BPF_RB_NODE:
-                ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
-                       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
-                       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
-                       kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
+                ret = btf_id_in_kfunc_table(kfunc_btf_id, bpf_rbtree_node_api_kfuncs,
+                                            ARRAY_SIZE(bpf_rbtree_node_api_kfuncs));
                 break;
         default:
                 verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
--
2.50.1 (Apple Git-155)