From: Kaitao Cheng

If a user holds ownership of a node in the middle of a list, they can
directly remove it from the list without strictly adhering to deletion
rules from the head or tail.

This is typically paired with bpf_refcount. After calling bpf_list_del,
it is generally necessary to drop the reference to the list node twice
(once for the reference passed in and once for the one returned) to
prevent reference count leaks.

A node that is not on the given list makes bpf_list_del return NULL
without a warning, since a BPF program can reach that case at will.

Signed-off-by: Kaitao Cheng
---
 kernel/bpf/helpers.c  | 29 +++++++++++++++++++++++++++++
 kernel/bpf/verifier.c |  6 +++++-
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index db72b96f9c8c..44d9b9ea8d40 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2388,6 +2388,34 @@ __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
 	return __bpf_list_del(head, true);
 }
 
+/* Unlink @node from the list rooted at @head and return it to the caller.
+ *
+ * Returns @node on success. Returns NULL when @node is NULL or when its
+ * recorded owner is not @head; in that case nothing is modified.
+ */
+__bpf_kfunc struct bpf_list_node *bpf_list_del(struct bpf_list_head *head,
+					       struct bpf_list_node *node)
+{
+	struct bpf_list_node_kern *knode = (struct bpf_list_node_kern *)node;
+	struct list_head *h = (void *)head;
+
+	if (unlikely(!knode))
+		return NULL;
+
+	/* @node is chosen by the BPF program, so "not on this list" is an
+	 * ordinary runtime outcome rather than a kernel bug. Fail quietly
+	 * instead of WARNing on a condition the program can trigger at will.
+	 */
+	if (READ_ONCE(knode->owner) != h)
+		return NULL;
+
+	/* Detach the node and mark it as belonging to no list. */
+	list_del_init(&knode->list_head);
+	WRITE_ONCE(knode->owner, NULL);
+
+	return node;
+}
+
 __bpf_kfunc struct bpf_list_node *bpf_list_front(struct bpf_list_head *head)
 {
 	struct list_head *h = (struct list_head *)head;
@@ -4404,6 +4432,7 @@ BTF_ID_FLAGS(func, bpf_list_push_front_impl)
 BTF_ID_FLAGS(func, bpf_list_push_back_impl)
 BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_list_del, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 50cb4956e5bb..dff6cc8912e4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12345,6 +12345,7 @@ enum special_kfunc_type {
 	KF_bpf_list_push_back_impl,
 	KF_bpf_list_pop_front,
 	KF_bpf_list_pop_back,
+	KF_bpf_list_del,
 	KF_bpf_list_front,
 	KF_bpf_list_back,
 	KF_bpf_cast_to_kern_ctx,
@@ -12399,6 +12400,7 @@ BTF_ID(func, bpf_list_push_front_impl)
 BTF_ID(func, bpf_list_push_back_impl)
 BTF_ID(func, bpf_list_pop_front)
 BTF_ID(func, bpf_list_pop_back)
+BTF_ID(func, bpf_list_del)
 BTF_ID(func, bpf_list_front)
 BTF_ID(func, bpf_list_back)
 BTF_ID(func, bpf_cast_to_kern_ctx)
@@ -12862,6 +12864,7 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
 	       btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
 	       btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
 	       btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
+	       btf_id == special_kfunc_list[KF_bpf_list_del] ||
 	       btf_id == special_kfunc_list[KF_bpf_list_front] ||
 	       btf_id == special_kfunc_list[KF_bpf_list_back];
 }
@@ -12970,7 +12973,8 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
 	switch (node_field_type) {
 	case BPF_LIST_NODE:
 		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
-		       kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
+		       kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
+		       kfunc_btf_id == special_kfunc_list[KF_bpf_list_del]);
 		break;
 	case BPF_RB_NODE:
 		ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
-- 
2.50.1 (Apple Git-155)

From: Kaitao Cheng

Add a node to both an rbtree and a list, retrieve the node from the
rbtree, use the obtained node pointer to remove it from the list, and
finally free the node.
Signed-off-by: Kaitao Cheng
---
 .../testing/selftests/bpf/bpf_experimental.h | 11 +++++
 .../selftests/bpf/progs/refcounted_kptr.c    | 41 +++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 2cd9165c7348..6d8175abc590 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -99,6 +99,17 @@ extern struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ks
  */
 extern struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym;
 
+/* Description
+ *	Remove 'node' from the BPF linked list with head 'head'.
+ *	The returned pointer is an owning reference that must be released
+ *	with bpf_obj_drop, separately from the reference passed in as 'node'.
+ * Returns
+ *	Pointer to the removed bpf_list_node, or NULL if 'node' is NULL
+ *	or is not on the list headed by 'head'.
+ */
+extern struct bpf_list_node *bpf_list_del(struct bpf_list_head *head,
+					  struct bpf_list_node *node) __ksym;
+
 /* Description
  *	Remove 'node' from rbtree with root 'root'
  * Returns
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
index 1aca85d86aeb..2cb6dd73db7a 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
@@ -367,6 +367,47 @@ long insert_rbtree_and_stash__del_tree_##rem_tree(void *ctx) \
 INSERT_STASH_READ(true, "insert_stash_read: remove from tree");
 INSERT_STASH_READ(false, "insert_stash_read: don't remove from tree");
 
+/* Insert node_data into both rbtree and list, remove it from the tree, then
+ * unlink it from the list via bpf_list_del() and drop both references held.
+ */
+SEC("tc")
+__description("test_bpf_list_del: remove an arbitrary node from the list")
+__success __retval(0)
+long test_bpf_list_del(void *ctx)
+{
+	long err;
+	struct bpf_rb_node *rb;
+	struct bpf_list_node *l;
+	struct node_data *n;
+
+	err = __insert_in_tree_and_list(&head, &root, &lock);
+	if (err)
+		return err;
+
+	bpf_spin_lock(&lock);
+	rb = bpf_rbtree_first(&root);
+	if (!rb) {
+		bpf_spin_unlock(&lock);
+		return -4;	/* bpf_rbtree_first() found no node */
+	}
+
+	rb = bpf_rbtree_remove(&root, rb);
+	bpf_spin_unlock(&lock);
+	if (!rb)
+		return -5;	/* bpf_rbtree_remove() returned NULL */
+	n = container_of(rb, struct node_data, r);
+
+	bpf_spin_lock(&lock);
+	l = bpf_list_del(&head, &n->l);
+	bpf_spin_unlock(&lock);
+	bpf_obj_drop(n);	/* reference obtained from bpf_rbtree_remove() */
+	if (!l)
+		return -6;	/* bpf_list_del() returned NULL */
+
+	bpf_obj_drop(container_of(l, struct node_data, l));
+	return 0;
+}
+
 SEC("tc")
 __success
 long rbtree_refcounted_node_ref_escapes(void *ctx)
-- 
2.50.1 (Apple Git-155)