Take care of rqspinlock error in bpf_local_storage_{map_free, destroy}() properly by switching to bpf_selem_unlink_lockless(). Pass reuse_now == false when calling bpf_selem_free_list() since both callers iterate lists of selem without lock. An selem can only be freed after an RCU grace period. Similarly, SDATA(selem)->smap and selem->local_storage need to be protected by RCU as well since a caller can update these fields which may also be seen by the other at the same time. Pass reuse_now == false when calling bpf_local_storage_free(). The local storage map is already protected as bpf_local_storage_map_free() waits for an RCU grace period after iterating b->list and before freeing itself. Co-developed-by: Martin KaFai Lau Signed-off-by: Martin KaFai Lau Signed-off-by: Amery Hung --- include/linux/bpf_local_storage.h | 2 +- kernel/bpf/bpf_cgrp_storage.c | 1 + kernel/bpf/bpf_inode_storage.c | 1 + kernel/bpf/bpf_local_storage.c | 52 ++++++++++++++++++------------- kernel/bpf/bpf_task_storage.c | 1 + net/core/bpf_sk_storage.c | 7 ++++- 6 files changed, 41 insertions(+), 23 deletions(-) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 1fd908c44fb6..14f8e5edf0a2 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -165,7 +165,7 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage, return SDATA(selem); } -void bpf_local_storage_destroy(struct bpf_local_storage *local_storage); +u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage); void bpf_local_storage_map_free(struct bpf_map *map, struct bpf_local_storage_cache *cache); diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index 853183eead2c..9289b0c3fae9 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -28,6 +28,7 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup) goto out; bpf_local_storage_destroy(local_storage); + RCU_INIT_POINTER(cgroup->bpf_cgrp_storage, NULL); out: rcu_read_unlock(); } diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 470f4b02c79e..120354ef0bf8 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -69,6 +69,7 @@ void bpf_inode_storage_free(struct inode *inode) goto out; bpf_local_storage_destroy(local_storage); + RCU_INIT_POINTER(bsb->storage, NULL); out: rcu_read_unlock_migrate(); } diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 4c682d5aef7f..f63b3c2241f0 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -797,13 +797,22 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, return 0; } -void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) +/* + * Destroy local storage when the owner is going away. Caller must clear owner->storage + * and uncharge memory if memory charging is used. + * + * Since smaps associated with selems may already be gone, mem_uncharge() or + * owner_storage() cannot be called in this function. Let the owner (i.e., the caller) + * do it instead. It is safe for the caller to clear owner_storage without taking + * local_storage->lock as bpf_local_storage_map_free() does not free local_storage and + * no BPF program should be running and freeing the local storage. + */ +u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage) { struct bpf_local_storage_elem *selem; - bool free_storage = false; HLIST_HEAD(free_selem_list); struct hlist_node *n; - unsigned long flags; + u32 uncharge = 0; /* Neither the bpf_prog nor the bpf_map's syscall * could be modifying the local_storage->list now. @@ -814,27 +823,22 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) * when unlinking elem from the local_storage->list and * the map's bucket->list. */ - WARN_ON(raw_res_spin_lock_irqsave(&local_storage->lock, flags)); hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { - /* Always unlink from map before unlinking from - * local_storage. - */ - WARN_ON(bpf_selem_unlink_map(selem)); - /* If local_storage list has only one element, the - * bpf_selem_unlink_storage_nolock() will return true. - * Otherwise, it will return false. The current loop iteration - * intends to remove all local storage. So the last iteration - * of the loop will set the free_cgroup_storage to true. - */ - free_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, &free_selem_list); + uncharge += selem->size; + bpf_selem_unlink_lockless(selem, &free_selem_list, BPF_LOCAL_STORAGE_DESTROY); } - raw_res_spin_unlock_irqrestore(&local_storage->lock, flags); + uncharge += sizeof(*local_storage); + local_storage->owner = NULL; - bpf_selem_free_list(&free_selem_list, true); + /* + * Need to wait an RCU gp before freeing selem and local_storage + * since bpf_local_storage_map_free() may still be referencing them. + */ + bpf_selem_free_list(&free_selem_list, false); + + bpf_local_storage_free(local_storage, false); - if (free_storage) - bpf_local_storage_free(local_storage, true); + return uncharge; } u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) @@ -903,6 +907,7 @@ void bpf_local_storage_map_free(struct bpf_map *map, struct bpf_local_storage_map_bucket *b; struct bpf_local_storage_elem *selem; struct bpf_local_storage_map *smap; + HLIST_HEAD(free_selem_list); unsigned int i; smap = (struct bpf_local_storage_map *)map; @@ -931,7 +936,12 @@ void bpf_local_storage_map_free(struct bpf_map *map, while ((selem = hlist_entry_safe( rcu_dereference_raw(hlist_first_rcu(&b->list)), struct bpf_local_storage_elem, map_node))) { - WARN_ON(bpf_selem_unlink(selem, true)); + + bpf_selem_unlink_lockless(selem, &free_selem_list, + BPF_LOCAL_STORAGE_MAP_FREE); + + bpf_selem_free_list(&free_selem_list, false); + cond_resched_rcu(); } rcu_read_unlock(); diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index 4d53aebe6784..7b2c8d428caa 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -54,6 +54,7 @@ void bpf_task_storage_free(struct task_struct *task) goto out; bpf_local_storage_destroy(local_storage); + RCU_INIT_POINTER(task->bpf_storage, NULL); out: rcu_read_unlock(); } diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 38acbecb8ef7..64a52e57953c 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -47,13 +47,18 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) void bpf_sk_storage_free(struct sock *sk) { struct bpf_local_storage *sk_storage; + u32 uncharge; rcu_read_lock_dont_migrate(); sk_storage = rcu_dereference(sk->sk_bpf_storage); if (!sk_storage) goto out; - bpf_local_storage_destroy(sk_storage); + uncharge = bpf_local_storage_destroy(sk_storage); + if (uncharge) + atomic_sub(uncharge, &sk->sk_omem_alloc); + + RCU_INIT_POINTER(sk->sk_bpf_storage, NULL); out: rcu_read_unlock_migrate(); } -- 2.47.3