The next patch will introduce bpf_selem_unlink_nofail() to handle rqspinlock errors. bpf_selem_unlink_nofail() will allow an selem to be partially unlinked from map or local storage. Save memory allocation method in selem so that later an selem can be correctly freed even when SDATA(selem)->smap is init to NULL. In addition, keep track of memory charge to the owner in local storage so that later bpf_selem_unlink_nofail() can return the correct memory charge to the owner. Updating local_storage->mem_charge is protected by local_storage->lock. Finally, extract miscellaneous tasks performed when unlinking an selem from local_storage into bpf_selem_unlink_storage_nolock_misc(). It will be reused by bpf_selem_unlink_nofail(). This patch also takes the chance to remove local_storage->smap, which is no longer used since commit f484f4a3e058 ("bpf: Replace bpf memory allocator with kmalloc_nolock() in local storage"). Signed-off-by: Amery Hung --- include/linux/bpf_local_storage.h | 5 ++- kernel/bpf/bpf_local_storage.c | 67 ++++++++++++++++--------------- 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index fba3354988d3..a34ed7fa81d8 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -80,7 +80,8 @@ struct bpf_local_storage_elem { * after raw_spin_unlock */ }; - /* 8 bytes hole */ + bool use_kmalloc_nolock; + /* 7 bytes hole */ /* The data is stored in another cacheline to minimize * the number of cachelines access during a cache hit. */ @@ -89,13 +90,13 @@ struct bpf_local_storage_elem { struct bpf_local_storage { struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE]; - struct bpf_local_storage_map __rcu *smap; struct hlist_head list; /* List of bpf_local_storage_elem */ void *owner; /* The object that owns the above "list" of * bpf_local_storage_elem. */ struct rcu_head rcu; rqspinlock_t lock; /* Protect adding/removing from the "list" */ + u64 mem_charge; /* Copy of mem charged to owner. Protected by "lock" */ bool use_kmalloc_nolock; }; diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 3735f79a7b55..f8cfef31e3b8 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -85,6 +85,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, if (selem) { RCU_INIT_POINTER(SDATA(selem)->smap, smap); + selem->use_kmalloc_nolock = smap->use_kmalloc_nolock; if (value) { /* No need to call check_and_init_map_value as memory is zero init */ @@ -214,7 +215,7 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem, smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); - if (!smap->use_kmalloc_nolock) { + if (!selem->use_kmalloc_nolock) { /* * No uptr will be unpin even when reuse_now == false since uptr * is only supported in task local storage, where @@ -251,6 +252,30 @@ static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now) bpf_selem_free(selem, reuse_now); } +static void bpf_selem_unlink_storage_nolock_misc(struct bpf_local_storage_elem *selem, + struct bpf_local_storage_map *smap, + struct bpf_local_storage *local_storage, + bool free_local_storage) +{ + void *owner = local_storage->owner; + u32 uncharge = smap->elem_size; + + if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) == + SDATA(selem)) + RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); + + uncharge += free_local_storage ? sizeof(*local_storage) : 0; + mem_uncharge(smap, local_storage->owner, uncharge); + local_storage->mem_charge -= uncharge; + + if (free_local_storage) { + local_storage->owner = NULL; + + /* After this RCU_INIT, owner may be freed and cannot be used */ + RCU_INIT_POINTER(*owner_storage(smap, owner), NULL); + } +} + /* local_storage->lock must be held and selem->local_storage == local_storage. * The caller must ensure selem->smap is still valid to be * dereferenced for its smap->elem_size and smap->cache_idx. @@ -266,51 +291,27 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); owner = local_storage->owner; - /* All uncharging on the owner must be done first. - * The owner may be freed once the last selem is unlinked - * from local_storage. - */ - mem_uncharge(smap, owner, smap->elem_size); - free_local_storage = hlist_is_singular_node(&selem->snode, &local_storage->list); - if (free_local_storage) { - mem_uncharge(smap, owner, sizeof(struct bpf_local_storage)); - local_storage->owner = NULL; - /* After this RCU_INIT, owner may be freed and cannot be used */ - RCU_INIT_POINTER(*owner_storage(smap, owner), NULL); + bpf_selem_unlink_storage_nolock_misc(selem, smap, local_storage, + free_local_storage); - /* local_storage is not freed now. local_storage->lock is - * still held and raw_spin_unlock_bh(&local_storage->lock) - * will be done by the caller. - * - * Although the unlock will be done under - * rcu_read_lock(), it is more intuitive to - * read if the freeing of the storage is done - * after the raw_spin_unlock_bh(&local_storage->lock). - * - * Hence, a "bool free_local_storage" is returned - * to the caller which then calls then frees the storage after - * all the RCU grace periods have expired. - */ - } hlist_del_init_rcu(&selem->snode); - if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) == - SDATA(selem)) - RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); hlist_add_head(&selem->free_node, free_selem_list); - if (rcu_access_pointer(local_storage->smap) == smap) - RCU_INIT_POINTER(local_storage->smap, NULL); - return free_local_storage; } void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem) { + struct bpf_local_storage_map *smap; + + smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); + local_storage->mem_charge += smap->elem_size; + RCU_INIT_POINTER(selem->local_storage, local_storage); hlist_add_head_rcu(&selem->snode, &local_storage->list); } @@ -472,10 +473,10 @@ int bpf_local_storage_alloc(void *owner, goto uncharge; } - RCU_INIT_POINTER(storage->smap, smap); INIT_HLIST_HEAD(&storage->list); raw_res_spin_lock_init(&storage->lock); storage->owner = owner; + storage->mem_charge = sizeof(*storage); storage->use_kmalloc_nolock = smap->use_kmalloc_nolock; bpf_selem_link_storage_nolock(storage, first_selem); -- 2.47.3