Introduce bpf_map_memcg_enter() and bpf_map_memcg_exit() helpers to
reduce code duplication in memcg context management.
bpf_map_memcg_enter() gets the memcg from the map, sets it as active,
and returns the previous active memcg. bpf_map_memcg_exit() restores
the previous active memcg and releases the reference obtained during
enter.

Signed-off-by: Puranjay Mohan
---
 include/linux/bpf.h  | 15 +++++++++++++
 kernel/bpf/syscall.c | 50 +++++++++++++++++++++++---------------------
 2 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4e7d72dfbcd4..4aedc3ceb482 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2608,6 +2608,10 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
 int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
                         unsigned long nr_pages, struct page **page_array);
 #ifdef CONFIG_MEMCG
+struct mem_cgroup *bpf_map_memcg_enter(const struct bpf_map *map,
+                                       struct mem_cgroup **memcg);
+void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+                        struct mem_cgroup *memcg);
 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
                            int node);
 void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
@@ -2632,6 +2636,17 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
         kvcalloc(_n, _size, _flags)
 #define bpf_map_alloc_percpu(_map, _size, _align, _flags)              \
         __alloc_percpu_gfp(_size, _align, _flags)
+static inline struct mem_cgroup *bpf_map_memcg_enter(const struct bpf_map *map,
+                                                     struct mem_cgroup **memcg)
+{
+        *memcg = NULL;
+        return NULL;
+}
+
+static inline void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+                                      struct mem_cgroup *memcg)
+{
+}
 #endif
 
 static inline int
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a4d38272d8bc..e7c0c469c60e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -505,17 +505,29 @@ static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map)
         return root_mem_cgroup;
 }
 
+struct mem_cgroup *bpf_map_memcg_enter(const struct bpf_map *map,
+                                       struct mem_cgroup **memcg)
+{
+        *memcg = bpf_map_get_memcg(map);
+        return set_active_memcg(*memcg);
+}
+
+void bpf_map_memcg_exit(struct mem_cgroup *old_memcg,
+                        struct mem_cgroup *memcg)
+{
+        set_active_memcg(old_memcg);
+        mem_cgroup_put(memcg);
+}
+
 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
                            int node)
 {
         struct mem_cgroup *memcg, *old_memcg;
         void *ptr;
 
-        memcg = bpf_map_get_memcg(map);
-        old_memcg = set_active_memcg(memcg);
+        old_memcg = bpf_map_memcg_enter(map, &memcg);
         ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
-        set_active_memcg(old_memcg);
-        mem_cgroup_put(memcg);
+        bpf_map_memcg_exit(old_memcg, memcg);
 
         return ptr;
 }
@@ -526,11 +538,9 @@ void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags
         struct mem_cgroup *memcg, *old_memcg;
         void *ptr;
 
-        memcg = bpf_map_get_memcg(map);
-        old_memcg = set_active_memcg(memcg);
+        old_memcg = bpf_map_memcg_enter(map, &memcg);
         ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node);
-        set_active_memcg(old_memcg);
-        mem_cgroup_put(memcg);
+        bpf_map_memcg_exit(old_memcg, memcg);
 
         return ptr;
 }
@@ -540,11 +550,9 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
         struct mem_cgroup *memcg, *old_memcg;
         void *ptr;
 
-        memcg = bpf_map_get_memcg(map);
-        old_memcg = set_active_memcg(memcg);
+        old_memcg = bpf_map_memcg_enter(map, &memcg);
         ptr = kzalloc(size, flags | __GFP_ACCOUNT);
-        set_active_memcg(old_memcg);
-        mem_cgroup_put(memcg);
+        bpf_map_memcg_exit(old_memcg, memcg);
 
         return ptr;
 }
@@ -555,11 +563,9 @@ void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
         struct mem_cgroup *memcg, *old_memcg;
         void *ptr;
 
-        memcg = bpf_map_get_memcg(map);
-        old_memcg = set_active_memcg(memcg);
+        old_memcg = bpf_map_memcg_enter(map, &memcg);
         ptr = kvcalloc(n, size, flags | __GFP_ACCOUNT);
-        set_active_memcg(old_memcg);
-        mem_cgroup_put(memcg);
+        bpf_map_memcg_exit(old_memcg, memcg);
 
         return ptr;
 }
@@ -570,11 +576,9 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
         struct mem_cgroup *memcg, *old_memcg;
         void __percpu *ptr;
 
-        memcg = bpf_map_get_memcg(map);
-        old_memcg = set_active_memcg(memcg);
+        old_memcg = bpf_map_memcg_enter(map, &memcg);
         ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
-        set_active_memcg(old_memcg);
-        mem_cgroup_put(memcg);
+        bpf_map_memcg_exit(old_memcg, memcg);
 
         return ptr;
 }
@@ -615,8 +619,7 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
 #ifdef CONFIG_MEMCG
         struct mem_cgroup *memcg, *old_memcg;
 
-        memcg = bpf_map_get_memcg(map);
-        old_memcg = set_active_memcg(memcg);
+        old_memcg = bpf_map_memcg_enter(map, &memcg);
 #endif
         for (i = 0; i < nr_pages; i++) {
                 pg = __bpf_alloc_page(nid);
@@ -632,8 +635,7 @@ int bpf_map_alloc_pages(const struct bpf_map *map, int nid,
         }
 #ifdef CONFIG_MEMCG
-        set_active_memcg(old_memcg);
-        mem_cgroup_put(memcg);
+        bpf_map_memcg_exit(old_memcg, memcg);
 #endif
         return ret;
 }
--
2.47.3
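
For illustration, the calling pattern the two helpers are meant to replace
looks roughly like the sketch below; bpf_map_kmalloc_example() is a
hypothetical caller, not part of the series, and mirrors what
bpf_map_kmalloc_node() becomes after this patch:

#include <linux/bpf.h>
#include <linux/memcontrol.h>
#include <linux/slab.h>

/* Hypothetical caller: charge an allocation to the map's memcg. */
static void *bpf_map_kmalloc_example(const struct bpf_map *map, size_t size,
                                     gfp_t flags)
{
        struct mem_cgroup *memcg, *old_memcg;
        void *ptr;

        /* Make the map's memcg the active one; remember the previous one. */
        old_memcg = bpf_map_memcg_enter(map, &memcg);
        /* __GFP_ACCOUNT charges the allocation to the active memcg. */
        ptr = kmalloc(size, flags | __GFP_ACCOUNT);
        /* Restore the previous active memcg and drop the reference taken by enter. */
        bpf_map_memcg_exit(old_memcg, memcg);

        return ptr;
}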
When arena allocations were converted from bpf_map_alloc_pages() to
kmalloc_nolock() to support non-sleepable contexts, memcg accounting
was inadvertently lost. This commit restores proper memory accounting
for all arena-related allocations.

All arena-related allocations are now accounted to the memcg of the
process that created the bpf_arena.

Signed-off-by: Puranjay Mohan
---
 kernel/bpf/arena.c      | 39 ++++++++++++++++++++++++++++++++++-----
 kernel/bpf/range_tree.c |  5 +++--
 2 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c
index 456ac989269d..cb9451208b0e 100644
--- a/kernel/bpf/arena.c
+++ b/kernel/bpf/arena.c
@@ -360,6 +360,7 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
 {
         struct bpf_map *map = vmf->vma->vm_file->private_data;
         struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+        struct mem_cgroup *memcg, *old_memcg;
         struct page *page;
         long kbase, kaddr;
         unsigned long flags;
@@ -377,6 +378,8 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
                 /* already have a page vmap-ed */
                 goto out;
 
+        old_memcg = bpf_map_memcg_enter(map, &memcg);
+
         if (arena->map.map_flags & BPF_F_SEGV_ON_FAULT)
                 /* User space requested to segfault when page is not allocated by bpf prog */
                 goto out_unlock_sigsegv;
@@ -400,12 +403,14 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
                 goto out_unlock_sigsegv;
         }
         flush_vmap_cache(kaddr, PAGE_SIZE);
+        bpf_map_memcg_exit(old_memcg, memcg);
 out:
         page_ref_add(page, 1);
         raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
         vmf->page = page;
         return 0;
 out_unlock_sigsegv:
+        bpf_map_memcg_exit(old_memcg, memcg);
         raw_res_spin_unlock_irqrestore(&arena->spinlock, flags);
         return VM_FAULT_SIGSEGV;
 }
@@ -557,7 +562,7 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
         /* Cap allocation size to KMALLOC_MAX_CACHE_SIZE so kmalloc_nolock() can succeed.
          */
         alloc_pages = min(page_cnt, KMALLOC_MAX_CACHE_SIZE / sizeof(struct page *));
-        pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), 0, NUMA_NO_NODE);
+        pages = kmalloc_nolock(alloc_pages * sizeof(struct page *), __GFP_ACCOUNT, NUMA_NO_NODE);
         if (!pages)
                 return 0;
         data.pages = pages;
@@ -713,7 +718,7 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt,
                 return;
 defer:
-        s = kmalloc_nolock(sizeof(struct arena_free_span), 0, -1);
+        s = kmalloc_nolock(sizeof(struct arena_free_span), __GFP_ACCOUNT, -1);
         if (!s)
                 /*
                  * If allocation fails in non-sleepable context, pages are intentionally left
@@ -834,49 +839,69 @@ __bpf_kfunc_start_defs();
 __bpf_kfunc void *bpf_arena_alloc_pages(void *p__map, void *addr__ign, u32 page_cnt,
                                         int node_id, u64 flags)
 {
+        void *ret;
         struct bpf_map *map = p__map;
+        struct mem_cgroup *memcg, *old_memcg;
         struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
 
         if (map->map_type != BPF_MAP_TYPE_ARENA || flags || !page_cnt)
                 return NULL;
 
-        return (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id, true);
+        old_memcg = bpf_map_memcg_enter(map, &memcg);
+        ret = (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id, true);
+        bpf_map_memcg_exit(old_memcg, memcg);
+
+        return ret;
 }
 
 void *bpf_arena_alloc_pages_non_sleepable(void *p__map, void *addr__ign, u32 page_cnt,
                                           int node_id, u64 flags)
 {
+        void *ret;
         struct bpf_map *map = p__map;
+        struct mem_cgroup *memcg, *old_memcg;
         struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
 
         if (map->map_type != BPF_MAP_TYPE_ARENA || flags || !page_cnt)
                 return NULL;
 
-        return (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id, false);
+        old_memcg = bpf_map_memcg_enter(map, &memcg);
+        ret = (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id, false);
+        bpf_map_memcg_exit(old_memcg, memcg);
+
+        return ret;
 }
 
 __bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt)
 {
         struct bpf_map *map = p__map;
+        struct mem_cgroup *memcg, *old_memcg;
         struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
 
         if (map->map_type != BPF_MAP_TYPE_ARENA || !page_cnt || !ptr__ign)
                 return;
 
+        old_memcg = bpf_map_memcg_enter(map, &memcg);
         arena_free_pages(arena, (long)ptr__ign, page_cnt, true);
+        bpf_map_memcg_exit(old_memcg, memcg);
 }
 
 void bpf_arena_free_pages_non_sleepable(void *p__map, void *ptr__ign, u32 page_cnt)
 {
         struct bpf_map *map = p__map;
+        struct mem_cgroup *memcg, *old_memcg;
         struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
 
         if (map->map_type != BPF_MAP_TYPE_ARENA || !page_cnt || !ptr__ign)
                 return;
 
+        old_memcg = bpf_map_memcg_enter(map, &memcg);
         arena_free_pages(arena, (long)ptr__ign, page_cnt, false);
+        bpf_map_memcg_exit(old_memcg, memcg);
 }
 
 __bpf_kfunc int bpf_arena_reserve_pages(void *p__map, void *ptr__ign, u32 page_cnt)
 {
+        int ret;
         struct bpf_map *map = p__map;
+        struct mem_cgroup *memcg, *old_memcg;
         struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
 
         if (map->map_type != BPF_MAP_TYPE_ARENA)
@@ -885,7 +910,11 @@ __bpf_kfunc int bpf_arena_reserve_pages(void *p__map, void *ptr__ign, u32 page_c
         if (!page_cnt)
                 return 0;
 
-        return arena_reserve_pages(arena, (long)ptr__ign, page_cnt);
+        old_memcg = bpf_map_memcg_enter(map, &memcg);
+        ret = arena_reserve_pages(arena, (long)ptr__ign, page_cnt);
+        bpf_map_memcg_exit(old_memcg, memcg);
+
+        return ret;
 }
 
 __bpf_kfunc_end_defs();
diff --git a/kernel/bpf/range_tree.c b/kernel/bpf/range_tree.c
index 99c63d982c5d..2f28886f3ff7 100644
--- a/kernel/bpf/range_tree.c
+++ b/kernel/bpf/range_tree.c
@@ -149,7 +149,8 @@ int range_tree_clear(struct range_tree *rt, u32 start, u32 len)
                         range_it_insert(rn, rt);
 
                         /* Add a range */
-                        new_rn = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE);
+                        new_rn = kmalloc_nolock(sizeof(struct range_node), __GFP_ACCOUNT,
+                                                NUMA_NO_NODE);
                         if (!new_rn)
                                 return -ENOMEM;
                         new_rn->rn_start = last + 1;
@@ -234,7 +235,7 @@ int range_tree_set(struct range_tree *rt, u32 start, u32 len)
                 right->rn_start = start;
                 range_it_insert(right, rt);
         } else {
-                left = kmalloc_nolock(sizeof(struct range_node), 0, NUMA_NO_NODE);
+                left = kmalloc_nolock(sizeof(struct range_node), __GFP_ACCOUNT, NUMA_NO_NODE);
                 if (!left)
                         return -ENOMEM;
                 left->rn_start = start;
--
2.47.3
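
To illustrate why both halves of this patch are needed, a minimal sketch
under stated assumptions (the *_example() functions are hypothetical and
not part of the series): __GFP_ACCOUNT marks an allocation as accounted,
while the bpf_map_memcg_enter()/bpf_map_memcg_exit() pair around the call
chain decides which memcg receives the charge:

#include <linux/bpf.h>
#include <linux/memcontrol.h>
#include <linux/numa.h>
#include <linux/slab.h>

/*
 * Hypothetical stand-in for the kmalloc_nolock() calls inside
 * arena_alloc_pages()/range_tree: __GFP_ACCOUNT only says "charge this
 * allocation", it does not choose which memcg is charged.
 */
static void *arena_inner_alloc_example(size_t size)
{
        return kmalloc_nolock(size, __GFP_ACCOUNT, NUMA_NO_NODE);
}

/*
 * Hypothetical stand-in for the bpf_arena_* kfunc wrappers: making the
 * map's memcg active around the call chain routes the __GFP_ACCOUNT
 * charges to the process that created the arena, instead of whatever
 * task happens to be running the bpf program.
 */
static void *arena_entry_example(struct bpf_map *map, size_t size)
{
        struct mem_cgroup *memcg, *old_memcg;
        void *p;

        old_memcg = bpf_map_memcg_enter(map, &memcg);
        p = arena_inner_alloc_example(size);
        bpf_map_memcg_exit(old_memcg, memcg);

        return p;
}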