This is a preparatory patch for upcoming per-memcg-per-node kmem accounting. pcpu allocations are always fully charged at once using pcpu_obj_full_size(), which returns the size of the pcpu "metadata" + pcpu "payload". But metadata and payload may not be allocated on the same NUMA node, so charge the metadata independently from the payload. Do this by explicitly passing __GFP_ACCOUNT to the obj_exts allocation and by removing the metadata size from the amount charged in pcpu_memcg_pre_alloc_hook(). The helper no longer includes the metadata and is renamed from pcpu_obj_full_size() to pcpu_obj_total_size(). Signed-off-by: Alexandre Ghiti --- mm/percpu-internal.h | 16 +++------------- mm/percpu.c | 15 ++++++++------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index 4b3d6ec43703..f01db026d213 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -144,22 +144,12 @@ static inline int pcpu_chunk_map_bits(struct pcpu_chunk *chunk) } /** - * pcpu_obj_full_size - helper to calculate size of each accounted object + * pcpu_obj_total_size - helper to calculate size of each accounted object * @size: size of area to allocate in bytes - * - * For each accounted object there is an extra space which is used to store - * obj_cgroup membership if kmemcg is not disabled. Charge it too. 
*/ -static inline size_t pcpu_obj_full_size(size_t size) +static inline size_t pcpu_obj_total_size(size_t size) { - size_t extra_size = 0; - -#ifdef CONFIG_MEMCG - if (!mem_cgroup_kmem_disabled()) - extra_size += size / PCPU_MIN_ALLOC_SIZE * sizeof(struct obj_cgroup *); -#endif - - return size * num_possible_cpus() + extra_size; + return size * num_possible_cpus(); } #ifdef CONFIG_PERCPU_STATS diff --git a/mm/percpu.c b/mm/percpu.c index b0676b8054ed..13de6e099d96 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1460,7 +1460,8 @@ static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) if (need_pcpuobj_ext()) { chunk->obj_exts = pcpu_mem_zalloc(pcpu_chunk_map_bits(chunk) * - sizeof(struct pcpuobj_ext), gfp); + sizeof(struct pcpuobj_ext), + gfp | __GFP_ACCOUNT); if (!chunk->obj_exts) goto objcg_fail; } @@ -1625,7 +1626,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, if (!objcg || obj_cgroup_is_root(objcg)) return true; - if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size))) + if (obj_cgroup_charge(objcg, gfp, pcpu_obj_total_size(size))) return false; *objcgp = objcg; @@ -1645,10 +1646,10 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg, rcu_read_lock(); mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B, - pcpu_obj_full_size(size)); + pcpu_obj_total_size(size)); rcu_read_unlock(); } else { - obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size)); + obj_cgroup_uncharge(objcg, pcpu_obj_total_size(size)); } } @@ -1664,11 +1665,11 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) return; chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup = NULL; - obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size)); + obj_cgroup_uncharge(objcg, pcpu_obj_total_size(size)); rcu_read_lock(); mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B, - -pcpu_obj_full_size(size)); + -pcpu_obj_total_size(size)); rcu_read_unlock(); obj_cgroup_put(objcg); @@ -1897,7 +1898,7 @@ void __percpu *pcpu_alloc_noprof(size_t 
size, size_t align, bool reserved, trace_percpu_alloc_percpu(_RET_IP_, reserved, is_atomic, size, align, chunk->base_addr, off, ptr, - pcpu_obj_full_size(size), gfp); + pcpu_obj_total_size(size), gfp); pcpu_memcg_post_alloc_hook(objcg, chunk, off, size); -- 2.54.0