This is a preparatory patch for upcoming per-memcg-per-node kmem
accounting.

pcpu allocations are always fully charged at once using
pcpu_obj_full_size(), which returns the size of the pcpu "metadata" +
pcpu "payload". But metadata and payload may not be allocated on the
same numa node, so charge the metadata independently from the payload.

Do this by explicitly passing __GFP_ACCOUNT to the obj_exts allocation
and remove its accounting in pcpu_memcg_pre_alloc_hook().

Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
---
 mm/percpu-internal.h | 16 +++-------------
 mm/percpu.c          | 15 ++++++++-------
 2 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index 4b3d6ec43703..f01db026d213 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -144,22 +144,12 @@ static inline int pcpu_chunk_map_bits(struct pcpu_chunk *chunk)
 }
 
 /**
- * pcpu_obj_full_size - helper to calculate size of each accounted object
+ * pcpu_obj_total_size - helper to calculate size of each accounted object
  * @size: size of area to allocate in bytes
- *
- * For each accounted object there is an extra space which is used to store
- * obj_cgroup membership if kmemcg is not disabled. Charge it too.
  */
-static inline size_t pcpu_obj_full_size(size_t size)
+static inline size_t pcpu_obj_total_size(size_t size)
 {
-	size_t extra_size = 0;
-
-#ifdef CONFIG_MEMCG
-	if (!mem_cgroup_kmem_disabled())
-		extra_size += size / PCPU_MIN_ALLOC_SIZE * sizeof(struct obj_cgroup *);
-#endif
-
-	return size * num_possible_cpus() + extra_size;
+	return size * num_possible_cpus();
 }
 
 #ifdef CONFIG_PERCPU_STATS
diff --git a/mm/percpu.c b/mm/percpu.c
index b0676b8054ed..13de6e099d96 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1460,7 +1460,8 @@ static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
 	if (need_pcpuobj_ext()) {
 		chunk->obj_exts =
 			pcpu_mem_zalloc(pcpu_chunk_map_bits(chunk) *
-					sizeof(struct pcpuobj_ext), gfp);
+					sizeof(struct pcpuobj_ext),
+					gfp | __GFP_ACCOUNT);
 		if (!chunk->obj_exts)
 			goto objcg_fail;
 	}
@@ -1625,7 +1626,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
 	if (!objcg || obj_cgroup_is_root(objcg))
 		return true;
 
-	if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size)))
+	if (obj_cgroup_charge(objcg, gfp, pcpu_obj_total_size(size)))
 		return false;
 
 	*objcgp = objcg;
@@ -1645,10 +1646,10 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg,
 
 		rcu_read_lock();
 		mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
-				pcpu_obj_full_size(size));
+				pcpu_obj_total_size(size));
 		rcu_read_unlock();
 	} else {
-		obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size));
+		obj_cgroup_uncharge(objcg, pcpu_obj_total_size(size));
 	}
 }
 
@@ -1664,11 +1665,11 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
 		return;
 	chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup = NULL;
 
-	obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size));
+	obj_cgroup_uncharge(objcg, pcpu_obj_total_size(size));
 
 	rcu_read_lock();
 	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
-			-pcpu_obj_full_size(size));
+			-pcpu_obj_total_size(size));
 	rcu_read_unlock();
 
 	obj_cgroup_put(objcg);
@@ -1897,7 +1898,7 @@ void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved,
 
 	trace_percpu_alloc_percpu(_RET_IP_, reserved, is_atomic, size, align,
 				  chunk->base_addr, off, ptr,
-				  pcpu_obj_full_size(size), gfp);
+				  pcpu_obj_total_size(size), gfp);
 
 	pcpu_memcg_post_alloc_hook(objcg, chunk, off, size);
 
-- 
2.54.0