Convert MEMCG_PERCPU_B from a memcg_stat_item to a memcg_node_stat_item to give visibility into per-node breakdowns for percpu allocations and turn it into NR_PERCPU_B. Because percpu memory is accounted at a sub-PAGE_SIZE level, we must account node-level statistics (accounted in PAGE_SIZE units) and memcg-lruvec statistics separately. Account node statistics when the pcpu pages are allocated, and account memcg-lruvec statistics when pcpu objects are handed out. To account these separately, expose mod_memcg_lruvec_state to be used outside of memcontrol. One functional change is that we do not account the 8-byte objcg pointer per-memcg-lruvec. Since the objcg membership is tracked per-memcg and not per-cpu, there is no appropriate lruvec to charge this memory to (see pcpu_obj_full_size). Instead of adding additional mechanisms to detect which lruvec the 8-byte pointer belongs to, let's just simplify and account the pcpu objects' size. Limit-checking is still done with the additional 8 bytes.
Signed-off-by: Joshua Hahn --- include/linux/memcontrol.h | 4 +++- include/linux/mmzone.h | 4 +++- mm/memcontrol.c | 12 ++++++------ mm/percpu-vm.c | 14 ++++++++++++-- mm/percpu.c | 24 ++++++++++++++++++++---- mm/vmstat.c | 1 + 6 files changed, 45 insertions(+), 14 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 086158969529..96dae769c60d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -34,7 +34,6 @@ struct kmem_cache; enum memcg_stat_item { MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS, MEMCG_SOCK, - MEMCG_PERCPU_B, MEMCG_KMEM, MEMCG_ZSWAP_B, MEMCG_ZSWAPPED, @@ -909,6 +908,9 @@ struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim, struct mem_cgroup *oom_domain); void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); +void mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, + int val); + /* idx can be of type enum memcg_stat_item or node_stat_item */ void mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, int val); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7bd0134c241c..e38d8fe8552b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -328,6 +328,7 @@ enum node_stat_item { #endif NR_BALLOON_PAGES, NR_KERNEL_FILE_PAGES, + NR_PERCPU_B, NR_VM_NODE_STAT_ITEMS }; @@ -365,7 +366,8 @@ static __always_inline bool vmstat_item_in_bytes(int idx) * byte-precise. 
*/ return (idx == NR_SLAB_RECLAIMABLE_B || - idx == NR_SLAB_UNRECLAIMABLE_B); + idx == NR_SLAB_UNRECLAIMABLE_B || + idx == NR_PERCPU_B); } /* diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a47fb68dd65f..b320b6a42696 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -377,6 +377,7 @@ static const unsigned int memcg_node_stat_items[] = { NR_UNEVICTABLE, NR_SLAB_RECLAIMABLE_B, NR_SLAB_UNRECLAIMABLE_B, + NR_PERCPU_B, WORKINGSET_REFAULT_ANON, WORKINGSET_REFAULT_FILE, WORKINGSET_ACTIVATE_ANON, @@ -428,7 +429,6 @@ static const unsigned int memcg_node_stat_items[] = { static const unsigned int memcg_stat_items[] = { MEMCG_SWAP, MEMCG_SOCK, - MEMCG_PERCPU_B, MEMCG_KMEM, MEMCG_ZSWAP_B, MEMCG_ZSWAPPED, @@ -920,9 +920,8 @@ static void __mod_memcg_lruvec_state(struct mem_cgroup_per_node *pn, put_cpu(); } -static void mod_memcg_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, - int val) +void mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, + int val) { struct pglist_data *pgdat = lruvec_pgdat(lruvec); struct mem_cgroup_per_node *pn; @@ -936,6 +935,7 @@ static void mod_memcg_lruvec_state(struct lruvec *lruvec, get_non_dying_memcg_end(); } +EXPORT_SYMBOL(mod_memcg_lruvec_state); /** * mod_lruvec_state - update lruvec memory statistics @@ -1535,7 +1535,7 @@ static const struct memory_stat memory_stats[] = { { "kernel_stack", NR_KERNEL_STACK_KB }, { "pagetables", NR_PAGETABLE }, { "sec_pagetables", NR_SECONDARY_PAGETABLE }, - { "percpu", MEMCG_PERCPU_B }, + { "percpu", NR_PERCPU_B }, { "sock", MEMCG_SOCK }, { "vmalloc", NR_VMALLOC }, { "shmem", NR_SHMEM }, @@ -1597,7 +1597,7 @@ static const struct memory_stat memory_stats[] = { static int memcg_page_state_unit(int item) { switch (item) { - case MEMCG_PERCPU_B: + case NR_PERCPU_B: case MEMCG_ZSWAP_B: case NR_SLAB_RECLAIMABLE_B: case NR_SLAB_UNRECLAIMABLE_B: diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index 4f5937090590..e36b639f521d 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c @@ 
-55,7 +55,8 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk, struct page **pages, int page_start, int page_end) { unsigned int cpu; - int i; + int nr_pages = page_end - page_start; + int i, nid; for_each_possible_cpu(cpu) { for (i = page_start; i < page_end; i++) { @@ -65,6 +66,10 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk, __free_page(page); } } + + for_each_node(nid) + mod_node_page_state(NODE_DATA(nid), NR_PERCPU_B, + -1L * nr_pages * nr_cpus_node(nid) * PAGE_SIZE); } /** @@ -84,7 +89,8 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk, gfp_t gfp) { unsigned int cpu, tcpu; - int i; + int nr_pages = page_end - page_start; + int i, nid; gfp |= __GFP_HIGHMEM; @@ -97,6 +103,10 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk, goto err; } } + + for_each_node(nid) + mod_node_page_state(NODE_DATA(nid), NR_PERCPU_B, + nr_pages * nr_cpus_node(nid) * PAGE_SIZE); return 0; err: diff --git a/mm/percpu.c b/mm/percpu.c index b0676b8054ed..4ad3b9739eb9 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1632,6 +1632,24 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, return true; } +static void pcpu_mod_memcg_lruvec(struct obj_cgroup *objcg, int charge) +{ + struct mem_cgroup *memcg; + int nid; + + memcg = obj_cgroup_memcg(objcg); + for_each_node(nid) { + struct lruvec *lruvec; + unsigned int nr_cpus = nr_cpus_node(nid); + + if (!nr_cpus) + continue; + + lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); + mod_memcg_lruvec_state(lruvec, NR_PERCPU_B, nr_cpus * charge); + } +} + static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg, struct pcpu_chunk *chunk, int off, size_t size) @@ -1644,8 +1662,7 @@ static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg, chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup = objcg; rcu_read_lock(); - mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B, - pcpu_obj_full_size(size)); + pcpu_mod_memcg_lruvec(objcg, size); rcu_read_unlock(); } else { obj_cgroup_uncharge(objcg, 
pcpu_obj_full_size(size)); @@ -1667,8 +1684,7 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) obj_cgroup_uncharge(objcg, pcpu_obj_full_size(size)); rcu_read_lock(); - mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B, - -pcpu_obj_full_size(size)); + pcpu_mod_memcg_lruvec(objcg, -size); rcu_read_unlock(); obj_cgroup_put(objcg); diff --git a/mm/vmstat.c b/mm/vmstat.c index b33097ab9bc8..d73c3355be71 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1296,6 +1296,7 @@ const char * const vmstat_text[] = { #endif [I(NR_BALLOON_PAGES)] = "nr_balloon_pages", [I(NR_KERNEL_FILE_PAGES)] = "nr_kernel_file_pages", + [I(NR_PERCPU_B)] = "nr_percpu", #undef I /* system-wide enum vm_stat_item counters */ -- 2.52.0