From: Minchan Kim This patch introduces GCMA (Guaranteed Contiguous Memory Allocator) cleacache backend which reserves some amount of memory at the boot and then donates it to store clean file-backed pages in the cleancache. GCMA aims to guarantee contiguous memory allocation success as well as low and deterministic allocation latency. Notes: Originally, the idea was posted by SeongJae Park and Minchan Kim [1]. Later Minchan reworked it to be used in Android as a reference for Android vendors to use [2]. [1] https://lwn.net/Articles/619865/ [2] https://android-review.googlesource.com/q/topic:%22gcma_6.12%22 Signed-off-by: Minchan Kim Signed-off-by: Suren Baghdasaryan --- MAINTAINERS | 2 + include/linux/gcma.h | 36 +++++++ mm/Kconfig | 15 +++ mm/Makefile | 1 + mm/gcma.c | 231 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 285 insertions(+) create mode 100644 include/linux/gcma.h create mode 100644 mm/gcma.c diff --git a/MAINTAINERS b/MAINTAINERS index 441e68c94177..95b5ad26ec11 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16361,6 +16361,7 @@ F: Documentation/admin-guide/mm/ F: Documentation/mm/ F: include/linux/cma.h F: include/linux/dmapool.h +F: include/linux/gcma.h F: include/linux/ioremap.h F: include/linux/memory-tiers.h F: include/linux/page_idle.h @@ -16372,6 +16373,7 @@ F: mm/dmapool.c F: mm/dmapool_test.c F: mm/early_ioremap.c F: mm/fadvise.c +F: mm/gcma.c F: mm/ioremap.c F: mm/mapping_dirty_helpers.c F: mm/memory-tiers.c diff --git a/include/linux/gcma.h b/include/linux/gcma.h new file mode 100644 index 000000000000..20b2c85de87b --- /dev/null +++ b/include/linux/gcma.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __GCMA_H__ +#define __GCMA_H__ + +#include + +#ifdef CONFIG_GCMA + +int gcma_register_area(const char *name, + unsigned long start_pfn, unsigned long count); + +/* + * NOTE: allocated pages are still marked reserved and when freeing them + * the caller should ensure they are isolated and not referenced by anyone + * other than the caller. + */ +int gcma_alloc_range(unsigned long start_pfn, unsigned long count, gfp_t gfp); +int gcma_free_range(unsigned long start_pfn, unsigned long count); + +#else /* CONFIG_GCMA */ + +static inline int gcma_register_area(const char *name, + unsigned long start_pfn, + unsigned long count) + { return -EOPNOTSUPP; } +static inline int gcma_alloc_range(unsigned long start_pfn, + unsigned long count, gfp_t gfp) + { return -EOPNOTSUPP; } + +static inline int gcma_free_range(unsigned long start_pfn, + unsigned long count) + { return -EOPNOTSUPP; } + +#endif /* CONFIG_GCMA */ + +#endif /* __GCMA_H__ */ diff --git a/mm/Kconfig b/mm/Kconfig index 9f4da8a848f4..41ce5ef8db55 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1013,6 +1013,21 @@ config CMA_AREAS If unsure, leave the default value "8" in UMA and "20" in NUMA. +config GCMA + bool "GCMA (Guaranteed Contiguous Memory Allocator)" + depends on CLEANCACHE + help + This enables the Guaranteed Contiguous Memory Allocator to allow + low latency guaranteed contiguous memory allocations. Memory + reserved by GCMA is donated to cleancache to be used as pagecache + extension. Once GCMA allocation is requested, necessary pages are + taken back from the cleancache and used to satisfy the request. + Cleancache guarantees low latency successful allocation as long + as the total size of GCMA allocations does not exceed the size of + the memory donated to the cleancache. + + If unsure, say "N". + # # Select this config option from the architecture Kconfig, if available, to set # the max page order for physically contiguous allocations. diff --git a/mm/Makefile b/mm/Makefile index 845841a140e3..05aee66a8b07 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -149,3 +149,4 @@ obj-$(CONFIG_TMPFS_QUOTA) += shmem_quota.o obj-$(CONFIG_PT_RECLAIM) += pt_reclaim.o obj-$(CONFIG_CLEANCACHE) += cleancache.o obj-$(CONFIG_CLEANCACHE_SYSFS) += cleancache_sysfs.o +obj-$(CONFIG_GCMA) += gcma.o diff --git a/mm/gcma.c b/mm/gcma.c new file mode 100644 index 000000000000..3ee0e1340db3 --- /dev/null +++ b/mm/gcma.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * GCMA (Guaranteed Contiguous Memory Allocator) + * + */ + +#define pr_fmt(fmt) "gcma: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +#define MAX_GCMA_AREAS 64 +#define GCMA_AREA_NAME_MAX_LEN 32 + +struct gcma_area { + int pool_id; + unsigned long start_pfn; + unsigned long end_pfn; + char name[GCMA_AREA_NAME_MAX_LEN]; +}; + +static struct gcma_area areas[MAX_GCMA_AREAS]; +static atomic_t nr_gcma_area = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(gcma_area_lock); + +static int free_folio_range(struct gcma_area *area, + unsigned long start_pfn, unsigned long end_pfn) +{ + unsigned long scanned = 0; + struct folio *folio; + unsigned long pfn; + + for (pfn = start_pfn; pfn < end_pfn; pfn++) { + int err; + + if (!(++scanned % XA_CHECK_SCHED)) + cond_resched(); + + folio = pfn_folio(pfn); + err = cleancache_backend_put_folio(area->pool_id, folio); + if (WARN(err, "PFN %lu: folio is still in use\n", pfn)) + return -EINVAL; + } + + return 0; +} + +static int alloc_folio_range(struct gcma_area *area, + unsigned long start_pfn, unsigned long end_pfn, + gfp_t gfp) +{ + unsigned long scanned = 0; + unsigned long pfn; + + for (pfn = start_pfn; pfn < end_pfn; pfn++) { + int err; + + if (!(++scanned % XA_CHECK_SCHED)) + cond_resched(); + + err = cleancache_backend_get_folio(area->pool_id, pfn_folio(pfn)); + if (err) { + free_folio_range(area, start_pfn, pfn); + return err; + } + } + + return 0; +} + +static struct gcma_area *find_area(unsigned long start_pfn, unsigned long end_pfn) +{ + int nr_area = atomic_read_acquire(&nr_gcma_area); + int i; + + for (i = 0; i < nr_area; i++) { + struct gcma_area *area = &areas[i]; + + if (area->end_pfn <= start_pfn) + continue; + + if (area->start_pfn > end_pfn) + continue; + + /* The entire range should belong to a single area */ + if (start_pfn < area->start_pfn || end_pfn > area->end_pfn) + break; + + /* Found the area containing the entire range */ + return area; + } + + return NULL; +} + +int gcma_register_area(const char *name, + unsigned long start_pfn, unsigned long count) +{ + LIST_HEAD(folios); + int i, pool_id; + int nr_area; + int ret = 0; + + pool_id = cleancache_backend_register_pool(name); + if (pool_id < 0) + return pool_id; + + for (i = 0; i < count; i++) { + struct folio *folio; + + folio = pfn_folio(start_pfn + i); + folio_clear_reserved(folio); + folio_set_count(folio, 0); + list_add(&folio->lru, &folios); + } + + cleancache_backend_put_folios(pool_id, &folios); + + spin_lock(&gcma_area_lock); + + nr_area = atomic_read(&nr_gcma_area); + if (nr_area < MAX_GCMA_AREAS) { + struct gcma_area *area = &areas[nr_area]; + + area->pool_id = pool_id; + area->start_pfn = start_pfn; + area->end_pfn = start_pfn + count; + strscpy(area->name, name); + /* Ensure above stores complete before we increase the count */ + atomic_set_release(&nr_gcma_area, nr_area + 1); + } else { + ret = -ENOMEM; + } + + spin_unlock(&gcma_area_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(gcma_register_area); + +int gcma_alloc_range(unsigned long start_pfn, unsigned long count, gfp_t gfp) +{ + unsigned long end_pfn = start_pfn + count; + struct gcma_area *area; + struct folio *folio; + int err, order = 0; + + gfp = current_gfp_context(gfp); + if (gfp & __GFP_COMP) { + if (!is_power_of_2(count)) + return -EINVAL; + + order = ilog2(count); + if (order >= MAX_PAGE_ORDER) + return -EINVAL; + } + + area = find_area(start_pfn, end_pfn); + if (!area) + return -EINVAL; + + err = alloc_folio_range(area, start_pfn, end_pfn, gfp); + if (err) + return err; + + /* + * GCMA returns pages with refcount 1 and expects them to have + * the same refcount 1 when they are freed. + */ + if (order) { + folio = pfn_folio(start_pfn); + set_page_count(&folio->page, 1); + prep_compound_page(&folio->page, order); + } else { + for (unsigned long pfn = start_pfn; pfn < end_pfn; pfn++) { + folio = pfn_folio(pfn); + set_page_count(&folio->page, 1); + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(gcma_alloc_range); + +int gcma_free_range(unsigned long start_pfn, unsigned long count) +{ + unsigned long end_pfn = start_pfn + count; + struct gcma_area *area; + struct folio *folio; + + area = find_area(start_pfn, end_pfn); + if (!area) + return -EINVAL; + + folio = pfn_folio(start_pfn); + if (folio_test_large(folio)) { + int expected = folio_nr_pages(folio); + + if (WARN(count != expected, "PFN %lu: count %lu != expected %d\n", + start_pfn, count, expected)) + return -EINVAL; + + if (WARN(!folio_ref_dec_and_test(folio), + "PFN %lu: invalid folio refcount when freeing\n", start_pfn)) + return -EINVAL; + + free_pages_prepare(&folio->page, folio_order(folio)); + } else { + for (unsigned long pfn = start_pfn; pfn < end_pfn; pfn++) { + folio = pfn_folio(pfn); + if (folio_nr_pages(folio) == 1) + count--; + + if (WARN(!folio_ref_dec_and_test(folio), + "PFN %lu: invalid folio refcount when freeing\n", pfn)) + return -EINVAL; + + free_pages_prepare(&folio->page, 0); + } + WARN(count != 0, "%lu pages are still in use!\n", count); + } + + return free_folio_range(area, start_pfn, end_pfn); +} +EXPORT_SYMBOL_GPL(gcma_free_range); -- 2.51.0.740.g6adb054d12-goog