With the removal of the zpool layer, the ability to select an allocation
backend for zswap has been lost. While the flexibility zpool used to
provide may have seemed unjustified, the option to select an alternative
backend should still exist, especially given the recent advancements in
the vmap/vmalloc area that allow for faster and simpler allocation
backends. This patch reinstates the zpool API as a thin, header-only
layer and provides the infrastructure for compile-time selection of the
backend.

Signed-off-by: Vitaly Wool
---
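For illustration only (not part of the patch): the sketch below shows how a
second allocator backend could be wired into the reinstated zpool layer. A
backend supplies one function for each of the eight zpool_* operations
declared in include/linux/zpool.h, named with its own prefix, instantiates
the zpool_* wrappers with a single DECLARE_ZPOOL() in its .c file, and gets
a new entry in the "Allocator for zswap" Kconfig choice so that exactly one
backend is built in. The "vm" prefix, the vmpool name and all function
bodies below are hypothetical.

  /* hypothetical mm/vmpool.c -- sketch only, not part of this patch */
  #include <linux/atomic.h>
  #include <linux/errno.h>
  #include <linux/gfp.h>
  #include <linux/slab.h>
  #include <linux/types.h>
  #include <linux/zpool.h>

  struct vm_pool {
          atomic_long_t pages_used;       /* backend-private state */
  };

  static struct vm_pool *vm_create_pool(const char *name)
  {
          return kzalloc(sizeof(struct vm_pool), GFP_KERNEL);
  }

  static void vm_destroy_pool(struct vm_pool *pool)
  {
          kfree(pool);
  }

  /* returns an opaque handle, or an errno recognised by IS_ERR_VALUE() */
  static unsigned long vm_malloc(struct vm_pool *pool, size_t size,
                                 gfp_t gfp, const int nid)
  {
          return (unsigned long)-ENOMEM;  /* real allocation goes here */
  }

  static void vm_free(struct vm_pool *pool, unsigned long handle)
  {
          /* release the object behind the handle */
  }

  static void *vm_obj_read_begin(struct vm_pool *pool, unsigned long handle,
                                 void *local_copy)
  {
          return local_copy;              /* map or copy the object */
  }

  static void vm_obj_read_end(struct vm_pool *pool, unsigned long handle,
                              void *handle_mem)
  {
  }

  static void vm_obj_write(struct vm_pool *pool, unsigned long handle,
                           void *handle_mem, size_t mem_len)
  {
  }

  static u64 vm_get_total_pages(struct vm_pool *pool)
  {
          return atomic_long_read(&pool->pages_used);
  }

  /* generates the zpool_*() entry points, forwarding to the vm_*() ones */
  DECLARE_ZPOOL(vm);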
 Documentation/admin-guide/mm/zswap.rst | 14 +++---
 MAINTAINERS                            |  1 +
 include/linux/zpool.h                  | 62 ++++++++++++++++++++++++++
 mm/Kconfig                             | 22 ++++++++-
 mm/zsmalloc.c                          |  3 ++
 mm/zswap.c                             | 30 ++++++------
 6 files changed, 110 insertions(+), 22 deletions(-)
 create mode 100644 include/linux/zpool.h

diff --git a/Documentation/admin-guide/mm/zswap.rst b/Documentation/admin-guide/mm/zswap.rst
index 283d77217c6f..44075d35e512 100644
--- a/Documentation/admin-guide/mm/zswap.rst
+++ b/Documentation/admin-guide/mm/zswap.rst
@@ -53,16 +53,20 @@ Zswap receives pages for compression from the swap subsystem and is able to
 evict pages from its own compressed pool on an LRU basis and write them back to
 the backing swap device in the case that the compressed pool is full.
 
-Zswap makes use of zsmalloc for the managing the compressed memory pool. Each
-allocation in zsmalloc is not directly accessible by address. Rather, a handle is
+Zswap makes use of the zpool API for managing the compressed memory pool. Each
+allocation in zpool is not directly accessible by address. Rather, a handle is
 returned by the allocation routine and that handle must be mapped before being
 accessed. The compressed memory pool grows on demand and shrinks as compressed
-pages are freed. The pool is not preallocated.
+pages are freed. The pool is not preallocated.
+
+An allocator backend implementing the zpool API can be selected at build
+time via a kernel configuration option. Currently only one backend (zsmalloc)
+is supported, and it is selected automatically.
 
 When a swap page is passed from swapout to zswap, zswap maintains a mapping
 of the swap entry, a combination of the swap type and swap offset, to the
-zsmalloc handle that references that compressed swap page. This mapping is
-achieved with a red-black tree per swap type. The swap offset is the search
+zpool handle that references that compressed swap page. This mapping is
+achieved with a red-black tree per swap type. The swap offset is the search
 key for the tree nodes.
 
 During a page fault on a PTE that is a swap entry, the swapin code calls the

diff --git a/MAINTAINERS b/MAINTAINERS
index ca8e3d18eedd..149a06be4000 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -27885,6 +27885,7 @@ R:	Chengming Zhou
 L:	linux-mm@kvack.org
 S:	Maintained
 F:	Documentation/admin-guide/mm/zswap.rst
+F:	include/linux/zpool.h
 F:	include/linux/zswap.h
 F:	mm/zswap.c
 F:	tools/testing/selftests/cgroup/test_zswap.c

diff --git a/include/linux/zpool.h b/include/linux/zpool.h
new file mode 100644
index 000000000000..05406b00d734
--- /dev/null
+++ b/include/linux/zpool.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * zpool memory storage api
+ *
+ * Copyright (C) 2014 Dan Streetman
+ *
+ * This is a common frontend for the zswap compressed memory storage
+ * implementations.
+ */
+
+#ifndef _ZPOOL_H_
+#define _ZPOOL_H_
+
+struct zpool {
+	void *pool;
+};
+
+struct zpool *zpool_create_pool(const char *name);
+void zpool_destroy_pool(struct zpool *pool);
+unsigned long zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp, const int nid);
+void zpool_free(struct zpool *pool, unsigned long handle);
+void *zpool_obj_read_begin(struct zpool *zpool, unsigned long handle, void *local_copy);
+void zpool_obj_read_end(struct zpool *zpool, unsigned long handle, void *handle_mem);
+void zpool_obj_write(struct zpool *zpool, unsigned long handle, void *handle_mem, size_t mem_len);
+u64 zpool_get_total_pages(struct zpool *zpool);
+
+#define DECLARE_ZPOOL(prefix) \
+	struct zpool *zpool_create_pool(const char *name) \
+	{ \
+		return (struct zpool *) prefix ## _create_pool(name); \
+	} \
+	void zpool_destroy_pool(struct zpool *pool) \
+	{ \
+		return prefix ## _destroy_pool(pool->pool); \
+	} \
+	unsigned long zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp, const int nid) \
+	{ \
+		return prefix ## _malloc(pool->pool, size, gfp, nid); \
+	} \
+	void zpool_free(struct zpool *pool, unsigned long handle) \
+	{ \
+		return prefix ## _free(pool->pool, handle); \
+	} \
+	void *zpool_obj_read_begin(struct zpool *zpool, unsigned long handle, void *local_copy) \
+	{ \
+		return prefix ## _obj_read_begin(zpool->pool, handle, local_copy); \
+	} \
+	void zpool_obj_read_end(struct zpool *zpool, unsigned long handle, void *handle_mem) \
+	{ \
+		return prefix ## _obj_read_end(zpool->pool, handle, handle_mem); \
+	} \
+	void zpool_obj_write(struct zpool *zpool, unsigned long handle, \
+			     void *handle_mem, size_t mem_len) \
+	{ \
+		prefix ## _obj_write(zpool->pool, handle, handle_mem, mem_len); \
+	} \
+	u64 zpool_get_total_pages(struct zpool *zpool) \
+	{ \
+		return prefix ## _get_total_pages(zpool->pool); \
+	}
+
+#endif

diff --git a/mm/Kconfig b/mm/Kconfig
index d1ed839ca710..f72483b91098 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -23,7 +23,7 @@ config ZSWAP
 	bool "Compressed cache for swap pages"
 	depends on SWAP
 	select CRYPTO
-	select ZSMALLOC
+	select ZPOOL_ALLOCATOR
 	help
 	  A lightweight compressed cache for swap pages.  It takes
 	  pages that are in the process of being swapped out and attempts to
@@ -122,8 +122,26 @@ config ZSWAP_COMPRESSOR_DEFAULT
 	default "zstd" if ZSWAP_COMPRESSOR_DEFAULT_ZSTD
 	default ""
 
+config ZPOOL_ALLOCATOR
+	bool
+
+choice
+	prompt "Allocator for zswap"
+	default ZSMALLOC
+	help
+	  Allocator to be used for compressed object storage in zswap.
+	  Currently only zsmalloc is supported.
+
 config ZSMALLOC
-	tristate
+	bool "N:1 compression allocator (zsmalloc)"
+	select ZPOOL_ALLOCATOR
+	depends on MMU
+	help
+	  zsmalloc is a slab-based memory allocator designed to store
+	  pages of various compression levels efficiently. It achieves
+	  the highest storage density with the least amount of fragmentation.
+
+endchoice
 
 if ZSMALLOC
 
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 5bf832f9c05c..c84e96fd30f9 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -36,6 +36,7 @@
 #include
 #include
 #include
+#include <linux/zpool.h>
 #include
 #include
 #include "zpdesc.h"
@@ -432,6 +433,8 @@ static void record_obj(unsigned long handle, unsigned long obj)
 	*(unsigned long *)handle = obj;
 }
 
+DECLARE_ZPOOL(zs);
+
 static inline bool __maybe_unused is_first_zpdesc(struct zpdesc *zpdesc)
 {
 	return PagePrivate(zpdesc_page(zpdesc));

diff --git a/mm/zswap.c b/mm/zswap.c
index c1af782e54ec..a567cc16b9a3 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -34,7 +34,7 @@
 #include
 #include
 #include
-#include <linux/zsmalloc.h>
+#include <linux/zpool.h>
 
 #include "swap.h"
 #include "internal.h"
@@ -151,7 +151,7 @@ struct crypto_acomp_ctx {
  * needs to be verified that it's still valid in the tree.
  */
 struct zswap_pool {
-	struct zs_pool *zs_pool;
+	struct zpool *pool;
 	struct crypto_acomp_ctx __percpu *acomp_ctx;
 	struct percpu_ref ref;
 	struct list_head list;
@@ -257,8 +257,8 @@ static struct zswap_pool *zswap_pool_create(char *compressor)
 	/* unique name for each pool specifically required by zsmalloc */
 	snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
 
-	pool->zs_pool = zs_create_pool(name);
-	if (!pool->zs_pool)
+	pool->pool = zpool_create_pool(name);
+	if (!pool->pool)
 		goto error;
 
 	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
@@ -295,8 +295,8 @@ static struct zswap_pool *zswap_pool_create(char *compressor)
 error:
 	if (pool->acomp_ctx)
 		free_percpu(pool->acomp_ctx);
-	if (pool->zs_pool)
-		zs_destroy_pool(pool->zs_pool);
+	if (pool->pool)
+		zpool_destroy_pool(pool->pool);
 	kfree(pool);
 	return NULL;
 }
@@ -327,7 +327,7 @@ static void zswap_pool_destroy(struct zswap_pool *pool)
 
 	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
 	free_percpu(pool->acomp_ctx);
-	zs_destroy_pool(pool->zs_pool);
+	zpool_destroy_pool(pool->pool);
 	kfree(pool);
 }
 
@@ -454,7 +454,7 @@ unsigned long zswap_total_pages(void)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(pool, &zswap_pools, list)
-		total += zs_get_total_pages(pool->zs_pool);
+		total += zpool_get_total_pages(pool->pool);
 	rcu_read_unlock();
 
 	return total;
@@ -717,7 +717,7 @@ static void zswap_entry_cache_free(struct zswap_entry *entry)
 static void zswap_entry_free(struct zswap_entry *entry)
 {
 	zswap_lru_del(&zswap_list_lru, entry);
-	zs_free(entry->pool->zs_pool, entry->handle);
+	zpool_free(entry->pool->pool, entry->handle);
 	zswap_pool_put(entry->pool);
 	if (entry->objcg) {
 		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
@@ -907,13 +907,13 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
 	}
 
 	gfp = GFP_NOWAIT | __GFP_NORETRY | __GFP_HIGHMEM | __GFP_MOVABLE;
-	handle = zs_malloc(pool->zs_pool, dlen, gfp, page_to_nid(page));
+	handle = zpool_malloc(pool->pool, dlen, gfp, page_to_nid(page));
 	if (IS_ERR_VALUE(handle)) {
 		alloc_ret = PTR_ERR((void *)handle);
 		goto unlock;
 	}
 
-	zs_obj_write(pool->zs_pool, handle, dst, dlen);
+	zpool_obj_write(pool->pool, handle, dst, dlen);
 	entry->handle = handle;
 	entry->length = dlen;
 
@@ -940,7 +940,7 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 	u8 *src, *obj;
 
 	acomp_ctx = acomp_ctx_get_cpu_lock(pool);
-	obj = zs_obj_read_begin(pool->zs_pool, entry->handle, acomp_ctx->buffer);
+	obj = zpool_obj_read_begin(pool->pool, entry->handle, acomp_ctx->buffer);
 
 	/* zswap entries of length PAGE_SIZE are not compressed. */
 	if (entry->length == PAGE_SIZE) {
@@ -949,7 +949,7 @@
 	}
 
 	/*
-	 * zs_obj_read_begin() might return a kmap address of highmem when
+	 * zpool_obj_read_begin() might return a kmap address of highmem when
 	 * acomp_ctx->buffer is not used. However, sg_init_one() does not
 	 * handle highmem addresses, so copy the object to acomp_ctx->buffer.
 	 */
@@ -969,7 +969,7 @@
 	dlen = acomp_ctx->req->dlen;
 
 read_done:
-	zs_obj_read_end(pool->zs_pool, entry->handle, obj);
+	zpool_obj_read_end(pool->pool, entry->handle, obj);
 	acomp_ctx_put_unlock(acomp_ctx);
 
 	if (!decomp_ret && dlen == PAGE_SIZE)
@@ -1486,7 +1486,7 @@ static bool zswap_store_page(struct page *page,
 	return true;
 
 store_failed:
-	zs_free(pool->zs_pool, entry->handle);
+	zpool_free(pool->pool, entry->handle);
 compress_failed:
 	zswap_entry_cache_free(entry);
 	return false;
-- 
2.39.2