Do not silently autocorrect bad recompression priority parameter value and just error out. Suggested-by: Minchan Kim Signed-off-by: Sergey Senozhatsky --- drivers/block/zram/zram_drv.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 4eaec24a23fe..53e0bbf9d6ec 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2524,19 +2524,16 @@ static ssize_t recompress_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS; struct zram *zram = dev_to_zram(dev); char *args, *param, *val, *algo = NULL; u64 num_recomp_pages = ULLONG_MAX; struct zram_pp_ctl *ctl = NULL; struct zram_pp_slot *pps; u32 mode = 0, threshold = 0; - u32 prio, prio_max; struct page *page = NULL; ssize_t ret; - prio = ZRAM_SECONDARY_COMP; - prio_max = zram->num_active_comps; - args = skip_spaces(buf); while (*args) { args = next_arg(args, ¶m, &val); @@ -2586,10 +2583,7 @@ static ssize_t recompress_store(struct device *dev, if (ret) return ret; - if (prio == ZRAM_PRIMARY_COMP) - prio = ZRAM_SECONDARY_COMP; - - prio_max = prio + 1; + prio_max = min(prio + 1, ZRAM_MAX_COMPS); continue; } } @@ -2609,7 +2603,7 @@ static ssize_t recompress_store(struct device *dev, continue; if (!strcmp(zram->comp_algs[prio], algo)) { - prio_max = prio + 1; + prio_max = min(prio + 1, ZRAM_MAX_COMPS); found = true; break; } @@ -2627,6 +2621,11 @@ static ssize_t recompress_store(struct device *dev, goto out; } + if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS) { + ret = -EINVAL; + goto out; + } + page = alloc_page(GFP_KERNEL); if (!page) { ret = -ENOMEM; -- 2.53.0.473.g4a7958ca14-goog It's not entirely correct to use ->num_active_comps for max-prio limit, as ->num_active_comps just tells the number of configured algorithms, not the max configured priority. For instance, in the following theoretical example: [lz4] [nil] [nil] [deflate] ->num_active_comps is 2, while the actual max-prio is 3. Drop ->num_active_comps and use ZRAM_MAX_COMPS instead. Suggested-by: Minchan Kim Signed-off-by: Sergey Senozhatsky --- drivers/block/zram/zram_drv.c | 29 ++++++++++++++++------------- drivers/block/zram/zram_drv.h | 1 - 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 53e0bbf9d6ec..6ca5a76c3865 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2371,6 +2371,18 @@ static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max, return 0; } +static bool highest_priority_algorithm(struct zram *zram, u32 prio) +{ + u32 p; + + for (p = prio + 1; p < ZRAM_MAX_COMPS; p++) { + if (zram->comp_algs[p]) + return false; + } + + return true; +} + /* * This function will decompress (unless it's ZRAM_HUGE) the page and then * attempt to compress it using provided compression algorithm priority @@ -2478,12 +2490,11 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, * Secondary algorithms failed to re-compress the page * in a way that would save memory. * - * Mark the object incompressible if the max-priority - * algorithm couldn't re-compress it. + * Mark the object incompressible if the max-priority (the + * last configured one) algorithm couldn't re-compress it. */ - if (prio < zram->num_active_comps) - return 0; - set_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE); + if (highest_priority_algorithm(zram, prio)) + set_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE); return 0; } @@ -2615,12 +2626,6 @@ static ssize_t recompress_store(struct device *dev, } } - prio_max = min(prio_max, (u32)zram->num_active_comps); - if (prio >= prio_max) { - ret = -EINVAL; - goto out; - } - if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS) { ret = -EINVAL; goto out; @@ -2833,7 +2838,6 @@ static void zram_destroy_comps(struct zram *zram) if (!comp) continue; zcomp_destroy(comp); - zram->num_active_comps--; } for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) @@ -2898,7 +2902,6 @@ static ssize_t disksize_store(struct device *dev, struct device_attribute *attr, } zram->comps[prio] = comp; - zram->num_active_comps++; } zram->disksize = disksize; set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index f0de8f8218f5..08d1774c15db 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -125,7 +125,6 @@ struct zram { */ u64 disksize; /* bytes */ const char *comp_algs[ZRAM_MAX_COMPS]; - s8 num_active_comps; /* * zram is claimed so open request will be failed */ -- 2.53.0.473.g4a7958ca14-goog Recompression algorithm lookup by name is ambiguous and can lead to unexpected results. The problem is that the system can configure the same algorithm but with different parameters (compression level, C/D-dicts, etc.) multiple times: [zstd clevel=3] [zstd clevel=8 dict=/etc/dict] making it impossible to distinguish compressors by name. It is advised to always use "priority". Additionally, override "algo" with "priority", when both params are provided. Suggested-by: Minchan Kim Signed-off-by: Sergey Senozhatsky --- drivers/block/zram/zram_drv.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 6ca5a76c3865..118b0b277e37 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2594,6 +2594,16 @@ static ssize_t recompress_store(struct device *dev, if (ret) return ret; + /* + * "priority" overrides "algo". + * + * We can have several algorithms configured with + * different params (compression/acceleration level, + * C/D-dict, etc.) but under the same name. + * + * "algorithm" name lookup is ambiguous. + */ + algo = NULL; prio_max = min(prio + 1, ZRAM_MAX_COMPS); continue; } -- 2.53.0.473.g4a7958ca14-goog Emphasize usage of the `priority` parameter for recompression and explain why `algo` parameter can lead to unexpected behavior and thus is not recommended. Signed-off-by: Sergey Senozhatsky --- Documentation/admin-guide/blockdev/zram.rst | 40 ++++++++++----------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst index 451fa00d3004..967b58c3aad2 100644 --- a/Documentation/admin-guide/blockdev/zram.rst +++ b/Documentation/admin-guide/blockdev/zram.rst @@ -462,7 +462,7 @@ know it via /sys/block/zram0/bd_stat's 3rd column. recompression ------------- -With CONFIG_ZRAM_MULTI_COMP, zram can recompress pages using alternative +With `CONFIG_ZRAM_MULTI_COMP`, zram can recompress pages using alternative (secondary) compression algorithms. The basic idea is that alternative compression algorithm can provide better compression ratio at a price of (potentially) slower compression/decompression speeds. Alternative compression @@ -471,7 +471,7 @@ that default algorithm failed to compress). Another application is idle pages recompression - pages that are cold and sit in the memory can be recompressed using more effective algorithm and, hence, reduce zsmalloc memory usage. -With CONFIG_ZRAM_MULTI_COMP, zram supports up to 4 compression algorithms: +With `CONFIG_ZRAM_MULTI_COMP`, zram supports up to 4 compression algorithms: one primary and up to 3 secondary ones. Primary zram compressor is explained in "3) Select compression algorithm", secondary algorithms are configured using recomp_algorithm device attribute. @@ -495,34 +495,43 @@ configuration::: #select deflate recompression algorithm, priority 2 echo "algo=deflate priority=2" > /sys/block/zramX/recomp_algorithm -Another device attribute that CONFIG_ZRAM_MULTI_COMP enables is recompress, +Another device attribute that `CONFIG_ZRAM_MULTI_COMP` enables is `recompress`, which controls recompression. Examples::: #IDLE pages recompression is activated by `idle` mode - echo "type=idle" > /sys/block/zramX/recompress + echo "type=idle priority=1" > /sys/block/zramX/recompress #HUGE pages recompression is activated by `huge` mode - echo "type=huge" > /sys/block/zram0/recompress + echo "type=huge priority=2" > /sys/block/zram0/recompress #HUGE_IDLE pages recompression is activated by `huge_idle` mode - echo "type=huge_idle" > /sys/block/zramX/recompress + echo "type=huge_idle priority=1" > /sys/block/zramX/recompress The number of idle pages can be significant, so user-space can pass a size threshold (in bytes) to the recompress knob: zram will recompress only pages of equal or greater size::: #recompress all pages larger than 3000 bytes - echo "threshold=3000" > /sys/block/zramX/recompress + echo "threshold=3000 priority=1" > /sys/block/zramX/recompress #recompress idle pages larger than 2000 bytes - echo "type=idle threshold=2000" > /sys/block/zramX/recompress + echo "type=idle threshold=2000 priority=1" > \ + /sys/block/zramX/recompress It is also possible to limit the number of pages zram re-compression will attempt to recompress::: - echo "type=huge_idle max_pages=42" > /sys/block/zramX/recompress + echo "type=huge_idle priority=1 max_pages=42" > \ + /sys/block/zramX/recompress + +It is advised to always specify `priority` parameter. While it is also +possible to specify `algo` parameter, so that `zram` will use algorithm's +name to determine the priority, it is not recommended, since it can lead to +unexpected results when the same algorithm is configured with different +priorities (e.g. different parameters). `priority` is the only way to +guarantee that the expected algorithm will be used. During re-compression for every page, that matches re-compression criteria, ZRAM iterates the list of registered alternative compression algorithms in @@ -533,19 +542,6 @@ no secondary algorithms left to try. If none of the secondary algorithms can successfully re-compressed the page such a page is marked as incompressible, so ZRAM will not attempt to re-compress it in the future. -This re-compression behaviour, when it iterates through the list of -registered compression algorithms, increases our chances of finding the -algorithm that successfully compresses a particular page. Sometimes, however, -it is convenient (and sometimes even necessary) to limit recompression to -only one particular algorithm so that it will not try any other algorithms. -This can be achieved by providing a `algo` or `priority` parameter::: - - #use zstd algorithm only (if registered) - echo "type=huge algo=zstd" > /sys/block/zramX/recompress - - #use zstd algorithm only (if zstd was registered under priority 1) - echo "type=huge priority=1" > /sys/block/zramX/recompress - memory tracking =============== -- 2.53.0.473.g4a7958ca14-goog Chained recompression has unpredictable behavior and is not useful in practice. First, systems usually configure just one alternative recompression algorithm, which has slower compression/decompression but better compression ratio. A single alternative algorithm doesn't need chaining. Second, even with multiple recompression algorithms, chained recompression is suboptimal. If a lower priority algorithm succeeds, the page is never attempted with a higher priority algorithm, leading to worse memory savings. If a lower priority algorithm fails, the page is still attempted with a higher priority algorithm, wasting resources on the failed lower priority attempt. In either case, the system would be better off targeting a specific priority directly. Chained recompression also significantly complicates the code. Remove it. Signed-off-by: Sergey Senozhatsky --- Documentation/admin-guide/blockdev/zram.rst | 9 --- drivers/block/zram/zram_drv.c | 83 ++++++--------------- 2 files changed, 24 insertions(+), 68 deletions(-) diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst index 967b58c3aad2..60b07a7e30cd 100644 --- a/Documentation/admin-guide/blockdev/zram.rst +++ b/Documentation/admin-guide/blockdev/zram.rst @@ -533,15 +533,6 @@ unexpected results when the same algorithm is configured with different priorities (e.g. different parameters). `priority` is the only way to guarantee that the expected algorithm will be used. -During re-compression for every page, that matches re-compression criteria, -ZRAM iterates the list of registered alternative compression algorithms in -order of their priorities. ZRAM stops either when re-compression was -successful (re-compressed object is smaller in size than the original one) -and matches re-compression criteria (e.g. size threshold) or when there are -no secondary algorithms left to try. If none of the secondary algorithms can -successfully re-compressed the page such a page is marked as incompressible, -so ZRAM will not attempt to re-compress it in the future. - memory tracking =============== diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 118b0b277e37..ff1931e700c3 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2331,7 +2331,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, #define RECOMPRESS_IDLE (1 << 0) #define RECOMPRESS_HUGE (1 << 1) -static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max, +static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio, struct zram_pp_ctl *ctl) { unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; @@ -2357,8 +2357,8 @@ static int scan_slots_for_recompress(struct zram *zram, u32 mode, u32 prio_max, test_slot_flag(zram, index, ZRAM_INCOMPRESSIBLE)) goto next; - /* Already compressed with same of higher priority */ - if (get_slot_comp_priority(zram, index) + 1 >= prio_max) + /* Already compressed with same or higher priority */ + if (get_slot_comp_priority(zram, index) >= prio) goto next; ok = place_pp_slot(zram, ctl, index); @@ -2391,8 +2391,7 @@ static bool highest_priority_algorithm(struct zram *zram, u32 prio) * Corresponding ZRAM slot should be locked. */ static int recompress_slot(struct zram *zram, u32 index, struct page *page, - u64 *num_recomp_pages, u32 threshold, u32 prio, - u32 prio_max) + u64 *num_recomp_pages, u32 threshold, u32 prio) { struct zcomp_strm *zstrm = NULL; unsigned long handle_old; @@ -2404,6 +2403,9 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, void *src; int ret = 0; + if (!zram->comps[prio]) + return -EINVAL; + handle_old = get_slot_handle(zram, index); if (!handle_old) return -EINVAL; @@ -2426,51 +2428,10 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, */ clear_slot_flag(zram, index, ZRAM_IDLE); - class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old); - - prio = max(prio, get_slot_comp_priority(zram, index) + 1); - /* - * Recompression slots scan should not select slots that are - * already compressed with a higher priority algorithm, but - * just in case - */ - if (prio >= prio_max) - return 0; - - /* - * Iterate the secondary comp algorithms list (in order of priority) - * and try to recompress the page. - */ - for (; prio < prio_max; prio++) { - if (!zram->comps[prio]) - continue; - - zstrm = zcomp_stream_get(zram->comps[prio]); - src = kmap_local_page(page); - ret = zcomp_compress(zram->comps[prio], zstrm, - src, &comp_len_new); - kunmap_local(src); - - if (ret) { - zcomp_stream_put(zstrm); - zstrm = NULL; - break; - } - - class_index_new = zs_lookup_class_index(zram->mem_pool, - comp_len_new); - - /* Continue until we make progress */ - if (class_index_new >= class_index_old || - (threshold && comp_len_new >= threshold)) { - zcomp_stream_put(zstrm); - zstrm = NULL; - continue; - } - - /* Recompression was successful so break out */ - break; - } + zstrm = zcomp_stream_get(zram->comps[prio]); + src = kmap_local_page(page); + ret = zcomp_compress(zram->comps[prio], zstrm, src, &comp_len_new); + kunmap_local(src); /* * Decrement the limit (if set) on pages we can recompress, even @@ -2481,11 +2442,18 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page, if (*num_recomp_pages) *num_recomp_pages -= 1; - /* Compression error */ - if (ret) + if (ret) { + zcomp_stream_put(zstrm); return ret; + } + + class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old); + class_index_new = zs_lookup_class_index(zram->mem_pool, comp_len_new); + + if (class_index_new >= class_index_old || + (threshold && comp_len_new >= threshold)) { + zcomp_stream_put(zstrm); - if (!zstrm) { /* * Secondary algorithms failed to re-compress the page * in a way that would save memory. @@ -2535,11 +2503,11 @@ static ssize_t recompress_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS; struct zram *zram = dev_to_zram(dev); char *args, *param, *val, *algo = NULL; u64 num_recomp_pages = ULLONG_MAX; struct zram_pp_ctl *ctl = NULL; + u32 prio = ZRAM_SECONDARY_COMP; struct zram_pp_slot *pps; u32 mode = 0, threshold = 0; struct page *page = NULL; @@ -2604,7 +2572,6 @@ static ssize_t recompress_store(struct device *dev, * "algorithm" name lookup is ambiguous. */ algo = NULL; - prio_max = min(prio + 1, ZRAM_MAX_COMPS); continue; } } @@ -2624,7 +2591,6 @@ static ssize_t recompress_store(struct device *dev, continue; if (!strcmp(zram->comp_algs[prio], algo)) { - prio_max = min(prio + 1, ZRAM_MAX_COMPS); found = true; break; } @@ -2653,7 +2619,7 @@ static ssize_t recompress_store(struct device *dev, goto out; } - scan_slots_for_recompress(zram, mode, prio_max, ctl); + scan_slots_for_recompress(zram, mode, prio, ctl); ret = len; while ((pps = select_pp_slot(ctl))) { @@ -2667,8 +2633,7 @@ static ssize_t recompress_store(struct device *dev, goto next; err = recompress_slot(zram, pps->index, page, - &num_recomp_pages, threshold, - prio, prio_max); + &num_recomp_pages, threshold, prio); next: slot_unlock(zram, pps->index); release_pp_slot(zram, pps); -- 2.53.0.473.g4a7958ca14-goog