Add a kunit benchmark comparing crc32c_flip_range() against full crc32c
recomputation across bitmap sizes from 1KB to 64KB. The benchmark reports
per-call latency in nanoseconds and the speedup ratio.

Sample results (x86_64, Intel(R) Xeon(R) Platinum 8331C):

bitmap=1024: flip_range=48 ns, full_crc=45 ns, speedup=0.9x
bitmap=2048: flip_range=53 ns, full_crc=88 ns, speedup=1.6x
bitmap=4096: flip_range=57 ns, full_crc=182 ns, speedup=3.1x
bitmap=8192: flip_range=63 ns, full_crc=357 ns, speedup=5.6x
bitmap=16384: flip_range=68 ns, full_crc=709 ns, speedup=10.3x
bitmap=32768: flip_range=73 ns, full_crc=1421 ns, speedup=19.3x
bitmap=65536: flip_range=78 ns, full_crc=2853 ns, speedup=36.3x

Signed-off-by: Baokun Li
---
 lib/crc/tests/crc_kunit.c | 68 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/lib/crc/tests/crc_kunit.c b/lib/crc/tests/crc_kunit.c
index 46f9df5b58e4..8e8b541b37d3 100644
--- a/lib/crc/tests/crc_kunit.c
+++ b/lib/crc/tests/crc_kunit.c
@@ -554,6 +554,73 @@ static void crc32c_flip_range_test(struct kunit *test)
 	}
 }
 
+/*
+ * Benchmark crc32c_flip_range() against a full crc32c() recomputation.
+ *
+ * For each bitmap size, time num_iters back-to-back calls of each function
+ * with preemption disabled, then log the mean per-call latency in ns and the
+ * full_crc/flip_range ratio with one decimal digit.  Skipped unless
+ * CONFIG_CRC_BENCHMARK is enabled.
+ */
+static void crc32c_flip_range_benchmark(struct kunit *test)
+{
+	static const size_t bitmap_sizes[] = {
+		1024, 2048, 4096, 8192, 16384, 32768, 65536,
+	};
+	size_t i, j, num_iters, buflen, total_bits, max_len = 0;
+	/*
+	 * The CRC values are never consumed; crc is volatile so the stores in
+	 * the timed loops cannot be elided.
+	 */
+	volatile u32 crc;
+	u64 t_flip, t_full, speedup_x10;
+	u8 *buf;
+
+	if (!IS_ENABLED(CONFIG_CRC_BENCHMARK))
+		kunit_skip(test, "not enabled");
+
+	/* Size the buffer from the table so a new entry cannot overrun it. */
+	for (i = 0; i < ARRAY_SIZE(bitmap_sizes); i++) {
+		if (bitmap_sizes[i] > max_len)
+			max_len = bitmap_sizes[i];
+	}
+	buf = kunit_kzalloc(test, max_len, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_NULL(test, buf);
+
+	for (i = 0; i < ARRAY_SIZE(bitmap_sizes); i++) {
+		buflen = bitmap_sizes[i];
+		total_bits = buflen * 8;
+		num_iters = 10000000 / (buflen + 128);
+
+		/* Time crc32c_flip_range(), seeded with the buffer's CRC. */
+		crc = crc32c(0, buf, buflen);
+		preempt_disable();
+		t_flip = ktime_get_ns();
+		for (j = 0; j < num_iters; j++)
+			crc = crc32c_flip_range(crc, total_bits, 100, 100);
+		t_flip = ktime_get_ns() - t_flip;
+		preempt_enable();
+
+		/* Time a full crc32c() recomputation over the same buffer. */
+		preempt_disable();
+		t_full = ktime_get_ns();
+		for (j = 0; j < num_iters; j++)
+			crc = crc32c(0, buf, buflen);
+		t_full = ktime_get_ns() - t_full;
+		preempt_enable();
+
+		/* Speedup scaled by 10; guard against a zero t_flip. */
+		speedup_x10 = div64_u64(t_full * 10, t_flip ? t_flip : 1);
+
+		kunit_info(test,
+			   "bitmap=%zu: flip_range=%llu ns, full_crc=%llu ns, speedup=%llu.%01llux\n",
+			   buflen,
+			   div64_u64(t_flip, num_iters),
+			   div64_u64(t_full, num_iters),
+			   speedup_x10 / 10, speedup_x10 % 10);
+	}
+}
+
 static struct kunit_case crc_test_cases[] = {
 #if IS_REACHABLE(CONFIG_CRC7)
 	KUNIT_CASE(crc7_be_test),
@@ -575,6 +642,7 @@ static struct kunit_case crc_test_cases[] = {
 	KUNIT_CASE(crc32c_test),
 	KUNIT_CASE(crc32c_benchmark),
 	KUNIT_CASE(crc32c_flip_range_test),
+	KUNIT_CASE(crc32c_flip_range_benchmark),
 #endif
 #if IS_REACHABLE(CONFIG_CRC64)
 	KUNIT_CASE(crc64_be_test),
-- 
2.43.7