Create a new blktrace test group and add a regression test for a
blktrace false positive WARNING that occurs when zone management
commands are traced with blktrace on V1 version.

Bug: https://syzkaller.appspot.com/bug?extid=153e64c0aa875d7e4c37
Location: kernel/trace/blktrace.c:367-368

The test:
1. Creates a zoned null_blk device (8 zones, 1GB, no conventional zones)
2. Starts blktrace on the device
3. Issues zone open command for all zones
4. Checks dmesg for the false positive WARNING

Device configuration:
- Total size: 1GB
- Zone size: 128MB
- Number of zones: 8
- Conventional zones: 0

If the WARNING is found, the bug is present and logged to the full
output. If no WARNING appears, the bug is fixed.

Note: The bug uses WARN_ON_ONCE, so it triggers only once per boot.
Subsequent runs after the first trigger will not show the WARNING.

Signed-off-by: Chaitanya Kulkarni
---
V1->V2:

Removed dmesg -C to avoid clearing dmesg buffer from other tests
Use _dmesg_since_test_start() to only check messages from this test
(Johannes)

---
 tests/blktrace/001     | 95 ++++++++++++++++++++++++++++++++++++++++++
 tests/blktrace/001.out |  2 +
 tests/blktrace/rc      | 11 +++++
 3 files changed, 108 insertions(+)
 create mode 100755 tests/blktrace/001
 create mode 100644 tests/blktrace/001.out
 create mode 100644 tests/blktrace/rc

diff --git a/tests/blktrace/001 b/tests/blktrace/001
new file mode 100755
index 0000000..43331c1
--- /dev/null
+++ b/tests/blktrace/001
@@ -0,0 +1,95 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2025 Chaitanya Kulkarni
+#
+# Regression test for blktrace false positive WARNING on zone management
+# commands.
+#
+# Bug: https://syzkaller.appspot.com/bug?extid=153e64c0aa875d7e4c37
+# Location: kernel/trace/blktrace.c:367-368
+#
+# The bug triggers a WARNING when zone management commands (zone open/close/
+# finish/reset) are traced with blktrace on V1 version. This is a false
+# positive that should be fixed.
+
+. tests/blktrace/rc
+. common/null_blk
+
+DESCRIPTION="blktrace zone management command tracing"
+QUICK=1
+
+requires() {
+	_have_program blkzone
+	_have_null_blk
+	_have_module_param null_blk zoned
+}
+
+test() {
+	echo "Running ${TEST_NAME}"
+
+	local blktrace_pid
+	local warning_count
+	local device
+
+	# Initialize null_blk with no default devices
+	if ! _init_null_blk nr_devices=0; then
+		return 1
+	fi
+
+	# Create zoned null_blk device via configfs
+	# 8 zones, 1GB total, 128MB per zone, no conventional zones
+	if ! _configure_null_blk nullb0 \
+		memory_backed=1 \
+		zone_size=128 \
+		zone_nr_conv=0 \
+		size=1024 \
+		zoned=1 \
+		power=1; then
+		return 1
+	fi
+
+	device=/dev/nullb0
+
+	# Verify it's a zoned device
+	local zoned_mode
+	zoned_mode=$(cat /sys/block/nullb0/queue/zoned)
+	if [[ "$zoned_mode" != "host-managed" ]]; then
+		echo "Device is not zoned (mode: $zoned_mode)"
+		_exit_null_blk
+		return 1
+	fi
+
+	# Start blktrace. Write the trace files to $TMPDIR so that they do not
+	# litter the current working directory.
+	blktrace -d "${device}" -D "$TMPDIR" -o trace >> "$FULL" 2>&1 &
+	blktrace_pid=$!
+	sleep 2
+
+	# Verify blktrace started
+	if ! kill -0 "$blktrace_pid" 2>/dev/null; then
+		echo "blktrace failed to start"
+		_exit_null_blk
+		return 1
+	fi
+
+	# Issue zone open command for all zones (triggers bug if present)
+	blkzone open "${device}" >> "$FULL" 2>&1
+
+	sleep 1
+
+	# Stop blktrace
+	kill "$blktrace_pid" 2>/dev/null
+	wait "$blktrace_pid" 2>/dev/null || true
+
+	# Check for WARNING (bug present if WARNING found)
+	warning_count=$(_dmesg_since_test_start | grep -c "WARNING.*blktrace.c:367" || true)
+
+	if [[ $warning_count -gt 0 ]]; then
+		echo "WARNING: blktrace bug detected at blktrace.c:367"
+		_dmesg_since_test_start | grep -A 10 "WARNING.*blktrace.c:367" >> "$FULL"
+	fi
+
+	_exit_null_blk
+
+	echo "Test complete"
+}
diff --git a/tests/blktrace/001.out b/tests/blktrace/001.out
new file mode 100644
index 0000000..a122a65
--- /dev/null
+++ b/tests/blktrace/001.out
@@ -0,0 +1,2 @@
+Running blktrace/001
+Test complete
diff --git a/tests/blktrace/rc b/tests/blktrace/rc
new file mode 100644
index 0000000..9b987a2
--- /dev/null
+++ b/tests/blktrace/rc
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2025 Chaitanya Kulkarni
+#
+# Tests for blktrace infrastructure
+
+. common/rc
+
+group_requires() {
+	_have_root && _have_blktrace && _have_program blkparse
+}
-- 
2.40.0

Add regression test for blktrace ftrace corruption bug that occurs when
sysfs trace is enabled followed by ftrace blk tracer.

When /sys/block/*/trace/enable is enabled and then ftrace's blk tracer
is activated, the trace output becomes corrupted showing "Unknown
action" with invalid hex values instead of proper action codes.

The root cause is that ftrace allocates a blk_io_trace2 buffer (64
bytes) but calls record_blktrace_event() which writes v1 format (48
bytes), causing field offset mismatches and corruption.

This test verifies that the trace output is correct and doesn't show
the corruption pattern.

Signed-off-by: Chaitanya Kulkarni
---
 tests/blktrace/002     | 99 ++++++++++++++++++++++++++++++++++++++++++
 tests/blktrace/002.out |  3 ++
 2 files changed, 102 insertions(+)
 create mode 100755 tests/blktrace/002
 create mode 100644 tests/blktrace/002.out

diff --git a/tests/blktrace/002 b/tests/blktrace/002
new file mode 100755
index 0000000..73b8597
--- /dev/null
+++ b/tests/blktrace/002
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2025 Chaitanya Kulkarni
+#
+# Regression test for blktrace ftrace corruption bug when using sysfs
+# trace enable with ftrace blk tracer.
+#
+# Bug: When sysfs trace is enabled (/sys/block/*/trace/enable) and then
+# ftrace blk tracer is enabled, the trace output becomes corrupted showing
+# "Unknown action" with invalid hex values.
+#
+# Root cause: ftrace allocated blk_io_trace2 buffer (64 bytes) but called
+# record_blktrace_event() which writes v1 format (48 bytes), causing field
+# offset mismatches and corruption.
+
+. tests/blktrace/rc
+. common/null_blk
+
+DESCRIPTION="blktrace ftrace corruption with sysfs trace"
+QUICK=1
+
+requires() {
+	_have_null_blk
+	_have_tracefs
+}
+
+test() {
+	echo "Running ${TEST_NAME}"
+
+	local trace_dir="/sys/kernel/debug/tracing"
+	local device
+
+	# Initialize null_blk with one device
+	if ! _init_null_blk nr_devices=1; then
+		return 1
+	fi
+
+	device=/dev/nullb0
+
+	# Verify device exists
+	if [[ ! -b "$device" ]]; then
+		echo "Device $device not found"
+		_exit_null_blk
+		return 1
+	fi
+
+	# Clean up any previous trace state
+	echo 0 > "$trace_dir/tracing_on" 2>/dev/null || true
+	echo > "$trace_dir/trace" 2>/dev/null || true
+	echo nop > "$trace_dir/current_tracer" 2>/dev/null || true
+
+	# Enable sysfs trace for nullb0 (this triggers the bug path)
+	if [[ -f /sys/block/nullb0/trace/enable ]]; then
+		echo 1 > /sys/block/nullb0/trace/enable
+	else
+		echo "No sysfs trace support"
+		_exit_null_blk
+		return 1
+	fi
+
+	# Enable blk ftrace tracer
+	echo blk > "$trace_dir/current_tracer"
+	echo 1 > "$trace_dir/tracing_on"
+
+	# Generate some I/O
+	dd if="$device" of=/dev/null bs=4k count=10 iflag=direct >> "$FULL" 2>&1
+
+	# Stop tracing
+	echo 0 > "$trace_dir/tracing_on"
+
+	# Check trace output for corruption
+	# Get first 10 non-comment lines
+	local trace_output
+	trace_output=$(grep -v "^#" "$trace_dir/trace" | head -10)
+
+	# Restore the tracer state now that the output has been captured, so
+	# that the early return below does not leave the blk tracer selected
+	# and the sysfs trace enabled.
+	echo 0 > /sys/block/nullb0/trace/enable 2>/dev/null || true
+	echo nop > "$trace_dir/current_tracer" 2>/dev/null || true
+
+	if [[ -z "$trace_output" ]]; then
+		echo "No trace output captured"
+		_exit_null_blk
+		return 1
+	fi
+
+	# Check for "Unknown action" which indicates the bug
+	if echo "$trace_output" | grep -q "Unknown action"; then
+		echo "BUG: Trace corruption detected with 'Unknown action'"
+		echo "$trace_output" | head -5 >> "$FULL"
+	else
+		echo "Trace output looks correct"
+	fi
+
+	_exit_null_blk
+
+	echo "Test complete"
+}
diff --git a/tests/blktrace/002.out b/tests/blktrace/002.out
new file mode 100644
index 0000000..b358be9
--- /dev/null
+++ b/tests/blktrace/002.out
@@ -0,0 +1,3 @@
+Running blktrace/002
+Trace output looks correct
+Test complete
-- 
2.40.0