The in-tree approach requires sequential IO maintained in the whole IO stack, which is too strict and may cause false positive because of IO reorder caused by block layer core code, and generic_01 only covers IO order in ublk dispatch code path. Rework test_generic_01 to verify that ublk dispatch doesn't reorder I/O by comparing request start order with completion order using bpftrace. The bpftrace script now: - Tracks each request's start sequence number in a map keyed by sector - On completion, verifies the request's start order matches expected completion order - Reports any out-of-order completions detected The test script: - Wait bpftrace BEGIN code block is run - Pins fio to CPU 0 for deterministic behavior - Uses block_io_start and block_rq_complete tracepoints - Checks bpftrace output for reordering errors Reported-and-tested-by: Alexander Atanasov Signed-off-by: Ming Lei --- .../testing/selftests/ublk/test_generic_01.sh | 23 ++++++--- tools/testing/selftests/ublk/trace/seq_io.bt | 47 +++++++++++++++---- 2 files changed, 54 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/ublk/test_generic_01.sh b/tools/testing/selftests/ublk/test_generic_01.sh index 21a31cd5491a..e1e0b5f72e3d 100755 --- a/tools/testing/selftests/ublk/test_generic_01.sh +++ b/tools/testing/selftests/ublk/test_generic_01.sh @@ -14,7 +14,7 @@ if ! _have_program fio; then exit "$UBLK_SKIP_CODE" fi -_prep_test "null" "sequential io order" +_prep_test "null" "ublk dispatch won't reorder IO" dev_id=$(_add_ublk_dev -t null) _check_add_dev $TID $? @@ -22,15 +22,20 @@ _check_add_dev $TID $? dev_t=$(_get_disk_dev_t "$dev_id") bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 & btrace_pid=$! -sleep 2 -if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then +# Wait for bpftrace probes to be attached (BEGIN block prints BPFTRACE_READY) +for _ in $(seq 100); do + grep -q "BPFTRACE_READY" "$UBLK_TMP" 2>/dev/null && break + sleep 0.1 +done + +if ! kill -0 "$btrace_pid" 2>/dev/null; then _cleanup_test "null" exit "$UBLK_SKIP_CODE" fi -# run fio over this ublk disk -fio --name=write_seq \ +# run fio over this ublk disk (pinned to CPU 0) +taskset -c 0 fio --name=write_seq \ --filename=/dev/ublkb"${dev_id}" \ --ioengine=libaio --iodepth=16 \ --rw=write \ @@ -40,9 +45,13 @@ fio --name=write_seq \ ERR_CODE=$? kill "$btrace_pid" wait -if grep -q "io_out_of_order" "$UBLK_TMP"; then - cat "$UBLK_TMP" + +# Check for out-of-order completions detected by bpftrace +if grep -q "^out_of_order:" "$UBLK_TMP"; then + echo "I/O reordering detected:" + grep "^out_of_order:" "$UBLK_TMP" ERR_CODE=255 fi + _cleanup_test "null" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/trace/seq_io.bt b/tools/testing/selftests/ublk/trace/seq_io.bt index b2f60a92b118..9d36ba35468f 100644 --- a/tools/testing/selftests/ublk/trace/seq_io.bt +++ b/tools/testing/selftests/ublk/trace/seq_io.bt @@ -2,23 +2,52 @@ $1: dev_t $2: RWBS $3: strlen($2) + + Track request order between block_io_start and block_rq_complete. + Sequence starts at 1 so 0 means "never seen". On first valid + completion, sync complete_seq to handle probe attachment races. + block_rq_complete listed first to reduce missed completion window. */ + BEGIN { - @last_rw[$1, str($2)] = (uint64)0; + @start_seq = (uint64)1; + @complete_seq = (uint64)0; + @out_of_order = (uint64)0; + @start_order[0] = (uint64)0; + delete(@start_order[0]); + printf("BPFTRACE_READY\n"); } + tracepoint:block:block_rq_complete +/(int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)/ { - $dev = $1; - if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) { - $last = @last_rw[$dev, str($2)]; - if ((uint64)args.sector != $last) { - printf("io_out_of_order: exp %llu actual %llu\n", - args.sector, $last); + $expected = @start_order[args.sector]; + if ($expected > 0) { + if (@complete_seq == 0) { + @complete_seq = $expected; + } + if ($expected != @complete_seq) { + printf("out_of_order: sector %llu started at seq %llu but completed at seq %llu\n", + args.sector, $expected, @complete_seq); + @out_of_order = @out_of_order + 1; } - @last_rw[$dev, str($2)] = (args.sector + args.nr_sector); + delete(@start_order[args.sector]); + @complete_seq = @complete_seq + 1; } } +tracepoint:block:block_io_start +/(int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)/ +{ + @start_order[args.sector] = @start_seq; + @start_seq = @start_seq + 1; +} + END { - clear(@last_rw); + printf("total_start: %llu total_complete: %llu out_of_order: %llu\n", + @start_seq - 1, @complete_seq, @out_of_order); + clear(@start_order); + clear(@start_seq); + clear(@complete_seq); + clear(@out_of_order); } -- 2.47.1