This patch implements bitwise tracking (tnum analysis) for the BPF_END (byte swap) operation. Currently, the BPF verifier does not track values through the BPF_END operation, treating the result as completely unknown. This limits the verifier's ability to prove the safety of programs that perform endianness conversions, which are common in networking code. For example, the following code pattern for port number validation: int test(struct pt_regs *ctx) { __u64 x = bpf_get_prandom_u32(); x &= 0x3f00; /* Range: [0, 0x3f00], var_off: (0x0; 0x3f00) */ x = bswap16(x); /* Should swap to range [0, 0x3f], var_off: (0x0; 0x3f) */ if (x > 0x3f) goto trap; return 0; trap: return *(u64 *)NULL; /* Should be unreachable */ } currently generates the following verifier output: 1: (54) w0 &= 16128 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=16128,var_off=(0x0; 0x3f00)) 2: (d7) r0 = bswap16 r0 ; R0=scalar() 3: (25) if r0 > 0x3f goto pc+2 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=63,var_off=(0x0; 0x3f)) Without this patch, even though the verifier knows which bits of `x` may be set before the swap, after bswap16 it loses all tracking information and treats `x` as a completely unknown value (`R0=scalar()`), not even bounded to the 16-bit range [0, 65535]. According to the BPF instruction set[1], there are 3 kinds of BPF_END: 1. `bswap(16|32|64)`: opcode=0xd7 (BPF_END | BPF_ALU64 | BPF_TO_LE) - do unconditional swap 2. `le(16|32|64)`: opcode=0xd4 (BPF_END | BPF_ALU | BPF_TO_LE) - on big-endian: do swap - on little-endian: truncation (16/32-bit) or no-op (64-bit) 3. `be(16|32|64)`: opcode=0xdc (BPF_END | BPF_ALU | BPF_TO_BE) - on little-endian: do swap - on big-endian: truncation (16/32-bit) or no-op (64-bit) Since BPF_END operations are inherently bitwise permutations, tnum (bitwise tracking) offers the most efficient and precise mechanism for value analysis. By implementing `tnum_bswap16`, `tnum_bswap32`, and `tnum_bswap64`, we can derive exact `var_off` values concisely, directly reflecting the bit-level changes. Here is an overview of the changes: 1. 
In `tnum_bswap(16|32|64)` (kernel/bpf/tnum.c): Apply `__builtin_bswap(16|32|64)` to the value and mask of `var_off`, truncating both to the operand width first in the 16/32-bit cases. 2. In `adjust_scalar_min_max_vals` (kernel/bpf/verifier.c): Add a `BPF_END` case that performs the byte swap. - Only do the byte swap when * the instruction is alu64 (unconditional swap) OR * the requested byte order differs from the host's byte order. - If a byte swap is needed: * First call `tnum_bswap(16|32|64)` to update `var_off`. * Then reset the bounds, since a byte swap scrambles the range. - For 16/32-bit cases, truncate the dst register to match the swapped size. This enables better verification of networking code that frequently uses byte swaps for protocol processing, reducing false positive rejections. [1] https://www.kernel.org/doc/Documentation/bpf/standardization/instruction-set.rst Co-developed-by: Shenghao Yuan Signed-off-by: Shenghao Yuan Co-developed-by: Yazhou Tang Signed-off-by: Yazhou Tang Signed-off-by: Tianci Cao Acked-by: Eduard Zingerman --- include/linux/tnum.h | 5 +++++ kernel/bpf/tnum.c | 40 ++++++++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 43 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 86 insertions(+), 2 deletions(-) diff --git a/include/linux/tnum.h b/include/linux/tnum.h index c52b862dad45..fa4654ffb621 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -63,6 +63,11 @@ struct tnum tnum_union(struct tnum t1, struct tnum t2); /* Return @a with all but the lowest @size bytes cleared */ struct tnum tnum_cast(struct tnum a, u8 size); +/* Swap the bytes of a tnum */ +struct tnum tnum_bswap16(struct tnum a); +struct tnum tnum_bswap32(struct tnum a); +struct tnum tnum_bswap64(struct tnum a); + /* Returns true if @a is a known constant */ static inline bool tnum_is_const(struct tnum a) { diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index f8e70e9c3998..76e71489529a 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c @@ -8,6 +8,7 @@ */ #include #include +#include #define TNUM(_v, _m) 
(struct tnum){.value = _v, .mask = _m} /* A completely unknown value */ @@ -253,3 +254,42 @@ struct tnum tnum_const_subreg(struct tnum a, u32 value) { return tnum_with_subreg(a, tnum_const(value)); } + +struct tnum tnum_bswap16(struct tnum a) +{ + u64 low16_value, low16_mask; + u64 swapped_value, swapped_mask; + + low16_value = a.value & 0xFFFF; + low16_mask = a.mask & 0xFFFF; + + swapped_value = __builtin_bswap16(low16_value); + swapped_mask = __builtin_bswap16(low16_mask); + + return TNUM(swapped_value, swapped_mask); +} + +struct tnum tnum_bswap32(struct tnum a) +{ + u64 low32_value, low32_mask; + u64 swapped_value, swapped_mask; + + low32_value = a.value & 0xFFFFFFFF; + low32_mask = a.mask & 0xFFFFFFFF; + + swapped_value = __builtin_bswap32(low32_value); + + swapped_mask = __builtin_bswap32(low32_mask); + + return TNUM(swapped_value, swapped_mask); +} + +struct tnum tnum_bswap64(struct tnum a) +{ + u64 swapped_value, swapped_mask; + + swapped_value = __builtin_bswap64(a.value); + swapped_mask = __builtin_bswap64(a.mask); + + return TNUM(swapped_value, swapped_mask); +} diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e7ff8394e0da..ec1ad2b5c606 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15844,6 +15844,7 @@ static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn, case BPF_XOR: case BPF_OR: case BPF_MUL: + case BPF_END: return true; /* @@ -16033,12 +16034,50 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, else scalar_min_max_arsh(dst_reg, &src_reg); break; + case BPF_END: { + /* Byte swap operation - update var_off using tnum_bswap. + * Three cases: + * 1. bswap* (ALU64 + TO_LE): unconditional swap + * 2. to_le* (ALU + TO_LE): swap on big-endian, zero + * extension or no-op on little-endian + * 3. 
to_be* (ALU + TO_BE): swap on little-endian, zero + * extension or no-op on big-endian + */ + + bool to_le = BPF_SRC(insn->code) == BPF_TO_LE; + bool is_big_endian; +#ifdef CONFIG_CPU_BIG_ENDIAN + is_big_endian = true; +#else + is_big_endian = false; +#endif + /* Apply bswap if alu64 or switch between large and small end machines*/ + bool need_bswap = (!alu32) || (to_le ? is_big_endian : !is_big_endian); + + if (need_bswap) { + /* Apply byte swap to var_off */ + if (insn->imm == 16) + dst_reg->var_off = tnum_bswap16(dst_reg->var_off); + else if (insn->imm == 32) + dst_reg->var_off = tnum_bswap32(dst_reg->var_off); + else if (insn->imm == 64) + dst_reg->var_off = tnum_bswap64(dst_reg->var_off); + + /* Reset bounds so they can be re-derived from new var_off */ + __mark_reg_unbounded(dst_reg); + } + /* For bswap16/32, coerce dst register to match the swapped size */ + if (insn->imm == 16 || insn->imm == 32) + coerce_reg_to_size(dst_reg, insn->imm / 8); + break; + } default: break; } /* ALU32 ops are zero extended into 64bit register */ - if (alu32) + /* BPF_END is already handled above, no need for additional zero extension */ + if (alu32 && opcode != BPF_END) zext_32_to_64(dst_reg); reg_bounds_sync(dst_reg); return 0; @@ -16218,7 +16257,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } /* check dest operand */ - if (opcode == BPF_NEG && + if ((opcode == BPF_NEG || opcode == BPF_END) && regs[insn->dst_reg].type == SCALAR_VALUE) { err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK); err = err ?: adjust_scalar_min_max_vals(env, insn, -- 2.34.1 Now BPF_END has bitwise tracking support. This patch adds selftests to cover various cases of BPF_END (`bswap(16|32|64)`, `be(16|32|64)`, `le(16|32|64)`) with bitwise propagation. This patch is based on existing `verifier_bswap.c`, and add several types of new tests: 1. Unconditional byte swap operations: - bswap16/bswap32/bswap64 with unknown bytes 2. 
Endian conversion operations (architecture-aware): - be16/be32/be64: convert to big-endian * on little-endian: do swap * on big-endian: truncation (16/32-bit) or no-op (64-bit) - le16/le32/le64: convert to little-endian * on big-endian: do swap * on little-endian: truncation (16/32-bit) or no-op (64-bit) Each test simulates a realistic networking scenario in which a value is masked so that only some of its bits remain unknown (e.g., var_off=(0x0; 0x3f00), range=[0,0x3f00]), then byte-swapped, and the verifier must prove that the result stays within the expected bounds. Specifically, these selftests are based on dead code elimination: If the BPF verifier tracks the bits precisely through byte swap operations, it can prune the trap path (invalid memory access) that should be unreachable, allowing the program to pass verification. If bitwise tracking is incorrect, the verifier cannot prove the trap is unreachable, causing verification failure. The tests use preprocessor conditionals (`#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__`) to verify correct behavior on both little-endian and big-endian architectures, and require Clang 18+ for bswap instruction support. 
Co-developed-by: Shenghao Yuan Signed-off-by: Shenghao Yuan Co-developed-by: Yazhou Tang Signed-off-by: Yazhou Tang Signed-off-by: Tianci Cao Acked-by: Eduard Zingerman --- .../selftests/bpf/progs/verifier_bswap.c | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c index e61755656e8d..1700dccbf166 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bswap.c +++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c @@ -48,6 +48,49 @@ __naked void bswap_64(void) : __clobber_all); } +#define BSWAP_RANGE_TEST(name, op, in_value, out_value) \ + SEC("socket") \ + __success __log_level(2) \ + __msg("r0 &= {{.*}}; R0=scalar({{.*}},var_off=(0x0; " #in_value "))") \ + __msg("r0 = " op " r0 {{.*}}; R0=scalar({{.*}},var_off=(0x0; " #out_value "))") \ + __naked void name(void) \ + { \ + asm volatile ( \ + "call %[bpf_get_prandom_u32];" \ + "r0 &= " #in_value ";" \ + "r0 = " op " r0;" \ + "r2 = " #out_value " ll;" \ + "if r0 > r2 goto trap_%=;" \ + "r0 = 0;" \ + "exit;" \ + "trap_%=:" \ + "r1 = 42;" \ + "r0 = *(u64 *)(r1 + 0);" \ + "exit;" \ + : \ + : __imm(bpf_get_prandom_u32) \ + : __clobber_all); \ + } + +BSWAP_RANGE_TEST(bswap16_range, "bswap16", 0x3f00, 0x3f) +BSWAP_RANGE_TEST(bswap32_range, "bswap32", 0x3f00, 0x3f0000) +BSWAP_RANGE_TEST(bswap64_range, "bswap64", 0x3f00, 0x3f000000000000) +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +BSWAP_RANGE_TEST(be16_range, "be16", 0x3f00, 0x3f) +BSWAP_RANGE_TEST(be32_range, "be32", 0x3f00, 0x3f0000) +BSWAP_RANGE_TEST(be64_range, "be64", 0x3f00, 0x3f000000000000) +BSWAP_RANGE_TEST(le16_range, "le16", 0x3f00, 0x3f00) +BSWAP_RANGE_TEST(le32_range, "le32", 0x3f00, 0x3f00) +BSWAP_RANGE_TEST(le64_range, "le64", 0x3f00, 0x3f00) +#else +BSWAP_RANGE_TEST(be16_range, "be16", 0x3f00, 0x3f00) +BSWAP_RANGE_TEST(be32_range, "be32", 0x3f00, 0x3f00) +BSWAP_RANGE_TEST(be64_range, "be64", 0x3f00, 0x3f00) 
+BSWAP_RANGE_TEST(le16_range, "le16", 0x3f00, 0x3f) +BSWAP_RANGE_TEST(le32_range, "le32", 0x3f00, 0x3f0000) +BSWAP_RANGE_TEST(le64_range, "le64", 0x3f00, 0x3f000000000000) +#endif + #else SEC("socket") -- 2.34.1