Currently, the BPF verifier only compute shift operations when the shift
amount is a known constant. This is overly restrictive for cases where
the shift amount is bounded but not fully determined at verification time.
For example, the result of following code is set unknown even though
the shift amount is bounded to [1, 4]:

    u32 shift = bpf_get_prandom_u32();
    shift &= 3;    // shift is in range [0, 3]
    shift += 1;    // shift is in range [1, 4]
    r1 <<= shift;  // non-const but bounded shift amount

Modify the shift helper functions (scalar_min_max_lsh,
scalar32_min_max_lsh, scalar_min_max_rsh, scalar32_min_max_rsh,
scalar_min_max_arsh, scalar32_min_max_arsh) to handle non-const
but bounded shift amounts.

Update is_safe_to_compute_dst_reg_range() to remove the src_is_const
check for shift operations. This approach ensures the verifier
remains sound while allowing more programs to pass verification.

Also modify the comment on is_safe_to_compute_dst_reg_range.
Shifts by more than insn bitness are legal in the BPF ISA; they are
currently implementation-defined behaviour of the underlying architecture,
rather than UB, and have been made legal for performance reasons.
See: https://lore.kernel.org/bpf/20210706112502.2064236-47-sashal@kernel.org

Co-developed-by: Yazhou Tang <tangyazhou518@outlook.com>
Signed-off-by: Yazhou Tang <tangyazhou518@outlook.com>
Co-developed-by: Shenghao Yuan <shenghaoyuan0928@163.com>
Signed-off-by: Shenghao Yuan <shenghaoyuan0928@163.com>
Signed-off-by: Tianci Cao <ziye@zju.edu.cn>
---
 kernel/bpf/verifier.c | 99 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 74 insertions(+), 25 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2abc79dbf281..bf247c219cab 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -14277,7 +14277,8 @@ static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
 
 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
-	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
+	dst_reg->var_off = (umin_val == umax_val) ?
+			   tnum_subreg(tnum_lshift(subreg, umin_val)) : tnum_unknown;
 	/* Not required but being careful mark reg64 bounds as unknown so
 	 * that we are forced to pick them up from tnum and zext later and
 	 * if some path skips this step we are still safe.
@@ -14322,7 +14323,8 @@ static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
 	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
 
-	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
+	dst_reg->var_off = (umin_val == umax_val) ?
+			   tnum_lshift(dst_reg->var_off, umin_val) : tnum_unknown;
 	/* We may learn something more from the var_off */
 	__update_reg_bounds(dst_reg);
 }
@@ -14349,7 +14351,8 @@ static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
 	 * var_off of the result.
 	 */
 
-	dst_reg->var_off = tnum_rshift(subreg, umin_val);
+	dst_reg->var_off = (umin_val == umax_val) ?
+			   tnum_rshift(subreg, umin_val) : tnum_unknown;
 	reg_set_urange32(dst_reg, reg_u32_min(dst_reg) >> umax_val,
 			 reg_u32_max(dst_reg) >> umin_val);
 
@@ -14377,7 +14380,8 @@ static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
 	 * and rely on inferring new ones from the unsigned bounds and
 	 * var_off of the result.
 	 */
-	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
+	dst_reg->var_off = (umin_val == umax_val) ?
+			   tnum_rshift(dst_reg->var_off, umin_val) : tnum_unknown;
 	reg_set_urange64(dst_reg, reg_umin(dst_reg) >> umax_val,
 			 reg_umax(dst_reg) >> umin_val);
 
@@ -14392,18 +14396,41 @@ static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
 				  struct bpf_reg_state *src_reg)
 {
-	u64 umin_val = reg_u32_min(src_reg);
+	u32 umin_shift = reg_u32_min(src_reg);
+	u32 umax_shift = reg_u32_max(src_reg);
+	s32 smin = reg_s32_min(dst_reg);
+	s32 smax = reg_s32_max(dst_reg);
 
-	/* Upon reaching here, src_known is true and
-	 * umax_val is equal to umin_val.
-	 * Blow away the dst_reg umin_value/umax_value and rely on
-	 * dst_reg var_off to refine the result.
+	/*
+	 * BPF_ARSH on 32-bit subregister. ARSH is a signed divide by
+	 * 2^k (rounding toward -inf for negatives). With a non-constant
+	 * shift in [umin, umax], the result is bounded by dividing the
+	 * extremes of [smin, smax] by the extremes of the divisor:
+	 *
+	 *   smin >= 0:  [smin / 2^umax, smax / 2^umin]
+	 *   smax <  0:  [smin / 2^umin, smax / 2^umax]
+	 *   mixed:      [smin / 2^umin, smax / 2^umin]
+	 *
+	 * var_off is tnum_unknown since a non-constant shift prevents
+	 * precise bit tracking.
 	 */
-	reg_set_srange32(dst_reg,
-			 (u32)(((s32)reg_s32_min(dst_reg)) >> umin_val),
-			 (u32)(((s32)reg_s32_max(dst_reg)) >> umin_val));
-
-	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
+	if (umin_shift == umax_shift) {
+		reg_set_srange32(dst_reg, (u32)(smin >> umin_shift),
+				 (u32)(smax >> umin_shift));
+		dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off),
+						umin_shift, 32);
+	} else {
+		if (smin >= 0)
+			reg_set_srange32(dst_reg, (u32)(smin >> umax_shift),
+					 (u32)(smax >> umin_shift));
+		else if (smax < 0)
+			reg_set_srange32(dst_reg, (u32)(smin >> umin_shift),
+					 (u32)(smax >> umax_shift));
+		else
+			reg_set_srange32(dst_reg, (u32)(smin >> umin_shift),
+					 (u32)(smax >> umin_shift));
+		dst_reg->var_off = tnum_unknown;
+	}
 
 	__mark_reg64_unbounded(dst_reg);
 	__update_reg32_bounds(dst_reg);
@@ -14412,15 +14439,37 @@ static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
 				struct bpf_reg_state *src_reg)
 {
-	u64 umin_val = reg_umin(src_reg);
+	u64 umin_shift = reg_umin(src_reg);
+	u64 umax_shift = reg_umax(src_reg);
+	s64 smin = reg_smin(dst_reg);
+	s64 smax = reg_smax(dst_reg);
 
-	/* Upon reaching here, src_known is true and umax_val is equal
-	 * to umin_val.
+	/*
+	 * BPF_ARSH (arithmetic right shift) on 64-bit register. Signed
+	 * divide by 2^k, rounding toward -inf for negatives. With a
+	 * non-constant shift in [umin, umax], the result is bounded
+	 * by dividing the extremes of [smin, smax] by the extremes
+	 * of the divisor:
+	 *
+	 *   smin >= 0:  [smin / 2^umax, smax / 2^umin]
+	 *   smax <  0:  [smin / 2^umin, smax / 2^umax]
+	 *   mixed:      [smin / 2^umin, smax / 2^umin]
+	 *
+	 * var_off is tnum_unknown since a non-constant shift prevents
+	 * precise bit tracking.
 	 */
-	reg_set_srange64(dst_reg, reg_smin(dst_reg) >> umin_val,
-			 reg_smax(dst_reg) >> umin_val);
-
-	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
+	if (umin_shift == umax_shift) {
+		reg_set_srange64(dst_reg, smin >> umin_shift, smax >> umin_shift);
+		dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_shift, 64);
+	} else {
+		if (smin >= 0)
+			reg_set_srange64(dst_reg, smin >> umax_shift, smax >> umin_shift);
+		else if (smax < 0)
+			reg_set_srange64(dst_reg, smin >> umin_shift, smax >> umax_shift);
+		else
+			reg_set_srange64(dst_reg, smin >> umin_shift, smax >> umin_shift);
+		dst_reg->var_off = tnum_unknown;
+	}
 
 	/* Its not easy to operate on alu32 bounds here because it depends
 	 * on bits being shifted in from upper 32-bits. Take easy way out
@@ -14516,14 +14565,14 @@ static bool is_safe_to_compute_dst_reg_range(struct bpf_insn *insn,
 	case BPF_MOD:
 		return src_is_const;
 
-	/* Shift operators range is only computable if shift dimension operand
-	 * is a constant. Shifts greater than 31 or 63 are undefined. This
-	 * includes shifts by a negative number.
+	/*
+	 * Shifts greater than 31 or 63 are implementation-defined behaviour.
+	 * This includes shifts by a negative number.
 	 */
 	case BPF_LSH:
 	case BPF_RSH:
 	case BPF_ARSH:
-		return (src_is_const && reg_umax(src_reg) < insn_bitness);
+		return reg_umax(src_reg) < insn_bitness;
 	default:
 		return false;
 	}
-- 
2.43.0