Replace deduce_bounds_64_from_32() with cnum64_cnum32_intersect() based implementation. Assume: - a is a 64-bit range - b is a 32-bit range - t is a refined 64-bit range, such that ∀ v ∈ a, (u32)v ∈ b: v ∈ t. New deduce_bounds_64_from_32() makes the following deductions: (A): 'b' is a sub-range of the first or the last 32-bit sub-range of 'a': 64-bit number axis ---> N*2^32 (N+1)*2^32 (N+2)*2^32 (N+3)*2^32 ||------|---|=====|-------||----------|=====|-------||----------|=====|----|--|| | |< b >| |< b >| |< b >| | | | | | |<--+--------------------------- a ---------------------------+--->| | | |<-------------------------- t -------------------------->| (B) 'b' does not intersect with the first or the last 32-bit sub-range of 'a': N*2^32 (N+1)*2^32 (N+2)*2^32 (N+3)*2^32 ||--|=====|----|----------||--|=====|---------------||--|=====|------------|--|| |< b >| | |< b >| |< b >| | | | | | |<-------------+--------- a -------------------|----------->| | | |<-------- t ------------------>| (C) 'b' crosses 0/U32_MAX boundary: N*2^32 (N+1)*2^32 (N+2)*2^32 (N+3)*2^32 ||===|---------|------|===||===|----------------|===||===|---------|------|===|| |b >| | |< b||b >| |< b||b >| | |< b| | | | | |<-----+----------------- a --------------+-------->| | | |<---------------- t ------------->| Current implementation of deduce_bounds_64_from_32() only handles case (A). reg_bounds.c is updated with similar logic to keep selftests passing. Instead of using cnums it inspects intersection between 'b' and first / last / next-after-first / previous-before-last sub-ranges of 'a'. The reg_bounds.c:range64_range32_intersect() function is verified using cbmc model checker, see [1]. 
[1] https://github.com/eddyz87/cnum-verif Signed-off-by: Eduard Zingerman --- kernel/bpf/verifier.c | 76 ++++---------------- .../testing/selftests/bpf/prog_tests/reg_bounds.c | 82 ++++++++++++++-------- 2 files changed, 65 insertions(+), 93 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 45ee8711c50c5ef19d7a24333e55abcded93a8fb..745aef7d5467f92b0499b480987ef9daf940d52f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2521,69 +2521,19 @@ static void deduce_bounds_64_from_64(struct bpf_reg_state *reg) static void deduce_bounds_64_from_32(struct bpf_reg_state *reg) { - /* Try to tighten 64-bit bounds from 32-bit knowledge, using 32-bit - * values on both sides of 64-bit range in hope to have tighter range. - * E.g., if r1 is [0x1'00000000, 0x3'80000000], and we learn from - * 32-bit signed > 0 operation that s32 bounds are now [1; 0x7fffffff]. - * With this, we can substitute 1 as low 32-bits of _low_ 64-bit bound - * (0x100000000 -> 0x100000001) and 0x7fffffff as low 32-bits of - * _high_ 64-bit bound (0x380000000 -> 0x37fffffff) and arrive at a - * better overall bounds for r1 as [0x1'000000001; 0x3'7fffffff]. - * We just need to make sure that derived bounds we are intersecting - * with are well-formed ranges in respective s64 or u64 domain, just - * like we do with similar kinds of 32-to-64 or 64-to-32 adjustments. 
- */ - __u64 new_umin, new_umax; - __s64 new_smin, new_smax; - - /* u32 -> u64 tightening, it's always well-formed */ - new_umin = (reg->umin_value & ~0xffffffffULL) | reg->u32_min_value; - new_umax = (reg->umax_value & ~0xffffffffULL) | reg->u32_max_value; - reg->umin_value = max_t(u64, reg->umin_value, new_umin); - reg->umax_value = min_t(u64, reg->umax_value, new_umax); - /* u32 -> s64 tightening, u32 range embedded into s64 preserves range validity */ - new_smin = (reg->smin_value & ~0xffffffffULL) | reg->u32_min_value; - new_smax = (reg->smax_value & ~0xffffffffULL) | reg->u32_max_value; - reg->smin_value = max_t(s64, reg->smin_value, new_smin); - reg->smax_value = min_t(s64, reg->smax_value, new_smax); - - /* Here we would like to handle a special case after sign extending load, - * when upper bits for a 64-bit range are all 1s or all 0s. - * - * Upper bits are all 1s when register is in a range: - * [0xffff_ffff_0000_0000, 0xffff_ffff_ffff_ffff] - * Upper bits are all 0s when register is in a range: - * [0x0000_0000_0000_0000, 0x0000_0000_ffff_ffff] - * Together this forms are continuous range: - * [0xffff_ffff_0000_0000, 0x0000_0000_ffff_ffff] - * - * Now, suppose that register range is in fact tighter: - * [0xffff_ffff_8000_0000, 0x0000_0000_ffff_ffff] (R) - * Also suppose that it's 32-bit range is positive, - * meaning that lower 32-bits of the full 64-bit register - * are in the range: - * [0x0000_0000, 0x7fff_ffff] (W) - * - * If this happens, then any value in a range: - * [0xffff_ffff_0000_0000, 0xffff_ffff_7fff_ffff] - * is smaller than a lowest bound of the range (R): - * 0xffff_ffff_8000_0000 - * which means that upper bits of the full 64-bit register - * can't be all 1s, when lower bits are in range (W). - * - * Note that: - * - 0xffff_ffff_8000_0000 == (s64)S32_MIN - * - 0x0000_0000_7fff_ffff == (s64)S32_MAX - * These relations are used in the conditions below. 
- */ - if (reg->s32_min_value >= 0 && reg->smin_value >= S32_MIN && reg->smax_value <= S32_MAX) { - reg->smin_value = reg->s32_min_value; - reg->smax_value = reg->s32_max_value; - reg->umin_value = reg->s32_min_value; - reg->umax_value = reg->s32_max_value; - reg->var_off = tnum_intersect(reg->var_off, - tnum_range(reg->smin_value, reg->smax_value)); - } + struct cnum32 s = cnum32_from_sreg(reg); + struct cnum32 u = cnum32_from_ureg(reg); + struct cnum64 t; + + t = cnum64_from_ureg(reg); + if (cnum64_cnum32_intersect(t, s, &t) && + cnum64_cnum32_intersect(t, u, &t)) + cnum_update_reg64_bounds(reg, t); + + t = cnum64_from_sreg(reg); + if (cnum64_cnum32_intersect(t, s, &t) && + cnum64_cnum32_intersect(t, u, &t)) + cnum_update_reg64_bounds(reg, t); } static void __reg_deduce_bounds(struct bpf_reg_state *reg) diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index cb8dd2f63296b826491c8a010f4ece4aa20c0560..b6f46608d5f18a60238898b406bdfb51aa0dd404 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -478,6 +478,52 @@ static struct range range_refine_in_halves(enum num_t x_t, struct range x, } +static __always_inline u64 next_u32_block(u64 x) { return x + (1ULL << 32); } +static __always_inline u64 prev_u32_block(u64 x) { return x - (1ULL << 32); } + +/* Is v within the circular u64 range [base, base + len]? */ +static __always_inline bool u64_range_contains(u64 v, u64 base, u64 len) +{ + return v - base <= len; +} + +/* Is v within the circular u32 range [base, base + len]? 
*/ +static __always_inline bool u32_range_contains(u32 v, u32 base, u32 len) +{ + return v - base <= len; +} + +static bool range64_range32_intersect(enum num_t a_t, + struct range a /* 64 */, + struct range b /* 32 */, + struct range *out /* 64 */) +{ + u64 b_len = (u32)(b.b - b.a); + u64 a_len = a.b - a.a; + u64 lo, hi; + + if (u32_range_contains((u32)a.a, (u32)b.a, b_len)) { + lo = a.a; + } else { + lo = swap_low32(a.a, (u32)b.a); + if (!u64_range_contains(lo, a.a, a_len)) + lo = next_u32_block(lo); + if (!u64_range_contains(lo, a.a, a_len)) + return false; + } + if (u32_range_contains(a.b, (u32)b.a, b_len)) { + hi = a.b; + } else { + hi = swap_low32(a.b, (u32)b.b); + if (!u64_range_contains(hi, a.a, a_len)) + hi = prev_u32_block(hi); + if (!u64_range_contains(hi, a.a, a_len)) + return false; + } + *out = range(a_t, lo, hi); + return true; +} + static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y) { struct range y_cast; @@ -485,40 +531,16 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, if (t_is_32(x_t) == t_is_32(y_t)) x = range_refine_in_halves(x_t, x, y_t, y); - y_cast = range_cast(y_t, x_t, y); - - /* If we know that - * - *x* is in the range of signed 32bit value, and - * - *y_cast* range is 32-bit signed non-negative - * then *x* range can be improved with *y_cast* such that *x* range - * is 32-bit signed non-negative. Otherwise, if the new range for *x* - * allows upper 32-bit * 0xffffffff then the eventual new range for - * *x* will be out of signed 32-bit range which violates the origin - * *x* range. 
- */ - if (x_t == S64 && y_t == S32 && y_cast.a <= S32_MAX && y_cast.b <= S32_MAX && - (s64)x.a >= S32_MIN && (s64)x.b <= S32_MAX) - return range_intersection(x_t, x, y_cast); - - /* the case when new range knowledge, *y*, is a 32-bit subregister - * range, while previous range knowledge, *x*, is a full register - * 64-bit range, needs special treatment to take into account upper 32 - * bits of full register range - */ if (t_is_32(y_t) && !t_is_32(x_t)) { - struct range x_swap; + struct range x1; - /* some combinations of upper 32 bits and sign bit can lead to - * invalid ranges, in such cases it's easier to detect them - * after cast/swap than try to enumerate all the conditions - * under which transformation and knowledge transfer is valid - */ - x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b)); - if (!is_valid_range(x_t, x_swap)) - return x; - return range_intersection(x_t, x, x_swap); + if (range64_range32_intersect(x_t, x, y, &x1)) + return x1; + return x; } + y_cast = range_cast(y_t, x_t, y); + /* otherwise, plain range cast and intersection works */ return range_intersection(x_t, x, y_cast); } -- 2.51.1