Extend the stack argument mechanism to kfunc calls, allowing kfuncs with more than 5 parameters to receive additional arguments via the r12-based stack arg area. For kfuncs, the caller is a BPF program and the callee is a kernel function. The BPF program writes outgoing args at r12-relative offsets past its own incoming area. The following is an example to show how stack arguments are saved: int foo(int a1, int a2, int a3, int a4, int a5, int a6, int a7) { ... kfunc1(a1, a2, a3, a4, a5, a6, a7, a8); ... kfunc2(a1, a2, a3, a4, a5, a6, a7, a8, a9); ... } The following is an illustration: Caller (foo) ============ r12-relative stack arg area: r12-8: [incoming arg 6] r12-16: [incoming arg 7] ---- incoming/outgoing boundary (kfunc1) r12-24: [outgoing arg 6 to callee] r12-32: [outgoing arg 7 to callee] r12-40: [outgoing arg 8 to callee] ... Back from kfunc1 ... ---- incoming/outgoing boundary r12-24: [outgoing arg 6 to callee] r12-32: [outgoing arg 7 to callee] r12-40: [outgoing arg 8 to callee] r12-48: [outgoing arg 9 to callee] Later JIT will marshal outgoing arguments to the native calling convention for kfunc1() and kfunc2(). In check_kfunc_args(), for args beyond the 5th, retrieve the spilled register state from the caller's stack arg slots. Temporarily copy it into regs[BPF_REG_1] to reuse the existing type checking infrastructure, then restore after checking. 
The following is one example based on a later selftest: 13: (85) call bpf_kfunc_call_stack_arg_mem#152105 Use reg 1 for stack arg#5 Use reg 2 to represent mem_size mark_precise: frame0: last_idx 13 first_idx 0 subseq_idx -1 mark_precise: frame0: regs=r2 stack= before 12: (b7) r5 = 5 mark_precise: frame0: regs=r2 stack= before 11: (b7) r4 = 4 mark_precise: frame0: regs=r2 stack= before 10: (b7) r3 = 3 mark_precise: frame0: regs=r2 stack= before 9: (b7) r2 = 2 mark_precise: frame0: last_idx 13 first_idx 0 subseq_idx -1 mark_precise: frame0: regs=r2 stack= before 12: (b7) r5 = 5 mark_precise: frame0: regs=r2 stack= before 11: (b7) r4 = 4 mark_precise: frame0: regs=r2 stack= before 10: (b7) r3 = 3 mark_precise: frame0: regs=r2 stack= before 9: (b7) r2 = 2 End of using reg 1 for stack arg#6 14: R0=scalar() 14: (95) exit The above example is for the KF_ARG_PTR_TO_MEM_SIZE case. Registers 1 and 2 are used as temporary registers for argument checking. The verifier log will identify when a temporary register is used and when it is no longer used. There are two places where meta->release_regno needs to keep regno for later releasing the reference. Also, 'cur_aux(env)->arg_prog = regno' keeps regno for a later fixup. Since regno is a faked one, these three cases are rejected for now if they are in stack arguments. If possible, new kfuncs could keep such arguments in the first 5 registers so there are no issues at all. If the faked-register approach is not favored, an alternative approach uses the following data structure: struct reg_or_arg_t { struct bpf_reg_state *state; union { int regno; int argno; }; bool is_reg; /* distinguish between reg and arg */ }; Such a struct can replace the existing regno/argno/reg_state parameters to make it easy to distinguish reg vs. arg. 
Signed-off-by: Yonghong Song --- kernel/bpf/verifier.c | 143 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 124 insertions(+), 19 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 183a4108fd4d..dba889cece1c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3491,7 +3491,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) struct bpf_kfunc_meta kfunc; struct bpf_kfunc_desc *desc; unsigned long addr; - int err; + int i, err; prog_aux = env->prog->aux; tab = prog_aux->kfunc_tab; @@ -3567,6 +3567,14 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) if (err) return err; + for (i = MAX_BPF_FUNC_REG_ARGS; i < func_model.nr_args; i++) { + if (func_model.arg_size[i] > sizeof(u64)) { + verbose(env, "kfunc %s arg#%d size %d > %zu not supported for stack args\n", + kfunc.name, i, func_model.arg_size[i], sizeof(u64)); + return -EINVAL; + } + } + desc = &tab->descs[tab->nr_descs++]; desc->func_id = func_id; desc->offset = offset; @@ -13083,6 +13091,19 @@ static bool is_kfunc_pkt_changing(struct bpf_kfunc_call_arg_meta *meta) return meta->func_id == special_kfunc_list[KF_bpf_xdp_pull_data]; } +static struct bpf_reg_state *get_kfunc_arg_reg(struct bpf_verifier_env *env, int argno) +{ + struct bpf_func_state *caller; + int spi; + + if (argno < MAX_BPF_FUNC_REG_ARGS) + return &cur_regs(env)[argno + 1]; + + caller = cur_func(env); + spi = caller->incoming_stack_arg_depth / BPF_REG_SIZE + (argno - MAX_BPF_FUNC_REG_ARGS); + return &caller->stack_arg_slots[spi].spilled_ptr; +} + static enum kfunc_ptr_arg_type get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta, @@ -13101,8 +13122,8 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, return KF_ARG_PTR_TO_CTX; if (argno + 1 < nargs && - (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], ®s[regno + 1]) || - is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], ®s[regno + 
1]))) + (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], get_kfunc_arg_reg(env, argno + 1)) || + is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], get_kfunc_arg_reg(env, argno + 1)))) arg_mem_size = true; /* In this function, we verify the kfunc's BTF as per the argument type, @@ -13770,9 +13791,9 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ args = (const struct btf_param *)(meta->func_proto + 1); nargs = btf_type_vlen(meta->func_proto); - if (nargs > MAX_BPF_FUNC_REG_ARGS) { + if (nargs > MAX_BPF_FUNC_ARGS) { verbose(env, "Function %s has %d > %d args\n", func_name, nargs, - MAX_BPF_FUNC_REG_ARGS); + MAX_BPF_FUNC_ARGS); return -EINVAL; } @@ -13780,26 +13801,59 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ * verifier sees. */ for (i = 0; i < nargs; i++) { - struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[i + 1]; + struct bpf_reg_state *regs = cur_regs(env), *reg; + struct bpf_reg_state saved_reg; const struct btf_type *t, *ref_t, *resolve_ret; enum bpf_arg_type arg_type = ARG_DONTCARE; u32 regno = i + 1, ref_id, type_size; bool is_ret_buf_sz = false; + bool is_stack_arg = false; int kf_arg_type; + if (i < MAX_BPF_FUNC_REG_ARGS) { + reg = ®s[i + 1]; + } else { + /* + * Retrieve the spilled reg state from the stack arg slot. + * Reuse the existing type checking infrastructure which + * reads from cur_regs(env)[regno], temporarily copy the + * stack arg reg state into regs[BPF_REG_1] and restore + * it after checking. 
+ */ + struct bpf_func_state *caller = cur_func(env); + int spi = caller->incoming_stack_arg_depth / BPF_REG_SIZE + + (i - MAX_BPF_FUNC_REG_ARGS); + + if (!is_stack_arg_slot_initialized(caller, spi)) { + verbose(env, "stack arg#%d not properly initialized\n", i); + return -EINVAL; + } + + is_stack_arg = true; + regno = BPF_REG_1; + saved_reg = regs[BPF_REG_1]; + regs[BPF_REG_1] = caller->stack_arg_slots[spi].spilled_ptr; + reg = ®s[BPF_REG_1]; + verbose(env, "Use reg %d for stack arg#%d\n", regno, i); + } + if (is_kfunc_arg_prog_aux(btf, &args[i])) { /* Reject repeated use bpf_prog_aux */ if (meta->arg_prog) { verifier_bug(env, "Only 1 prog->aux argument supported per-kfunc"); return -EFAULT; } + if (is_stack_arg) { + verbose(env, "arg#%d prog->aux cannot be a stack argument\n", i); + return -EINVAL; + } meta->arg_prog = true; cur_aux(env)->arg_prog = regno; - continue; + goto next_arg; } if (is_kfunc_arg_ignore(btf, &args[i]) || is_kfunc_arg_implicit(meta, i)) - continue; + goto next_arg; t = btf_type_skip_modifiers(btf, args[i].type, NULL); @@ -13818,9 +13872,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ verbose(env, "R%d must be a known constant\n", regno); return -EINVAL; } - ret = mark_chain_precision(env, regno); - if (ret < 0) - return ret; + if (!is_stack_arg) { + ret = mark_chain_precision(env, regno); + if (ret < 0) + return ret; + } meta->arg_constant.found = true; meta->arg_constant.value = reg->var_off.value; } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) { @@ -13842,11 +13898,13 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } meta->r0_size = reg->var_off.value; - ret = mark_chain_precision(env, regno); - if (ret) - return ret; + if (!is_stack_arg) { + ret = mark_chain_precision(env, regno); + if (ret) + return ret; + } } - continue; + goto next_arg; } if (!btf_type_is_ptr(t)) { @@ -13868,8 +13926,13 @@ static int check_kfunc_args(struct 
bpf_verifier_env *env, struct bpf_kfunc_call_ return -EFAULT; } meta->ref_obj_id = reg->ref_obj_id; - if (is_kfunc_release(meta)) + if (is_kfunc_release(meta)) { + if (is_stack_arg) { + verbose(env, "arg#%d release arg cannot be a stack argument\n", i); + return -EINVAL; + } meta->release_regno = regno; + } } ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id); @@ -13881,7 +13944,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ switch (kf_arg_type) { case KF_ARG_PTR_TO_NULL: - continue; + goto next_arg; case KF_ARG_PTR_TO_MAP: if (!reg->map_ptr) { verbose(env, "pointer in R%d isn't map pointer\n", regno); @@ -14020,6 +14083,10 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ dynptr_arg_type |= DYNPTR_TYPE_FILE; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) { dynptr_arg_type |= DYNPTR_TYPE_FILE; + if (is_stack_arg) { + verbose(env, "arg#%d release arg cannot be a stack argument\n", i); + return -EINVAL; + } meta->release_regno = regno; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] && (dynptr_arg_type & MEM_UNINIT)) { @@ -14169,8 +14236,18 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ { struct bpf_reg_state *buff_reg = ®s[regno]; const struct btf_param *buff_arg = &args[i]; - struct bpf_reg_state *size_reg = ®s[regno + 1]; + struct bpf_reg_state *size_reg; const struct btf_param *size_arg = &args[i + 1]; + struct bpf_reg_state saved_size_reg = {}; + bool size_is_stack_arg = false; + + if (i >= MAX_BPF_FUNC_REG_ARGS) { + size_is_stack_arg = true; + saved_size_reg = regs[regno + 1]; + regs[regno + 1] = *get_kfunc_arg_reg(env, i + 1); + verbose(env, "Use reg %d to represent mem_size\n", regno + 1); + } + size_reg = ®s[regno + 1]; if (!register_is_null(buff_reg) || !is_kfunc_arg_nullable(meta->btf, buff_arg)) { ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1); @@ -14193,6 +14270,9 @@ static int 
check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ meta->arg_constant.value = size_reg->var_off.value; } + if (size_is_stack_arg) + regs[regno + 1] = saved_size_reg; + /* Skip next '__sz' or '__szk' argument */ i++; break; @@ -14294,6 +14374,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; } } +next_arg: + if (is_stack_arg) { + regs[regno] = saved_reg; + verbose(env, "End of using reg %d for stack arg#%d\n", regno, i); + } } if (is_kfunc_release(meta) && !meta->release_regno) { @@ -15059,7 +15144,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, nargs = btf_type_vlen(meta.func_proto); args = (const struct btf_param *)(meta.func_proto + 1); - for (i = 0; i < nargs; i++) { + for (i = 0; i < nargs && i < MAX_BPF_FUNC_REG_ARGS; i++) { u32 regno = i + 1; t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL); @@ -15070,6 +15155,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, mark_btf_func_reg_size(env, regno, t->size); } + /* Track outgoing stack arg depth for kfuncs with >5 args */ + if (nargs > MAX_BPF_FUNC_REG_ARGS) { + struct bpf_func_state *caller = cur_func(env); + struct bpf_subprog_info *caller_info = &env->subprog_info[caller->subprogno]; + u16 kfunc_stack_arg_depth = (nargs - MAX_BPF_FUNC_REG_ARGS) * BPF_REG_SIZE; + + if (kfunc_stack_arg_depth > caller_info->outgoing_stack_arg_depth) + caller_info->outgoing_stack_arg_depth = kfunc_stack_arg_depth; + } + if (is_iter_next_kfunc(&meta)) { err = process_iter_next_call(env, insn_idx, &meta); if (err) @@ -23975,6 +24070,16 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (!bpf_jit_supports_far_kfunc_call()) insn->imm = BPF_CALL_IMM(desc->addr); + /* + * After resolving the kfunc address, insn->off is no longer needed + * for BTF fd index. Repurpose it to store the number of stack args + * so the JIT can marshal them. 
+ */ + if (desc->func_model.nr_args > MAX_BPF_FUNC_REG_ARGS) + insn->off = desc->func_model.nr_args - MAX_BPF_FUNC_REG_ARGS; + else + insn->off = 0; + if (is_bpf_obj_new_kfunc(desc->func_id) || is_bpf_percpu_obj_new_kfunc(desc->func_id)) { struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) }; -- 2.52.0