From: Fangyu Yu Make riscv_kexec_norelocate a two-pass trampoline so it can drop the kernel page tables while still executing from a mapped address. On the first entry, t3 has been initialized to 0 by machine_kexec(), so the PC comparison does not match: the code loads the physical address of riscv_kexec_norelocate and the trampoline SATP value, switches to the trampoline page table, and jumps to the trampoline VA(=PA). On the second entry, t3 contains the physical address of riscv_kexec_norelocate, so the PC comparison matches and execution continues under trampoline VA(=PA). Since the trampoline page table is already active, replace the previous stvec-based handoff with a direct jump to the target entry (jr a2). Signed-off-by: Fangyu Yu --- arch/riscv/kernel/kexec_relocate.S | 30 +++++++++++++++----- arch/riscv/kernel/machine_kexec.c | 44 +++++++++++++++++++++++++++--- 2 files changed, 63 insertions(+), 11 deletions(-) diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S index af6b99f5b0fd..8cfdf6f4032a 100644 --- a/arch/riscv/kernel/kexec_relocate.S +++ b/arch/riscv/kernel/kexec_relocate.S @@ -147,13 +147,35 @@ riscv_kexec_relocate_end: /* Used for jumping to crashkernel */ +.extern kexec_tramp_satp +.extern riscv_kexec_norelocate_pa .section ".kexec.tramp.text", "ax" SYM_CODE_START(riscv_kexec_norelocate) + /* + * Two-pass entry: + * - 1st entry: t3 == 0 (initialized by machine_kexec()). + * + * - 2nd entry: t3 holds the physical address of + * riscv_kexec_norelocate, so auipc matches t3 and we fall through + * to label 1 to continue execution under trampoline VA(=PA). 
+ */ + auipc t0, 0 + beq t0, t3, 1f + + la t0, riscv_kexec_norelocate_pa + REG_L t3, 0(t0) + la t0, kexec_tramp_satp + REG_L t1, 0(t0) + csrw CSR_SATP, t1 + sfence.vma x0, x0 + + jr t3 /* * s0: (const) Phys address to jump to * s1: (const) Phys address of the FDT image * s2: (const) The hartid of the current hart */ +1: mv s0, a1 mv s1, a2 mv s2, a3 @@ -198,14 +220,8 @@ SYM_CODE_START(riscv_kexec_norelocate) csrw CSR_SCAUSE, zero csrw CSR_SSCRATCH, zero - /* - * Switch to physical addressing - * This will also trigger a jump to CSR_STVEC - * which in this case is the address of the new - * kernel. - */ - csrw CSR_STVEC, a2 csrw CSR_SATP, zero + jr a2 SYM_CODE_END(riscv_kexec_norelocate) diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index 1947b7bdf5c4..72817bba5d3b 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -18,6 +18,8 @@ #include #include +unsigned long kexec_tramp_satp; +unsigned long riscv_kexec_norelocate_pa; /* * Trampoline page tables. Both the VA(trampoline)->PA and the * PA(trampoline)->PA identity mapping are installed in this single @@ -155,11 +157,17 @@ machine_kexec_prepare(struct kimage *image) } else { /* * Crash kexec uses riscv_kexec_norelocate as a trampoline. - * Pre-build the trampoline page tables here so the panic - * path only has to switch satp and jump. + * Pre-build the trampoline page tables and capture the + * trampoline SATP value plus the physical address of + * riscv_kexec_norelocate so that the panic path only has + * to switch satp and jump. 
*/ riscv_kexec_build_tramp((unsigned long)__kexec_tramp_text_start, __pa_symbol(__kexec_tramp_text_start)); + WRITE_ONCE(riscv_kexec_norelocate_pa, + __pa_symbol(&riscv_kexec_norelocate)); + WRITE_ONCE(kexec_tramp_satp, + PFN_DOWN(__pa_symbol(kexec_tramp_pgd)) | satp_mode); } return 0; @@ -276,7 +284,35 @@ machine_kexec(struct kimage *image) /* Jump to the relocation code */ pr_notice("Bye...\n"); - kexec_method(first_ind_entry, jump_addr, fdt_addr, - this_hart_id, kernel_map.va_pa_offset); + /* + * Hand off to the trampoline. For KEXEC_TYPE_CRASH we go into + * riscv_kexec_norelocate, which uses t3 as the 1st/2nd-pass + * discriminator (must be 0 on first entry). A bare + * asm volatile ("li t3, 0" ::: "t3") + * before the C call only declares t3 *modified*; the compiler is + * free to use t3 as scratch when materialising args. Pin t3 = 0 + * (and the args) via local register variables and perform the + * indirect jump inside the same inline asm so t3 == 0 is + * guaranteed at the moment control leaves machine_kexec(). + */ + { + register unsigned long a0_val asm("a0") = first_ind_entry; + register unsigned long a1_val asm("a1") = jump_addr; + register unsigned long a2_val asm("a2") = fdt_addr; + register unsigned long a3_val asm("a3") = this_hart_id; + register unsigned long a4_val asm("a4") = kernel_map.va_pa_offset; + register unsigned long t3_zero asm("t3") = 0; + register riscv_kexec_method m asm("t6") = kexec_method; + + asm volatile ( + "jr %[m]" + : + : "r" (a0_val), "r" (a1_val), "r" (a2_val), + "r" (a3_val), "r" (a4_val), + "r" (t3_zero), + [m] "r" (m) + : "memory" + ); + } unreachable(); } -- 2.50.1